diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 792373839107f..ff8b1339de966 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1357,6 +1357,9 @@ void CodeGenPGO::setProfileVersion(llvm::Module &M) { IRLevelVersionVariable->setVisibility(llvm::GlobalValue::HiddenVisibility); llvm::Triple TT(M.getTargetTriple()); + if (TT.isAMDGPU() || TT.isNVPTX()) + IRLevelVersionVariable->setVisibility( + llvm::GlobalValue::ProtectedVisibility); if (TT.supportsCOMDAT()) { IRLevelVersionVariable->setLinkage(llvm::GlobalValue::ExternalLinkage); IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1012128085c7a..e0f1206496486 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6387,11 +6387,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions, options::OPT_fno_convergent_functions); - // NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support - // for sampling, overhead of call arc collection is way too high and there's - // no way to collect the output. - if (!Triple.isNVPTX() && !Triple.isAMDGCN()) - addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); + addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ); diff --git a/clang/test/Driver/cuda-no-pgo-or-coverage.cu b/clang/test/Driver/cuda-no-pgo-or-coverage.cu deleted file mode 100644 index b84587e1e182b..0000000000000 --- a/clang/test/Driver/cuda-no-pgo-or-coverage.cu +++ /dev/null @@ -1,33 +0,0 @@ -// Check that profiling/coverage arguments doen't get passed down to device-side -// compilation. 
-// -// -// XRUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// XRUN: -fprofile-generate %s 2>&1 | \ -// XRUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -fprofile-instr-generate %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -coverage %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,GCOV %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -ftest-coverage %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,GCOV %s -// -// RUN: not %clang -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 \ -// RUN: -fprofile-instr-generate -fcoverage-mapping %s 2>&1 | \ -// RUN: FileCheck --check-prefixes=CHECK,PROF %s -// -// -// CHECK-NOT: error: unsupported option '-fprofile -// CHECK-NOT: error: invalid argument -// CHECK-DAG: "-fcuda-is-device" -// CHECK-NOT: "-f{{[^"/]*coverage.*}}" -// CHECK-NOT: "-fprofile{{[^"]*}}" -// CHECK: "-triple" "x86_64-unknown-linux-gnu" -// PROF: "-fprofile{{.*}}" -// GCOV: "-coverage-notes-file= diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h index 77c8d6c79322d..a90558fdcfbbf 100644 --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -310,7 +310,8 @@ int __llvm_write_custom_profile(const char *Target, const __llvm_profile_data *DataEnd, const char *CountersBegin, const char *CountersEnd, const char *NamesBegin, - const char *NamesEnd); + const char *NamesEnd, + const uint64_t *VersionOverride); /*! 
* This variable is defined in InstrProfilingRuntime.cpp as a hidden diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c index 1c451d7ec7563..b406e8db74f3f 100644 --- a/compiler-rt/lib/profile/InstrProfilingBuffer.c +++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c @@ -252,5 +252,6 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal( &BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, BitmapBegin, BitmapEnd, /*VPDataReader=*/0, NamesBegin, NamesEnd, /*VTableBegin=*/NULL, /*VTableEnd=*/NULL, /*VNamesBegin=*/NULL, - /*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0); + /*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0, + __llvm_profile_get_version()); } diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 4667c02892505..19467429cf4c3 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -1273,10 +1273,13 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File, return 0; } -COMPILER_RT_USED int __llvm_write_custom_profile( - const char *Target, const __llvm_profile_data *DataBegin, - const __llvm_profile_data *DataEnd, const char *CountersBegin, - const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) { +int __llvm_write_custom_profile(const char *Target, + const __llvm_profile_data *DataBegin, + const __llvm_profile_data *DataEnd, + const char *CountersBegin, + const char *CountersEnd, const char *NamesBegin, + const char *NamesEnd, + const uint64_t *VersionOverride) { int ReturnValue = 0, FilenameLength, TargetLength; char *FilenameBuf, *TargetFilename; const char *Filename; @@ -1358,10 +1361,15 @@ COMPILER_RT_USED int __llvm_write_custom_profile( ProfDataWriter fileWriter; initFileWriter(&fileWriter, OutputFile); + uint64_t Version = __llvm_profile_get_version(); + if (VersionOverride) + Version = *VersionOverride; + /* Write custom data to the 
file */ - ReturnValue = lprofWriteDataImpl( - &fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL, - lprofGetVPDataReader(), NULL, NULL, NULL, NULL, NamesBegin, NamesEnd, 0); + ReturnValue = + lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin, + CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL, + NULL, NULL, NULL, NamesBegin, NamesEnd, 0, Version); closeFileObject(OutputFile); // Restore SIGKILL. diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h index b100343ca04f9..03df71828b91d 100644 --- a/compiler-rt/lib/profile/InstrProfilingInternal.h +++ b/compiler-rt/lib/profile/InstrProfilingInternal.h @@ -160,7 +160,8 @@ int lprofWriteDataImpl(ProfDataWriter *Writer, VPDataReaderType *VPDataReader, const char *NamesBegin, const char *NamesEnd, const VTableProfData *VTableBegin, const VTableProfData *VTableEnd, const char *VNamesBegin, - const char *VNamesEnd, int SkipNameDataWrite); + const char *VNamesEnd, int SkipNameDataWrite, + uint64_t Version); /* Merge value profile data pointed to by SrcValueProfData into * in-memory profile counters pointed by to DstData. 
*/ diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c index 8816a71155511..633fdb9661162 100644 --- a/compiler-rt/lib/profile/InstrProfilingWriter.c +++ b/compiler-rt/lib/profile/InstrProfilingWriter.c @@ -254,21 +254,21 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer, const VTableProfData *VTableEnd = __llvm_profile_end_vtables(); const char *VNamesBegin = __llvm_profile_begin_vtabnames(); const char *VNamesEnd = __llvm_profile_end_vtabnames(); + uint64_t Version = __llvm_profile_get_version(); return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin, CountersEnd, BitmapBegin, BitmapEnd, VPDataReader, NamesBegin, NamesEnd, VTableBegin, VTableEnd, - VNamesBegin, VNamesEnd, SkipNameDataWrite); + VNamesBegin, VNamesEnd, SkipNameDataWrite, Version); } -COMPILER_RT_VISIBILITY int -lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, - const __llvm_profile_data *DataEnd, - const char *CountersBegin, const char *CountersEnd, - const char *BitmapBegin, const char *BitmapEnd, - VPDataReaderType *VPDataReader, const char *NamesBegin, - const char *NamesEnd, const VTableProfData *VTableBegin, - const VTableProfData *VTableEnd, const char *VNamesBegin, - const char *VNamesEnd, int SkipNameDataWrite) { +COMPILER_RT_VISIBILITY int lprofWriteDataImpl( + ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, + const __llvm_profile_data *DataEnd, const char *CountersBegin, + const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd, + VPDataReaderType *VPDataReader, const char *NamesBegin, + const char *NamesEnd, const VTableProfData *VTableBegin, + const VTableProfData *VTableEnd, const char *VNamesBegin, + const char *VNamesEnd, int SkipNameDataWrite, uint64_t Version) { /* Calculate size of sections. 
*/ const uint64_t DataSectionSize = __llvm_profile_get_data_size(DataBegin, DataEnd); @@ -308,6 +308,7 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin, #define INSTR_PROF_RAW_HEADER(Type, Name, Init) Header.Name = Init; #include "profile/InstrProfData.inc" } + Header.Version = Version; /* On WIN64, label differences are truncated 32-bit values. Truncate * CountersDelta to match. */ diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index a8055979acaa2..bc704b3f89c44 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -463,6 +463,10 @@ createIRLevelProfileFlagVar(Module &M, M, IntTy64, true, GlobalValue::WeakAnyLinkage, Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility); + if (isGPUProfTarget(M)) + IRLevelVersionVariable->setVisibility( + llvm::GlobalValue::ProtectedVisibility); + Triple TT(M.getTargetTriple()); if (TT.supportsCOMDAT()) { IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); diff --git a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test index 83cf76f68fb63..49c5ae9b0931d 100644 --- a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test +++ b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test @@ -1,7 +1,7 @@ // Header // // INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) -// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version) // INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) // INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) // INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) diff --git 
a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h index e030ab9e6b61f..6def53430a7c0 100644 --- a/offload/plugins-nextgen/common/include/GlobalHandler.h +++ b/offload/plugins-nextgen/common/include/GlobalHandler.h @@ -67,15 +67,16 @@ extern "C" { extern int __attribute__((weak)) __llvm_write_custom_profile( const char *Target, const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd, const char *CountersBegin, - const char *CountersEnd, const char *NamesBegin, const char *NamesEnd); + const char *CountersEnd, const char *NamesBegin, const char *NamesEnd, + const uint64_t *VersionOverride); } - /// PGO profiling data extracted from a GPU device struct GPUProfGlobals { SmallVector<int64_t> Counts; SmallVector<__llvm_profile_data> Data; SmallVector<uint8_t> NamesData; Triple TargetTriple; + uint64_t Version = INSTR_PROF_RAW_VERSION; void dump() const; Error write() const; diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp index 8783490831e25..35a70d8eff901 100644 --- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp +++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp @@ -16,6 +16,7 @@ #include "Shared/Utils.h" +#include "llvm/ProfileData/InstrProfData.inc" #include "llvm/Support/Error.h" #include <cstring> @@ -214,6 +215,13 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device, if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal)) return Err; DeviceProfileData.Data.push_back(std::move(Data)); + } else if (*NameOrErr == INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)) { + uint64_t RawVersionData; + GlobalTy RawVersionGlobal(NameOrErr->str(), Sym.getSize(), + &RawVersionData); + if (auto Err = readGlobalFromDevice(Device, Image, RawVersionGlobal)) + return Err; + DeviceProfileData.Version = RawVersionData; } } return DeviceProfileData; @@ -295,9 +303,9 @@ Error GPUProfGlobals::write() const { memcpy(NamesBegin,
NamesData.data(), NamesData.size()); // Invoke compiler-rt entrypoint - int result = __llvm_write_custom_profile(TargetTriple.str().c_str(), - DataBegin, DataEnd, CountersBegin, - CountersEnd, NamesBegin, NamesEnd); + int result = __llvm_write_custom_profile( + TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin, + CountersEnd, NamesBegin, NamesEnd, &Version); if (result != 0) return Plugin::error("Error writing GPU PGO data to file"); diff --git a/offload/test/offloading/gpupgo/pgo1.c b/offload/test/offloading/gpupgo/pgo1.c new file mode 100644 index 0000000000000..c8011cbae83c0 --- /dev/null +++ b/offload/test/offloading/gpupgo/pgo1.c @@ -0,0 +1,84 @@ +// RUN: %libomptarget-compile-generic -fcreate-profile \ +// RUN: -Xarch_device -fprofile-generate +// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \ +// RUN: %libomptarget-run-generic 2>&1 +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %target_triple.%basename_t.llvm.profraw | \ +// RUN: %fcheck-generic --check-prefix="LLVM-PGO" + +// RUN: %libomptarget-compile-generic -fcreate-profile \ +// RUN: -Xarch_device -fprofile-instr-generate +// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \ +// RUN: %libomptarget-run-generic 2>&1 +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %target_triple.%basename_t.clang.profraw | \ +// RUN: %fcheck-generic --check-prefix="CLANG-PGO" + +// REQUIRES: gpu +// REQUIRES: pgo + +int test1(int a) { return a / 2; } +int test2(int a) { return a * 2; } + +int main() { + int m = 2; +#pragma omp target + for (int i = 0; i < 10; i++) { + m = test1(m); + for (int j = 0; j < 2; j++) { + m = test2(m); + } + } +} + +// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: +// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// LLVM-PGO: Counters: 4 +// LLVM-PGO: Block counts: [20, 10, 2, 1] + +// LLVM-PGO-LABEL: test1: +// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// LLVM-PGO: Counters: 1 +// LLVM-PGO: Block counts: [10] + +// 
LLVM-PGO-LABEL: test2: +// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// LLVM-PGO: Counters: 1 +// LLVM-PGO: Block counts: [20] + +// LLVM-PGO-LABEL: Instrumentation level: +// LLVM-PGO-SAME: IR +// LLVM-PGO-SAME: entry_first = 0 +// LLVM-PGO-LABEL: Functions shown: +// LLVM-PGO-SAME: 3 +// LLVM-PGO-LABEL: Maximum function count: +// LLVM-PGO-SAME: 20 +// LLVM-PGO-LABEL: Maximum internal block count: +// LLVM-PGO-SAME: 10 + +// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: +// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// CLANG-PGO: Counters: 3 +// CLANG-PGO: Function count: 0 +// CLANG-PGO: Block counts: [11, 20] + +// CLANG-PGO-LABEL: test1: +// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// CLANG-PGO: Counters: 1 +// CLANG-PGO: Function count: 10 +// CLANG-PGO: Block counts: [] + +// CLANG-PGO-LABEL: test2: +// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} +// CLANG-PGO: Counters: 1 +// CLANG-PGO: Function count: 20 +// CLANG-PGO: Block counts: [] + +// CLANG-PGO-LABEL: Instrumentation level: +// CLANG-PGO-SAME: Front-end +// CLANG-PGO-LABEL: Functions shown: +// CLANG-PGO-SAME: 3 +// CLANG-PGO-LABEL: Maximum function count: +// CLANG-PGO-SAME: 20 +// CLANG-PGO-LABEL: Maximum internal block count: +// CLANG-PGO-SAME: 20 diff --git a/offload/test/offloading/gpupgo/pgo2.c b/offload/test/offloading/gpupgo/pgo2.c new file mode 100644 index 0000000000000..b75b0beaffdec --- /dev/null +++ b/offload/test/offloading/gpupgo/pgo2.c @@ -0,0 +1,102 @@ +// RUN: %libomptarget-compile-generic -fprofile-generate +// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \ +// RUN: %libomptarget-run-generic 2>&1 +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %basename_t.llvm.profraw | %fcheck-generic \ +// RUN: --check-prefix="LLVM-HOST" +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %target_triple.%basename_t.llvm.profraw \ +// RUN: | %fcheck-generic --check-prefix="LLVM-DEVICE" + +// RUN: %libomptarget-compile-generic 
-fprofile-instr-generate +// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \ +// RUN: %libomptarget-run-generic 2>&1 +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %basename_t.clang.profraw | %fcheck-generic \ +// RUN: --check-prefix="CLANG-HOST" +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %target_triple.%basename_t.clang.profraw | \ +// RUN: %fcheck-generic --check-prefix="CLANG-DEV" + +// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate +// RUN: env LLVM_PROFILE_FILE=%basename_t.nogpu.profraw \ +// RUN: %libomptarget-run-generic 2>&1 +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %basename_t.nogpu.profraw | %fcheck-generic \ +// RUN: --check-prefix="LLVM-HOST" +// RUN: not test -e %target_triple.%basename_t.nogpu.profraw + +// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate \ +// RUN: -Xarch_device -fprofile-instr-generate +// RUN: env LLVM_PROFILE_FILE=%basename_t.hidf.profraw \ +// RUN: %libomptarget-run-generic 2>&1 +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %basename_t.hidf.profraw | %fcheck-generic \ +// RUN: --check-prefix="LLVM-HOST" +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %target_triple.%basename_t.hidf.profraw \ +// RUN: | %fcheck-generic --check-prefix="CLANG-DEV" + +// RUN: %libomptarget-compile-generic -Xarch_device -fprofile-generate \ +// RUN: -Xarch_host -fprofile-instr-generate +// RUN: env LLVM_PROFILE_FILE=%basename_t.hfdi.profraw \ +// RUN: %libomptarget-run-generic 2>&1 +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %basename_t.hfdi.profraw | %fcheck-generic \ +// RUN: --check-prefix="CLANG-HOST" +// RUN: llvm-profdata show --all-functions --counts \ +// RUN: %target_triple.%basename_t.hfdi.profraw \ +// RUN: | %fcheck-generic --check-prefix="LLVM-DEVICE" + +// REQUIRES: gpu +// REQUIRES: pgo + +int main() { + int host_var = 0; + for (int i = 0; i < 20; i++) { + host_var += i; + } + + int 
device_var = 1; +#pragma omp target + for (int i = 0; i < 10; i++) { + device_var *= i; + } +} + +// LLVM-HOST-LABEL: main: +// LLVM-HOST: Hash: {{0[xX][0-9a-fA-F]+}} +// LLVM-HOST: Counters: 3 +// LLVM-HOST: Block counts: [20, 1, 0] + +// LLVM-HOST-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: +// LLVM-HOST: Hash: {{0[xX][0-9a-fA-F]+}} +// LLVM-HOST: Counters: 2 +// LLVM-HOST: Block counts: [0, 0] +// LLVM-HOST: Instrumentation level: IR + +// LLVM-DEVICE-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: +// LLVM-DEVICE: Hash: {{0[xX][0-9a-fA-F]+}} +// LLVM-DEVICE: Counters: 3 +// LLVM-DEVICE: Block counts: [10, 2, 1] +// LLVM-DEVICE: Instrumentation level: IR + +// CLANG-HOST-LABEL: main: +// CLANG-HOST: Hash: {{0[xX][0-9a-fA-F]+}} +// CLANG-HOST: Counters: 2 +// CLANG-HOST: Function count: 1 +// CLANG-HOST: Block counts: [20] + +// CLANG-HOST-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: +// CLANG-HOST: Hash: {{0[xX][0-9a-fA-F]+}} +// CLANG-HOST: Counters: 2 +// CLANG-HOST: Function count: 0 +// CLANG-HOST: Block counts: [0] +// CLANG-HOST: Instrumentation level: Front-end + +// CLANG-DEV-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: +// CLANG-DEV: Hash: {{0[xX][0-9a-fA-F]+}} +// CLANG-DEV: Counters: 2 +// CLANG-DEV: Function count: 0 +// CLANG-DEV: Block counts: [11] +// CLANG-DEV: Instrumentation level: Front-end diff --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c deleted file mode 100644 index 6fe4487ffb67f..0000000000000 --- a/offload/test/offloading/pgo1.c +++ /dev/null @@ -1,66 +0,0 @@ -// RUN: %libomptarget-compile-generic -fprofile-generate \ -// RUN: -Xclang "-fprofile-instrument=llvm" -// RUN: env LLVM_PROFILE_FILE=llvm.profraw %libomptarget-run-generic 2>&1 -// RUN: %profdata show --all-functions --counts \ -// RUN: %target_triple.llvm.profraw | %fcheck-generic \ -// RUN: --check-prefix="LLVM-PGO" - -// RUN: %libomptarget-compile-generic 
-fprofile-instr-generate \ -// RUN: -Xclang "-fprofile-instrument=clang" -// RUN: env LLVM_PROFILE_FILE=clang.profraw %libomptarget-run-generic 2>&1 -// RUN: %profdata show --all-functions --counts \ -// RUN: %target_triple.clang.profraw | %fcheck-generic \ -// RUN: --check-prefix="CLANG-PGO" - -// REQUIRES: gpu -// REQUIRES: pgo - -#ifdef _OPENMP -#include <omp.h> -#endif - -int test1(int a) { return a / 2; } -int test2(int a) { return a * 2; } - -int main() { - int m = 2; -#pragma omp target - for (int i = 0; i < 10; i++) { - m = test1(m); - for (int j = 0; j < 2; j++) { - m = test2(m); - } - } -} -// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 4 -// LLVM-PGO: Block counts: [20, 10, 2, 1] - -// LLVM-PGO-LABEL: test1: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 1 -// LLVM-PGO: Block counts: [10] - -// LLVM-PGO-LABEL: test2: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 1 -// LLVM-PGO: Block counts: [20] - -// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 3 -// CLANG-PGO: Function count: 0 -// CLANG-PGO: Block counts: [11, 20] - -// CLANG-PGO-LABEL: test1: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 1 -// CLANG-PGO: Function count: 10 -// CLANG-PGO: Block counts: [] - -// CLANG-PGO-LABEL: test2: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 1 -// CLANG-PGO: Function count: 20 -// CLANG-PGO: Block counts: []