diff --git a/xptifw/.gitignore b/xptifw/.gitignore new file mode 100644 index 0000000000000..aaf47ec18df35 --- /dev/null +++ b/xptifw/.gitignore @@ -0,0 +1,22 @@ +CMakeCache.txt +CMakeFiles/ +Makefile +basic_test/CMakeFiles/ +basic_test/Makefile +basic_test/cmake_install.cmake +cmake_install.cmake +lib/ +samples/basic_collector/CMakeFiles/ +samples/basic_collector/Makefile +samples/basic_collector/cmake_install.cmake +samples/basic_collector/xpti_timers.hpp +src/CMakeFiles/ +src/Makefile +src/cmake_install.cmake +unit_test/CMakeFiles/ +unit_test/Makefile +unit_test/cmake_install.cmake +unit_test/googletest-build/ +unit_test/googletest-download/ +unit_test/googletest-src/ + diff --git a/xptifw/CMakeLists.txt b/xptifw/CMakeLists.txt new file mode 100644 index 0000000000000..38647cfb2b6da --- /dev/null +++ b/xptifw/CMakeLists.txt @@ -0,0 +1,33 @@ +cmake_minimum_required(VERSION 2.8.9) + +set(XPTI_VERSION 0.4.1) +set(XPTIFW_DIR ${CMAKE_CURRENT_LIST_DIR}) +# The XPTI framework requires the includes from +# the proxy implementation of XPTI +set(XPTI_DIR ${CMAKE_CURRENT_LIST_DIR}/../xpti) + +# Create a soft option for enabling the use of TBB +option(XPTI_ENABLE_TBB "Enable TBB in the framework" OFF) + +if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "No build type selected, default to Release") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type (default Release)" FORCE) +endif() + +project (xptifw) + +# Anchor paths on XPTIFW_DIR rather than CMAKE_SOURCE_DIR so they stay correct +# when this project is added as a subdirectory of a larger build. +set(CMAKE_BINARY_DIR ${XPTIFW_DIR}/lib/${CMAKE_BUILD_TYPE}) +set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}) +set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}) + +include_directories(${XPTIFW_DIR}/include ${XPTI_DIR}/include) +add_subdirectory(src) +add_subdirectory(unit_test) +add_subdirectory(samples/basic_collector) +# The tests in basic_test are written using TBB, so these tests are enabled +# only if TBB has been enabled.
+if (XPTI_ENABLE_TBB) + add_subdirectory(basic_test) +endif() diff --git a/xptifw/CMakeLists.txt.in b/xptifw/CMakeLists.txt.in new file mode 100644 index 0000000000000..c6247af53cf5f --- /dev/null +++ b/xptifw/CMakeLists.txt.in @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 2.8.2) + +project(googletest-download NONE) + +include(ExternalProject) +# Pin googletest to a tagged release so the build is reproducible; 1.12.1 is +# the last release that still supports C++11. +ExternalProject_Add(googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.12.1 + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-src" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/xptifw/README.md b/xptifw/README.md new file mode 100644 index 0000000000000..b32bee81ac169 --- /dev/null +++ b/xptifw/README.md @@ -0,0 +1,16 @@ +# XPTI Framework Library + +Implementation of the instrumentation framework library to support +instrumentation of arbitrary regions of code. This implementation requires the +specification header files used by the proxy library in `xpti/`. This +library is not necessary for building the SYCL runtime library and is only required +to build tools that extract the traces from instrumented code. + +To see the implementation of the basic collector and how it can be attached to +an application that has been instrumented with XPTI, see [samples/basic_collector/README.md](samples/basic_collector/README.md). + +To see how to determine the cost of the APIs, see the tests under [basic_test/](basic_test/README.md). + +Unit tests are available under [unit_test](unit_test/README.md).
+ +To see the complete documentation on XPTI framework API, please see [XPTI Framework library documentation](doc/XPTI_Framework.md). \ No newline at end of file diff --git a/xptifw/basic_test/CMakeLists.txt b/xptifw/basic_test/CMakeLists.txt new file mode 100644 index 0000000000000..1856bed1c519b --- /dev/null +++ b/xptifw/basic_test/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 2.8.9) +project (run_test) + +file(GLOB SOURCES *.cpp *.hpp) +include_directories(${XPTIFW_DIR}/include) +include_directories(${XPTI_DIR}/include) + +remove_definitions(-DXPTI_STATIC_LIBRARY) +add_definitions(-DXPTI_API_EXPORTS -g -O3) +add_executable(run_test ${SOURCES}) +add_dependencies(run_test xptifw) +target_link_libraries(run_test PRIVATE xptifw) +if(UNIX) + target_link_libraries(run_test PRIVATE ${CMAKE_DL_LIBS}) +endif() + +if (XPTI_ENABLE_TBB) + target_link_libraries(run_test PRIVATE tbb) +endif() + +# Install the run_test executable next to the framework binaries +install(TARGETS run_test DESTINATION ${CMAKE_BINARY_DIR}) diff --git a/xptifw/basic_test/README.md b/xptifw/basic_test/README.md new file mode 100644 index 0000000000000..2736a792ea74a --- /dev/null +++ b/xptifw/basic_test/README.md @@ -0,0 +1,16 @@ +# Basic tests + +In order to capture the cost of various API calls in the framework and test the +correctness of the API, a set of basic tests has been created. They primarily +fall under two categories: + +1. Semantic tests: These tests perform correctness checks on the API call to +ensure the right data is being retrieved. The semantic tests are categorized +into string table tests, trace point tests and notification tests. + +2. Performance tests: These tests attempt to capture the average cost of various +operations that are a part of creating trace points in applications. The tests +are categorized into data structure tests and instrumentation tests.
+ +For more detail on the framework, the tests that are provided and their usage, +please consult the [XPTI Framework library documentation](../doc/XPTI_Framework.md). diff --git a/xptifw/basic_test/cl_processor.hpp b/xptifw/basic_test/cl_processor.hpp new file mode 100644 index 0000000000000..b769ac134e354 --- /dev/null +++ b/xptifw/basic_test/cl_processor.hpp @@ -0,0 +1,620 @@ +// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +#pragma once +#include "xpti_trace_framework.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace test { +namespace utils { +enum class OptionType { Boolean, Integer, Float, String, Range }; + +// We are using C++ 11, hence we cannot use +// std::variant or std::any +using table_row_t = std::map; +using table_t = std::map; +using titles_t = std::vector; + +class ScopedTimer { +public: + using time_unit_t = + std::chrono::time_point; + ScopedTimer(uint64_t &ns, double &ratio, size_t count = 1) + : MDuration{ns}, MAverage{ratio}, MInstances{count} { + MBefore = std::chrono::high_resolution_clock::now(); + } + + ~ScopedTimer() { + MAfter = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(MAfter - MBefore); + MDuration = duration.count(); + MAverage = (double)MDuration / MInstances; + } + +private: + uint64_t &MDuration; + double &MAverage; + size_t MInstances; + time_unit_t MBefore, MAfter; +}; + +class CommandLineOption { +public: + CommandLineOption() + : MRequired(false), MType(OptionType::String), + MHelp("No help available.") {} + + CommandLineOption &setRequired(bool yesOrNo) { + MRequired = yesOrNo; + return *this; + } + CommandLineOption &setType(OptionType type) { + MType = type; + return *this; + } + CommandLineOption &setHelp(std::string help)
{ + MHelp = help; + return *this; + } + CommandLineOption &setAbbreviation(std::string abbr) { + MAbbrev = abbr; + return *this; + } + + std::string &abbreviation() { return MAbbrev; } + std::string &help() { return MHelp; } + OptionType type() { return MType; } + bool required() { return MRequired; } + +private: + bool MRequired; + OptionType MType; + std::string MHelp; + std::string MAbbrev; +}; + +class CommandLineParser { +public: + using CommandLineOptions_t = + std::unordered_map; + using key_value_t = std::unordered_map; + + CommandLineParser() { + MReservedKey = "--help"; + MReservedKeyAbbr = "-h"; + } + + void parse(int argc, char **argv) { + // argv[0] names the program; only argv[1..argc-1] are options to store + MCommandLineOptions.resize(argc > 0 ? argc - 1 : 0); + MAppName = argc > 0 ? argv[0] : ""; + for (int i = 1; i < argc; ++i) { + MCommandLineOptions[i - 1] = argv[i]; + } + + buildAbbreviationTable(); + + if (!checkOptions()) { + printHelp(); + exit(-1); + } + } + + CommandLineOption &addOption(std::string Key) { + if (Key == MReservedKey) { + std::cout << "Option[" << Key + << "] is a reserved option.
Ignoring the addOption() call!\n"; + // throw an exception here; + } + if (MOptionHelpLUT.count(Key)) { + std::cout << "Option " << Key << " has already been registered!\n"; + return MOptionHelpLUT[Key]; + } + + return MOptionHelpLUT[Key]; + } + // Returns the value stored for a full or abbreviated option key + std::string &query(const char *Key) { + if (MOptionHelpLUT.count(Key)) { + return MValueLUT[Key]; + } else if (MAbbreviatedOptionLUT.count(Key)) { + std::string FullKey = MAbbreviatedOptionLUT[Key]; + if (MValueLUT.count(FullKey)) { + return MValueLUT[FullKey]; + } + } + return MEmptyString; // unknown key, or abbreviation with no value set + } + +private: + void buildAbbreviationTable() { + for (auto &Option : MOptionHelpLUT) { + std::string &abbr = Option.second.abbreviation(); + if (!abbr.empty()) { + MAbbreviatedOptionLUT[abbr] = Option.first; + } + } + } + + void printHelp() { + std::cout << "Usage:- \n"; + std::cout << " " << MAppName << " "; + // Print all required options first + for (auto &Option : MOptionHelpLUT) { + if (Option.second.required()) { + std::cout << Option.first << " "; + switch (Option.second.type()) { + case OptionType::Integer: + std::cout << " "; + break; + case OptionType::Float: + std::cout << " "; + break; + case OptionType::Boolean: + std::cout << " "; + break; + case OptionType::String: + std::cout << " "; + break; + case OptionType::Range: + std::cout << " "; + break; + } + } + } + // Print the optional flags next.
+ for (auto &Option : MOptionHelpLUT) { + if (!Option.second.required()) { + std::cout << "[" << Option.first << " "; + switch (Option.second.type()) { + case OptionType::Integer: + std::cout << "] "; + break; + case OptionType::Float: + std::cout << "] "; + break; + case OptionType::Boolean: + std::cout << "] "; + break; + break; + case OptionType::String: + std::cout << "] "; + break; + case OptionType::Range: + std::cout << "] "; + break; + } + } + } + std::cout << "\n Options supported:\n"; + // Print help for all of the options + for (auto &Option : MOptionHelpLUT) { + std::stringstream Help(Option.second.help()); + std::string HelpLine; + bool FirstTime = true; + + while (std::getline(Help, HelpLine, '\n')) { + if (FirstTime) { + std::string options = + Option.first + ", " + Option.second.abbreviation(); + FirstTime = false; + std::cout << " " << std::left << std::setw(20) << options << " " + << HelpLine << "\n"; + } else { + std::cout << " " << std::left << std::setw(20) << " " + << " " << HelpLine << "\n"; + } + } + } + } + + bool checkOptions() { + bool Pass = true; + std::string PrevKey; + for (auto &Option : MCommandLineOptions) { + std::size_t Pos = Option.find_first_of("-"); + if (std::string::npos != Pos) { + // We have an option provided; let's check to see if it is verbose or + // abbreviated + Pos = Option.find_first_of("-", Pos + 1); + if (std::string::npos != Pos) { + // We have a verbose option + if (Option == MReservedKey) { + printHelp(); + exit(-1); + } else if (MOptionHelpLUT.count(Option) == 0) { + std::cout << "Unknown option[" << Option << "]!\n"; + Pass = false; + } + MValueLUT[Option] = "true"; + PrevKey = Option; + } else { + // We have an abbreviated option + if (Option == MReservedKeyAbbr) { + printHelp(); + exit(-1); + } else if (MAbbreviatedOptionLUT.count(Option) == 0) { + std::cout << "Unknown option[" << Option << "] detected.\n"; + Pass = false; + } + PrevKey = MAbbreviatedOptionLUT[Option]; + MValueLUT[PrevKey] = "true"; + } + 
} else { + // No idea why stringstream will decode the last \n as a "" string; this + // handles that case + if (PrevKey.empty() && Option.empty()) + break; + // We have an option value + if (PrevKey.empty()) { + std::cout << "Value[" << Option + << "] provided without specifying an option\n"; + Pass = false; + } else { + MValueLUT[PrevKey] = Option; + PrevKey = MEmptyString; + } + } + } + + for (auto &Option : MOptionHelpLUT) { + // Check to see if an option is required; If so, check to see if there's a + // value associated with it. + if (Option.second.required()) { + if (!MValueLUT.count(Option.first)) { + std::cout << "Option[" << Option.first + << "] is required and not provided.\n"; + Pass = false; + } + } + } + + return Pass; + } + + std::vector MCommandLineOptions; + CommandLineOptions_t MOptionHelpLUT; + key_value_t MAbbreviatedOptionLUT; + key_value_t MValueLUT; + std::string MEmptyString; + std::string MReservedKey; + std::string MReservedKeyAbbr; + std::string MAppName; +}; + +class TableModel { +public: + using row_titles_t = std::map; + + TableModel() {} + + void setHeaders(titles_t &Titles) { MColumnTitles = Titles; } + + table_row_t &addRow(int Row, std::string &RowName) { + if (MRowTitles.count(Row)) { + std::cout << "Warning: Row title already specified!\n"; + } + MRowTitles[Row] = RowName; + return MTable[Row]; + } + + table_row_t &addRow(int Row, const char *RowName) { + if (MRowTitles.count(Row)) { + std::cout << "Warning: Row title already specified!\n"; + } + MRowTitles[Row] = RowName; + return MTable[Row]; + } + + table_row_t &operator[](int Row) { return MTable[Row]; } + + void print() { + std::cout << std::setw(14) << " "; + for (auto &Title : MColumnTitles) { + std::cout << std::setw(14) << Title; // Column headers + } + std::cout << "\n"; + + for (auto &Row : MTable) { + std::cout << std::setw(14) << MRowTitles[Row.first]; + for (auto &Data : Row.second) { + std::cout << std::fixed << std::setw(14) << std::setprecision(0) + << 
Data.second; + } + std::cout << "\n"; + } + std::cout << "\n"; + } + +private: + titles_t MColumnTitles; + row_titles_t MRowTitles; + table_t MTable; +}; + +class RangeDecoder { +public: + RangeDecoder(std::string &RangeStr) : MRange(RangeStr) { + // Split by commas, then by ':' into begin:end[:step] (step defaults to 1) + std::stringstream Elements(RangeStr); + std::string Element; + while (std::getline(Elements, Element, ',')) { + if (Element.find_first_of("-:") == std::string::npos) { + MElements.insert(std::stol(Element)); + } else { + std::stringstream R(Element); + std::vector RangeTokens; + std::string SubStr; + // Now split by : + while (std::getline(R, SubStr, ':')) { + RangeTokens.push_back(SubStr); + } + if (RangeTokens.size() < 2) continue; // need begin:end[:step] + std::cout << RangeTokens[0] << ";" << RangeTokens[1] << std::endl; + long Step = RangeTokens.size() > 2 ? std::stol(RangeTokens[2]) : 1; + for (long i = std::stol(RangeTokens[0]), End = std::stol(RangeTokens[1]); + Step > 0 && i <= End; i += Step) { + MElements.insert(i); + } + } + } + } + + std::set &decode() { return MElements; } + +private: + std::string MRange; + std::set MElements; +}; +} // namespace utils + +namespace semantic { +class TestCorrectness { +public: + enum class SemanticTests { + StringTableTest = 1, + TracePointTest, + NotificationTest + }; + + TestCorrectness(test::utils::CommandLineParser &Parser) : MParser(Parser) { + xptiInitialize("xpti", 20, 0, "xptiTests"); + } + + void run() { + auto &V = MParser.query("--type"); + if (V != "semantic") + return; + + test::utils::RangeDecoder td(MParser.query("--num-threads")); + MThreads = td.decode(); + test::utils::RangeDecoder rd(MParser.query("--test-id")); + MTests = rd.decode(); + + runTests(); + } + + void runTests() { + for (auto Test : MTests) { + switch ((SemanticTests)Test) { + case SemanticTests::StringTableTest: + runStringTableTests(); + break; + case SemanticTests::TracePointTest: + runTracepointTests(); + break; + case SemanticTests::NotificationTest: +
runNotificationTests(); + break; + default: + std::cout << "Unknown test type [" << Test << "]: use 1,2,3 or 1:3:1\n"; + break; + } + } + MTable.print(); + } + +private: + void runStringTableTests(); + void runStringTableTestThreads(int RunNo, int NThreads, + test::utils::TableModel &Table); + void runTracepointTests(); + void runTracepointTestThreads(int RunNo, int nt, + test::utils::TableModel &Table); + void runNotificationTests(); + void runNotificationTestThreads(int RunNo, int NThreads, + test::utils::TableModel &Table); + + test::utils::CommandLineParser &MParser; + test::utils::TableModel MTable; + std::set MThreads, MTests; + long MTracepoints; + const char *MSource = "foo.cpp"; + uint64_t MInstanceID = 0; +}; +} // namespace semantic + +namespace performance { +constexpr int MaxTracepoints = 100000; +constexpr int MinTracepoints = 10; +class TestPerformance { +public: + struct record { + std::string fn; + uint64_t lookup; + }; + enum class PerformanceTests { DataStructureTest = 1, InstrumentationTest }; + + TestPerformance(test::utils::CommandLineParser &Parser) : MParser(Parser) { + xptiInitialize("xpti", 20, 0, "xptiTests"); + } + + std::string makeRandomString(uint8_t Length, std::mt19937_64 &Gen) { + if (Length > 25) { + Length = 25; + } + // A=65, a=97 + std::string s(Length, '\0'); + for (int i = 0; i < Length; ++i) { + int ascii = MCaseU(Gen); + int value = MCharU(Gen); + s[i] = (ascii ? 
value + 97 : value + 65); + } + return s; + } + + void run() { + auto &V = MParser.query("--type"); + if (V != "performance") + return; + + test::utils::RangeDecoder Td(MParser.query("--num-threads")); + MThreads = Td.decode(); + MTracepoints = std::stol(MParser.query("--trace-points")); + if (MTracepoints > MaxTracepoints) { + std::cout << "Reducing trace points to " << MaxTracepoints << "!\n"; + MTracepoints = MaxTracepoints; + } + if (MTracepoints < MinTracepoints) { + std::cout << "Setting trace points to " << MinTracepoints << "!\n"; + MTracepoints = MinTracepoints; + } + + test::utils::RangeDecoder Rd(MParser.query("--test-id")); + MTests = Rd.decode(); + + std::string Dist = MParser.query("--tp-frequency"); + if (Dist.empty()) { + // By default, we assume that for every trace point that is created, we + // will visit it NINE more times. + MTracepointInstances = MTracepoints * 10; + } else { + float Value = std::stof(Dist); + if (Value > 100) { + std::cout << "Trace point creation frequency limited to 100%!\n"; + Value = 100; + } + if (Value < 1) { + std::cout << "Trace point creation frequency set to 1%!\n"; + Value = 1; + } + // If not, we compute the number of trace point instances based on the + // trace point frequency value; If the frequency is 10%, then every 10th + // trace point create will be creating a new trace point. If it is 2%, + // then every 50th trace point create call will result in a new + // trace point.
+ MTracepointInstances = + (long)((1.0 / (Value / 100)) * MTracepoints); + } + // Check to see if overheads to model are set; if not assume 1.0% + Dist = MParser.query("--overhead"); + if (!Dist.empty()) { + MOverhead = std::stof(Dist); + if (MOverhead < 0.1) { + std::cout << "Overheads to be modeled clamped to range - 0.1%!\n"; + MOverhead = 0.1; + } else if (MOverhead > 15) { + std::cout << "Overheads to be modeled clamped to range - 15%!\n"; + MOverhead = 15; + } + } + + // If the number of trace points(TP) required to run tests on is 1000, then + // we will run our string table tests on the number of TPs we compute. For a + // TP frequency of 10%, we will have TP instances be 1000x10 + MStringTableEntries = MTracepointInstances; + // Mersenne twister RNG engine that is uniform distribution + std::random_device QRd; + std::mt19937_64 Gen(QRd()); + // Generate the pseudo-random numbers for trace points and string table + // random lookup + MTracepointU = std::uniform_int_distribution(0, MTracepoints - 1); + MStringTableU = + std::uniform_int_distribution(0, MStringTableEntries - 1); + MCharU = std::uniform_int_distribution(0, 25); + MCaseU = std::uniform_int_distribution(0, 1); + + MRndmSTIndex.resize(MStringTableEntries); + MRndmTPIndex.resize(MStringTableEntries); + for (int i = 0; i < MStringTableEntries; ++i) { + MRndmSTIndex[i] = MStringTableU(Gen); + } + for (int i = 0; i < MStringTableEntries; ++i) { + MRndmTPIndex[i] = MTracepointU(Gen); + } + // Generate the strings we will be registering with the string table and + // also the random lookup table for trace points + for (int i = 0; i < MTracepointInstances; ++i) { + record Rec; + Rec.lookup = MRndmTPIndex[i]; // 0-999999999 + std::string Str = makeRandomString(5, Gen); + Rec.fn = Str + std::to_string(Rec.lookup); + MRecords.push_back(Rec); + Str = makeRandomString(8, Gen) + std::to_string(i); + MFunctions.push_back(Str); + Str = makeRandomString(8, Gen) + std::to_string(i); +
MFunctions2.push_back(Str); + } + // Done with the setup; now run the tests + runTests(); + } + + void runTests() { + for (auto Test : MTests) { + switch ((PerformanceTests)Test) { + case PerformanceTests::DataStructureTest: + runDataStructureTests(); + break; + case PerformanceTests::InstrumentationTest: + runInstrumentationTests(); + break; + default: + std::cout << "Unknown test type [" << Test << "]: use 1,2 or 1:2:1\n"; + break; + } + } + MTable.print(); + } + +private: + void runDataStructureTests(); + void runDataStructureTestsThreads(int RunNo, int NThreads, + test::utils::TableModel &Table); + void runInstrumentationTests(); + void runInstrumentationTestsThreads(int RunNo, int NThreads, + test::utils::TableModel &Table); + + test::utils::CommandLineParser &MParser; + test::utils::TableModel MTable; + std::set MThreads, MTests; + long MTracepoints; + long MTracepointInstances; + long MStringTableEntries; + const char *MSource = "foo.cpp"; + uint64_t MInstanceID = 0; + std::uniform_int_distribution MTracepointU, MStringTableU, MCharU, + MCaseU; + std::vector MRndmTPIndex, MRndmSTIndex; + std::vector MRecords; + std::vector MFunctions, MFunctions2; + double MOverhead = 1.0; +}; +} // namespace performance +} // namespace test diff --git a/xptifw/basic_test/main.cpp b/xptifw/basic_test/main.cpp new file mode 100644 index 0000000000000..741b62ea675a1 --- /dev/null +++ b/xptifw/basic_test/main.cpp @@ -0,0 +1,77 @@ +// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +#include "cl_processor.hpp" + +// This test will expose the correctness and performance tests +// through the command-line options. +int main(int argc, char **argv) { + test::utils::CommandLineParser options; + + options.addOption("--verbose") + .setAbbreviation("-v") + .setHelp("Run the tests in verbose mode. 
Running in this mode " + "may\naffect performance test metrics.\n") + .setRequired(false) + .setType(test::utils::OptionType::String); + + options.addOption("--trace-points") + .setAbbreviation("-t") + .setHelp( + "Number of trace points to use in the tests - Range [10-100000]\n") + .setRequired(true) + .setType(test::utils::OptionType::Integer); + + options.addOption("--type") + .setAbbreviation("-y") + .setHelp("Takes in the type of test to run. The options are:\n\n o " + "semantic\n o performance\n\nSemantic tests will ignore all " + "flags that are meant\nfor performance tests.\n") + .setRequired(true) + .setType(test::utils::OptionType::String); + + options.addOption("--test-id") + .setAbbreviation("-i") + .setHelp( + "Takes in the test identifier to run a specific test. These\ntests " + "will be identifiers within the semantic or performance tests.\n") + .setRequired(true) + .setType(test::utils::OptionType::Range); + + options.addOption("--num-threads") + .setAbbreviation("-n") + .setHelp("Number of threads to use to run the tests.\n") + .setRequired(true) + .setType(test::utils::OptionType::Range); + + options.addOption("--overhead") + .setAbbreviation("-o") + .setHelp("Overhead limit in percentage - Range[0.1-15]\n") + .setRequired(false) + .setType(test::utils::OptionType::Float); + + options.addOption("--report") + .setAbbreviation("-r") + .setHelp("Print the results in tabular form.\n") + .setRequired(false) + .setType(test::utils::OptionType::String); + + options.addOption("--tp-frequency") + .setAbbreviation("-f") + .setHelp("Trace point creation frequency as a percentage of tracepoint " + "instances-Range [1-100]\n") + .setRequired(false) + .setType(test::utils::OptionType::Float); + + options.parse(argc, argv); + + test::semantic::TestCorrectness ct(options); + test::performance::TestPerformance pt(options); + + ct.run(); + pt.run(); +} diff --git a/xptifw/basic_test/performance_tests.cpp b/xptifw/basic_test/performance_tests.cpp new file mode 
100644 index 0000000000000..1b850f92b110c --- /dev/null +++ b/xptifw/basic_test/performance_tests.cpp @@ -0,0 +1,564 @@ +// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +//----------------------- performance_tests.cpp ----------------------------- +// Tests the performance of the API and framework by running real world +// scenarios and computing the average costs and maximum Events/sec that can +// be serviced by the framework at a given max. overhead constraint. +//--------------------------------------------------------------------------- +#include "tbb/concurrent_vector.h" +#include "tbb/parallel_for.h" +#include "tbb/spin_mutex.h" +#include "tbb/task_arena.h" +#include "tbb/task_group.h" + +#include "cl_processor.hpp" +#include "xpti_trace_framework.h" + +#include +#include +#include + +namespace test { +void registerCallbacks(uint8_t sid); +namespace performance { +enum class DSColumns { + Threads, ///< Slot used to record the number of threads + STInsert, ///< Used to capture the average string insert costs + STLookup, ///< Used to capture the average string lookup costs + STInsertLookup, ///< Avg. 
string insert+2 lookups cost + TPCreate, ///< Average trace point creation costs + TPUncachedLookup, ///< Average trace point recration costs using payload + TPFWCache, ///< Average trace event lookup using unique_id + TPLocalCache, ///< Average costs to look up locally cached event (0) + Notify ///< Average notification costs +}; + +enum class FWColumns { + Threads, ///< Slot used to record the number of threads + TPLookupAndNotify, ///< Average cost to create a trace event and notify based + ///< on the average frequency of a new tracepoint being + ///< created as a function of total number of trace point + ///< lookup/notifications + TPCreate, ///< Average trace point event creation cost + EPS10, ///< Events/sec @ given overhead with CB handler cost of 10ns + EPS100, ///< Events/sec @ given overhead with CB handler cost of 100ns + EPS500, ///< Events/sec @ given overhead with CB handler cost of 500ns + EPS1000, ///< Events/sec @ given overhead with CB handler cost of 1000ns + EPS2000 ///< Events/sec @ given overhead with CB handler cost of 2000ns +}; + +void TestPerformance::runDataStructureTestsThreads( + int RunNo, int NumThreads, test::utils::TableModel &Model) { + xptiReset(); + uint64_t TimeInNS; + double ElapsedTime; + + // If the num-threads specification includes 0, then a true serial version + // outside of TBB is run + if (!NumThreads) { + auto &ModelRow = Model.addRow(RunNo, "Serial"); + ModelRow[(int)DSColumns::Threads] = NumThreads; + // Hold the string IDs for measuring lookup later + std::vector IDs; + IDs.resize(MTracepoints); + // Columns 1, 2: Insert, 2 Lookups + // Perform measurement tests to determine the cost of insertions into the + // string table, the lookup costs and a composite measurement of insertion + // and 2 lookups for strings added to the string table + { + // Create 'm_tracepoint' number of strings and measure the cost of serial + // insertions into a concurrent container. 
Here, using an unordered_map + // will be faster, but we rely on TBB concurrent containers to ensure they + // are thread safe + { + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints); + for (int i = 0; i < MTracepoints; ++i) { + char *TableStrRef = nullptr; + // Assume that the string has already been created as it is normally + // provided to the Payload constructors + std::string &FuncName = MFunctions[i]; + IDs[i] = xptiRegisterString(FuncName.c_str(), &TableStrRef); + } + } + ModelRow[(int)DSColumns::STInsert] = ElapsedTime; + + { // lookup the created strings "MTracepoints" randomly + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints * 2); + for (int i = 0; i < MTracepoints * 2; ++i) { + int LookupIndex = MRndmTPIndex[i % MStringTableEntries]; + const char *LUTStrRef = xptiLookupString(IDs[LookupIndex]); + } + } + ModelRow[(int)DSColumns::STLookup] = ElapsedTime; + } + + // Column 3: Insert+ 2 Lookups + // Perform measurement tests to determine the cost of insertion and 2 + // lookups for strings added to the string table + { // Create NEW "MTracepoints" strings + std::vector NewIDs; + NewIDs.resize(MTracepoints); + long NoOfOperations = MTracepoints * 3; + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, NoOfOperations); + for (int i = 0; i < MTracepoints; ++i) { + char *TableStrRef = nullptr; + std::string &FuncName = MFunctions2[i]; + NewIDs[i] = xptiRegisterString(FuncName.c_str(), &TableStrRef); + } + for (int i = 0; i < MTracepoints * 2; ++i) { + int LookupIndex = + MRndmTPIndex[i % MStringTableEntries]; // Generates a value between + // 0-MTracepoints-1 + const char *LUTStrRef = xptiLookupString(IDs[LookupIndex]); + } + } + ModelRow[(int)DSColumns::STInsertLookup] = ElapsedTime; + + std::vector UIds; + std::vector Events; + UIds.resize(MTracepoints); + Events.resize(MTracepoints); + // Column 4: Measure the cost of trace point creation and cache the returned + // event and event IDs + { + // Create
"MTracepoints" number of trace point events + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints); + for (int i = 0; i < MTracepoints; ++i) { + record &r = MRecords[i]; + int LookupIndex = r.lookup; + std::string &fn = r.fn; + xpti::payload_t P = xpti::payload_t(fn.c_str(), MSource, LookupIndex, + LookupIndex % 80, (void *)r.lookup); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev) { + UIds[LookupIndex] = Ev->unique_id; + Events[LookupIndex] = Ev; + } + } + } + ModelRow[(int)DSColumns::TPCreate] = ElapsedTime; + + // Column 5: Measure the cost of trace point creation of previously created + // trace points in an un-cached manner + { // Lookup "MTracepoints" instances, uncached where we create the payload + // each time + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints); + for (int i = 0; i < MTracepoints; ++i) { + record &r = MRecords[i]; + uint64_t LookupIndex = r.lookup; + std::string &fn = r.fn; + xpti::payload_t P = + xpti::payload_t(fn.c_str(), MSource, (int)LookupIndex, + (int)LookupIndex % 80, (void *)LookupIndex); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + } + } + ModelRow[(int)DSColumns::TPUncachedLookup] = ElapsedTime; + + // Column 6: Measure the cost of trace point creation of previously created + // trace points in an framework-cached manner + { // Lookup "MTracepoints" instances, framework-cached + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints); + for (int i = 0; i < MTracepoints; ++i) { + record &r = MRecords[i]; + uint64_t LookupIndex = r.lookup; + xpti::trace_event_data_t *Ev = const_cast( + xptiFindEvent(UIds[LookupIndex])); + } + } + ModelRow[(int)DSColumns::TPFWCache] = ElapsedTime; + + // Column 7: Measure the cost of trace point creation 
of previously created + // and cached trace points + { // Lookup "MTracepoints" instances, locally-cached or locally visible + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, + MTracepointInstances); + for (int i = 0; i < MTracepointInstances; ++i) { + record &r = MRecords[i % MTracepoints]; + uint64_t LookupIndex = r.lookup; // get the random id to lookup + xpti::trace_event_data_t *Ev = Events[LookupIndex]; + } + } + ModelRow[(int)DSColumns::TPLocalCache] = ElapsedTime; + + { // Notify "MTracepoints" number tps, locally cached + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, + MTracepointInstances); + for (int i = 0; i < MTracepointInstances; ++i) { + record &r = MRecords[i % MTracepoints]; + uint64_t LookupIndex = r.lookup; + xpti::trace_event_data_t *Ev = Events[LookupIndex]; + xpti::framework::scoped_notify ev( + "xpti", (uint16_t)xpti::trace_point_type_t::region_begin, nullptr, + Ev, MInstanceID, nullptr); + } + } + ModelRow[(int)DSColumns::Notify] = ElapsedTime; + + } else { + // Now run the same performance tests in multi-threaded mode to accommodate + // lock contention costs + + std::string RowTitle = "Threads " + std::to_string(NumThreads); + auto &ModelRow = Model.addRow(RunNo, RowTitle); + ModelRow[(int)DSColumns::Threads] = NumThreads; + + // Limit TBB to use the number of threads for this run + tbb::task_arena a(NumThreads); + a.execute([&]() { + std::vector IDs; + IDs.resize(MTracepoints); + // Columns 1, 2: Insert, 2 Lookups + // Perform measurement tests to determine the cost of insertions into the + // string table, the lookup costs and a composite measurement of insertion + // and 2 lookups for strings added to the string table + { + { // Create "MTracepoints" strings + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints); + tbb::parallel_for(tbb::blocked_range(0, MTracepoints), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + char *TableStrRef = nullptr; + std::string &FuncName = 
MFunctions[i]; + IDs[i] = xptiRegisterString(FuncName.c_str(), + &TableStrRef); + } + }); + } + ModelRow[(int)DSColumns::STInsert] = ElapsedTime; + + { // lookup the created strings "MTracepoints*2" linearly + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, + MTracepoints * 2); + tbb::parallel_for(tbb::blocked_range(0, MTracepoints * 2), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + int LookupIndex = + MRndmTPIndex[i % MStringTableEntries]; + const char *LUTStrRef = + xptiLookupString(IDs[LookupIndex]); + } + }); + } + ModelRow[(int)DSColumns::STLookup] = ElapsedTime; + } + // Column 3: Insert+ 2 Lookups + // Perform measurement tests to determine the cost of insertion and 2 + // lookups for strings added to the string table + { // insert and lookup at the same time "MStringTableEntries*10" + std::vector NewIDs; + NewIDs.resize(MTracepoints); + tbb::task_group g; + // 2 lookups + 1 insert of MTracepoints elements that occurs + // simultaneously + long NoOfOperations = MTracepoints * 3; + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, NoOfOperations); + g.run([&] { + // Add new strings + tbb::parallel_for(tbb::blocked_range(0, MTracepoints), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + char *TableStrRef = nullptr; + std::string &FuncName = MFunctions2[i]; + NewIDs[i] = xptiRegisterString(FuncName.c_str(), + &TableStrRef); + } + }); + }); + g.run([&] { + // And read previously added strings + tbb::parallel_for( + tbb::blocked_range(0, MStringTableEntries), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + int LookupIndex = + MRndmTPIndex[i % MStringTableEntries]; // Generates a + // value between + // 0-MTracepoints-1 + // Read from previously added strings by looking + // up the old IDs stored in 'IDs' + const char *LUTStrRef = xptiLookupString(IDs[LookupIndex]); + } + }); + }); + g.wait(); + } + ModelRow[(int)DSColumns::STInsertLookup] = ElapsedTime; + + 
std::vector UIds; + std::vector Events; + UIds.resize(MTracepoints); + Events.resize(MTracepoints); + // Column 4: Measure the cost of trace point creation and cache the + // returned event and event IDs + { // Create "MTracepoints" number of trace point Events + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints); + tbb::parallel_for( + tbb::blocked_range(0, MTracepoints), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + record &r = MRecords[i]; + int LookupIndex = r.lookup; + std::string &fn = r.fn; + xpti::payload_t P = + xpti::payload_t(fn.c_str(), MSource, LookupIndex, + LookupIndex % 80, (void *)r.lookup); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, + (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev) { + UIds[LookupIndex] = Ev->unique_id; + Events[LookupIndex] = Ev; + } + } + }); + } + ModelRow[(int)DSColumns::TPCreate] = ElapsedTime; + + // Column 5: Measure the cost of trace point creation of previously + // created trace points in an un-cached manner + { // Lookup "MTracepoints" number of trace point Events, uncached + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints); + tbb::parallel_for( + tbb::blocked_range(0, MTracepoints), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + record &r = MRecords[i]; + int LookupIndex = r.lookup; + std::string &fn = r.fn; + xpti::payload_t P = + xpti::payload_t(fn.c_str(), MSource, LookupIndex, + LookupIndex % 80, (void *)r.lookup); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, + (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + } + }); + } + ModelRow[(int)DSColumns::TPUncachedLookup] = ElapsedTime; + + // Column 6: Measure the cost of trace point creation of previously + // created trace points in an framework-cached manner + { // Lookup "MTracepointInstances" number of 
trace point Events, + // framework-cached + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints); + tbb::parallel_for(tbb::blocked_range(0, MTracepoints), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + record &r = MRecords[i]; + uint64_t LookupIndex = r.lookup; + xpti::trace_event_data_t *Ev = + const_cast( + xptiFindEvent(UIds[LookupIndex])); + } + }); + } + ModelRow[(int)DSColumns::TPFWCache] = ElapsedTime; + + // Column 7: Measure the cost of trace point creation of previously + // created and cached trace points + { // Lookup "MTracepoints" number of trace point Events, locally-cached + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, + MTracepointInstances); + tbb::parallel_for(tbb::blocked_range(0, MTracepointInstances), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + record &r = MRecords[i % MTracepoints]; + uint64_t LookupIndex = + r.lookup; // get the random id to lookup + xpti::trace_event_data_t *Ev = + Events[LookupIndex]; + } + }); + } + ModelRow[(int)DSColumns::TPLocalCache] = ElapsedTime; + + { // Notify "MTracepoints" number tps, locally cached + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, + MTracepointInstances); + tbb::parallel_for( + tbb::blocked_range(0, MTracepointInstances), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + record &r = MRecords[i % MTracepoints]; + uint64_t LookupIndex = r.lookup; + xpti::trace_event_data_t *Ev = Events[LookupIndex]; + xpti::framework::scoped_notify ev( + "xpti", (uint16_t)xpti::trace_point_type_t::region_begin, + nullptr, Ev, MInstanceID, nullptr); + } + }); + } + ModelRow[(int)DSColumns::Notify] = ElapsedTime; + }); + } +} + +void TestPerformance::runDataStructureTests() { + test::utils::TableModel Model; + + test::utils::titles_t Columns{"Threads", "Str.Insert", "Str.Lookup", + "St.Ins/Lu", "TP Create", "TP Un-Cached", + "TP FW-Cached", "TP Local", "Notify"}; + std::cout << 
std::setw(Columns.size() * 15 / 2) + << "Data Structure Tests [FW=framework, Lu=lookup, " + "TP=Tracepoint, Time=ns\n"; + Model.setHeaders(Columns); + + uint8_t sid = xptiRegisterStream("xpti"); + test::registerCallbacks(sid); + + if (MThreads.size()) { + int RunNo = 0; + for (auto Thread : MThreads) { + runDataStructureTestsThreads(RunNo++, Thread, Model); + } + } + + Model.print(); +} + +void TestPerformance::runInstrumentationTestsThreads( + int RunNo, int NumThreads, test::utils::TableModel &Model) { + xptiReset(); + uint64_t TimeInNS; + double ElapsedTime; + + std::vector tp_ids; + tp_ids.resize(MTracepoints); + std::vector Events; + Events.resize(MTracepoints); + // Variables used to compute Events/sec + uint64_t events_per_sec, overhead_based_cost; + std::vector> cb_handler_cost = { + {FWColumns::EPS10, 10}, + {FWColumns::EPS100, 100}, + {FWColumns::EPS500, 500}, + {FWColumns::EPS1000, 1000}, + {FWColumns::EPS2000, 2000}}; + + if (!NumThreads) { + auto &ModelRow = Model.addRow(RunNo, "Serial"); + ModelRow[(int)FWColumns::Threads] = NumThreads; + { + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, + MTracepointInstances * 2); + { + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints); + for (int i = 0; i < MTracepoints; ++i) { + std::string &fn = MFunctions[i]; + xpti::payload_t P(fn.c_str(), MSource, i, i % 80, (void *)i); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev) { + tp_ids[i] = Ev->unique_id; + Events[i] = Ev; + } + } + } + ModelRow[(int)FWColumns::TPCreate] = ElapsedTime; + for (int i = 0; i < MTracepointInstances; ++i) { + int LookupIndex = MRndmTPIndex[i % MStringTableEntries]; + xpti::trace_event_data_t *Ev = Events[LookupIndex]; + xpti::framework::scoped_notify ev( + "xpti", (uint16_t)xpti::trace_point_type_t::region_begin, nullptr, + Ev, MInstanceID, nullptr); + } + } + 
ModelRow[(int)FWColumns::TPLookupAndNotify] = ElapsedTime; + for (auto cost : cb_handler_cost) { + // Amount of non-instrumentation based work that needs to be present for + // it to meet the overhead constraints requested + overhead_based_cost = (ElapsedTime + cost.second) * (100.0 / MOverhead); + ModelRow[(int)cost.first] = 1000000000 / overhead_based_cost; + } + + } else { + tbb::task_arena a(NumThreads); + + std::string RowTitle = "Threads " + std::to_string(NumThreads); + auto &ModelRow = Model.addRow(RunNo, RowTitle); + ModelRow[(int)FWColumns::Threads] = NumThreads; + { + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, + MTracepointInstances * 2); + a.execute([&]() { + { + test::utils::ScopedTimer Timer(TimeInNS, ElapsedTime, MTracepoints); + tbb::parallel_for( + tbb::blocked_range(0, MTracepoints), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + std::string &fn = MFunctions[i]; + xpti::payload_t P(fn.c_str(), MSource, i, i % 80, (void *)i); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, + (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev) { + tp_ids[i] = Ev->unique_id; + Events[i] = Ev; + } + } + }); + } + ModelRow[(int)FWColumns::TPCreate] = ElapsedTime; + tbb::parallel_for( + tbb::blocked_range(0, MTracepointInstances), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + record &r = MRecords[i % MTracepoints]; + uint64_t LookupIndex = r.lookup; + xpti::trace_event_data_t *Ev = Events[LookupIndex]; + xpti::framework::scoped_notify ev( + "xpti", (uint16_t)xpti::trace_point_type_t::region_begin, + nullptr, Ev, MInstanceID, nullptr); + } + }); + }); + } + ModelRow[(int)FWColumns::TPLookupAndNotify] = ElapsedTime; + for (auto cost : cb_handler_cost) { + // Amount of non-instrumentation based work that needs to be present for + // it to meet the overhead constraints requested + overhead_based_cost = (ElapsedTime + 
cost.second) * (100.0 / MOverhead); + ModelRow[(int)cost.first] = 1000000000 / overhead_based_cost; + } + } +} + +void TestPerformance::runInstrumentationTests() { + test::utils::TableModel Model; + + test::utils::titles_t Columns{ + "Threads", "TP LU+Notify(ns)", "TP Create(ns)", "Ev/s,cb=10", + "Ev/s,cb=100", "Ev/s,cb=500", "Ev/s,cb=1000", "Ev/s,cb=2000"}; + std::cout << std::setw(Columns.size() * 15 / 2) << "Framework Tests\n"; + Model.setHeaders(Columns); + uint8_t sid = xptiRegisterStream("xpti"); + test::registerCallbacks(sid); + + if (MThreads.size()) { + int RunNo = 0; + for (auto Thread : MThreads) { + runInstrumentationTestsThreads(RunNo++, Thread, Model); + } + } + + Model.print(); +} + +} // namespace performance +} // namespace test diff --git a/xptifw/basic_test/semantic_tests.cpp b/xptifw/basic_test/semantic_tests.cpp new file mode 100644 index 0000000000000..9dbff4130692d --- /dev/null +++ b/xptifw/basic_test/semantic_tests.cpp @@ -0,0 +1,512 @@ +// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +//----------------------- semantic_tests.cpp ------------------------------- +// Tests the correctness of the API by comparing it against the spec and +// expected results. 
+//-------------------------------------------------------------------------- +#include "tbb/concurrent_vector.h" +#include "tbb/parallel_for.h" +#include "tbb/spin_mutex.h" +#include "tbb/task_arena.h" +#include "tbb/task_group.h" + +#include "cl_processor.hpp" +#include "xpti_trace_framework.h" + +#include +#include +#include + +static void tpCallback(uint16_t trace_type, xpti::trace_event_data_t *parent, + xpti::trace_event_data_t *event, uint64_t instance, + const void *ud) {} + +namespace test { +void registerCallbacks(uint8_t sid) { + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::graph_create, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::node_create, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::edge_create, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::region_begin, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::region_end, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::task_begin, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::task_end, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::barrier_begin, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::barrier_end, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::lock_begin, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::lock_end, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::transfer_begin, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::transfer_end, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::thread_begin, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::thread_end, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::wait_begin, 
+ tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::wait_end, + tpCallback); + xptiRegisterCallback(sid, (uint16_t)xpti::trace_point_type_t::signal, + tpCallback); +} +// The semantic namespace contains tests to determine the correctness of the +// implementation. The test ensure that the framework is robust under serial and +// multi-threaded conditions. +namespace semantic { +enum class STColumns { + Threads, + Insertions, + Lookups, + DuplicateInserts, + PassRate +}; + +enum class TPColumns { + Threads, + Insertions, + Lookups, + DuplicateInserts, + PayloadLookup, + PassRate +}; + +enum class NColumns { Threads, Notifications, PassRate }; + +void TestCorrectness::runStringTableTestThreads( + int RunNo, int NumThreads, test::utils::TableModel &Model) { + xptiReset(); + constexpr int NumStrings = 1000; + + if (!NumThreads) { + std::vector Strings; + std::vector IDs; + IDs.resize(NumStrings); + Strings.resize(NumStrings); + for (int i = 0; i < NumStrings; ++i) { + char *TableStrRef = nullptr; + std::string StrName = "Function" + std::to_string(i); + IDs[i] = xptiRegisterString(StrName.c_str(), &TableStrRef); + Strings[i] = TableStrRef; + } + auto &ModelRow = Model.addRow(RunNo, "Serial"); + ModelRow[(int)STColumns::Threads] = NumThreads; + ModelRow[(int)STColumns::Insertions] = (long double)Strings.size(); + int LookupCount = 0; + for (int i = 0; i < Strings.size(); ++i) { + const char *TableStrRef = xptiLookupString(IDs[i]); + if (TableStrRef == Strings[i]) + ++LookupCount; + } + ModelRow[(int)STColumns::Lookups] = LookupCount; + int DuplicateCount = 0; + for (int i = 0; i < Strings.size(); ++i) { + char *TableStrRef = nullptr; + std::string StrName = "Function" + std::to_string(i); + xpti::string_id_t id = xptiRegisterString(StrName.c_str(), &TableStrRef); + if (StrName == TableStrRef && id == IDs[i] && TableStrRef == Strings[i]) + ++DuplicateCount; + } + ModelRow[(int)STColumns::DuplicateInserts] = DuplicateCount; + 
ModelRow[(int)STColumns::PassRate] = + (double)(Strings.size() + LookupCount + DuplicateCount) / + (NumStrings * 3) * 100; + } else { + tbb::task_arena a(NumThreads); + + a.execute([&]() { + std::vector Strings; + std::vector IDs; + Strings.resize(NumStrings); + IDs.resize(NumStrings); + tbb::parallel_for( + tbb::blocked_range(0, NumStrings), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + char *TableStrRef = nullptr; + std::string StrName = "Function" + std::to_string(i); + IDs[i] = xptiRegisterString(StrName.c_str(), &TableStrRef); + Strings[i] = TableStrRef; + } + }); + + std::string RowTitle = "Threads " + std::to_string(NumThreads); + auto &ModelRow = Model.addRow(RunNo, RowTitle); + ModelRow[(int)STColumns::Threads] = NumThreads; + ModelRow[(int)STColumns::Insertions] = (long double)Strings.size(); + std::atomic LookupCount = {0}, DuplicateCount = {0}; + tbb::parallel_for(tbb::blocked_range(0, NumStrings), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + const char *TableStrRef = xptiLookupString(IDs[i]); + if (TableStrRef == Strings[i]) + ++LookupCount; + } + }); + ModelRow[(int)STColumns::Lookups] = LookupCount; + tbb::parallel_for(tbb::blocked_range(0, NumStrings), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + char *TableStrRef = nullptr; + std::string StrName = + "Function" + std::to_string(i); + xpti::string_id_t id = xptiRegisterString( + StrName.c_str(), &TableStrRef); + if (StrName == TableStrRef && id == IDs[i] && + TableStrRef == Strings[i]) + ++DuplicateCount; + } + }); + ModelRow[(int)STColumns::DuplicateInserts] = DuplicateCount; + + ModelRow[(int)STColumns::PassRate] = + (double)(Strings.size() + LookupCount + DuplicateCount) / + (NumStrings * 3) * 100; + }); + } +} + +void TestCorrectness::runStringTableTests() { + test::utils::TableModel Model; + + test::utils::titles_t Columns{"Threads", "Insert", "Lookup", "Duplicate", + "Pass rate"}; + std::cout 
<< std::setw(25) << "String Table Tests\n"; + Model.setHeaders(Columns); + + if (MThreads.size()) { + int RunNo = 0; + for (auto Thread : MThreads) { + runStringTableTestThreads(RunNo++, Thread, Model); + } + } + + Model.print(); +} + +void TestCorrectness::runTracepointTestThreads(int RunNo, int NumThreads, + test::utils::TableModel &Model) { + xptiReset(); + constexpr int TracepointCount = 1000; + + if (!NumThreads) { + std::vector Payloads; + std::vector UIds; + std::vector Events; + Payloads.resize(TracepointCount); + UIds.resize(TracepointCount); + Events.resize(TracepointCount); + + for (uint64_t i = 0; i < TracepointCount; ++i) { + std::string fn = "Function" + std::to_string(i); + xpti::payload_t P = xpti::payload_t(fn.c_str(), MSource, (int)i, + (int)(i % 80), (void *)i); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev) { + UIds[i] = Ev->unique_id; + Payloads[i] = Ev->reserved.payload; + Events[i] = Ev; + } + } + auto &ModelRow = Model.addRow(RunNo, "Serial"); + ModelRow[(int)TPColumns::Threads] = NumThreads; + ModelRow[(int)TPColumns::Insertions] = (long double)Events.size(); + + std::atomic LookupCount = {0}; + for (int i = 0; i < Events.size(); ++i) { + const xpti::trace_event_data_t *Ev = xptiFindEvent(UIds[i]); + if (Ev && Ev->unique_id == UIds[i]) + ++LookupCount; + } + ModelRow[(int)TPColumns::Lookups] = LookupCount; + std::atomic DuplicateCount = {0}; + std::atomic PayloadCount = {0}; + for (uint64_t i = 0; i < Events.size(); ++i) { + std::string fn = "Function" + std::to_string(i); + xpti::payload_t P = + xpti::payload_t(fn.c_str(), MSource, (int)i, (int)i % 80, (void *)i); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev) { + if (Ev->unique_id == UIds[i]) { + ++DuplicateCount; + } + 
xpti::payload_t *RP = Ev->reserved.payload; + if (Ev->unique_id == UIds[i] && RP && + std::string(RP->name) == std::string(P.name) && + std::string(RP->source_file) == std::string(P.source_file) && + RP->line_no == P.line_no && RP->column_no == P.column_no) + ++PayloadCount; + } + } + ModelRow[(int)TPColumns::DuplicateInserts] = DuplicateCount; + ModelRow[(int)TPColumns::PayloadLookup] = PayloadCount; + ModelRow[(int)TPColumns::PassRate] = + (double)(Events.size() + LookupCount + DuplicateCount + PayloadCount) / + (TracepointCount * 4) * 100; + } else { + tbb::task_arena a(NumThreads); + + a.execute([&]() { + std::vector Payloads; + std::vector UIds; + std::vector Events; + Payloads.resize(TracepointCount); + UIds.resize(TracepointCount); + Events.resize(TracepointCount); + + tbb::spin_mutex MLock; + tbb::parallel_for( + tbb::blocked_range(0, TracepointCount), + [&](tbb::blocked_range &r) { + for (uint64_t i = r.begin(); i != r.end(); ++i) { + std::string fn = "Function" + std::to_string(i); + xpti::payload_t P = xpti::payload_t(fn.c_str(), MSource, (int)i, + (int)i % 80, (void *)i); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev) { + UIds[i] = Ev->unique_id; + Payloads[i] = Ev->reserved.payload; + Events[i] = Ev; + } + } + }); + + std::string RowTitle = "Threads " + std::to_string(NumThreads); + auto &ModelRow = Model.addRow(RunNo, RowTitle); + ModelRow[(int)TPColumns::Threads] = NumThreads; + ModelRow[(int)TPColumns::Insertions] = (long double)Events.size(); + std::atomic LookupCount = {0}, DuplicateCount = {0}, + PayloadCount = {0}; + tbb::parallel_for(tbb::blocked_range(0, TracepointCount), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + const xpti::trace_event_data_t *Ev = + xptiFindEvent(UIds[i]); + if (Ev && Ev->unique_id == UIds[i]) + LookupCount++; + } + }); + + ModelRow[(int)TPColumns::Lookups] = 
LookupCount; + tbb::parallel_for( + tbb::blocked_range(0, TracepointCount), + [&](tbb::blocked_range &r) { + for (uint64_t i = r.begin(); i != r.end(); ++i) { + std::string fn = "Function" + std::to_string(i); + xpti::payload_t P = xpti::payload_t(fn.c_str(), MSource, (int)i, + (int)i % 80, (void *)i); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev) { + if (Ev->unique_id == UIds[i]) { + ++DuplicateCount; + } + xpti::payload_t *RP = Ev->reserved.payload; + if (Ev->unique_id == UIds[i] && RP && + std::string(RP->name) == std::string(P.name) && + std::string(RP->source_file) == + std::string(P.source_file) && + RP->line_no == P.line_no && RP->column_no == P.column_no) + ++PayloadCount; + } + } + }); + ModelRow[(int)TPColumns::DuplicateInserts] = DuplicateCount; + ModelRow[(int)TPColumns::PayloadLookup] = PayloadCount; + ModelRow[(int)TPColumns::PassRate] = + (double)(Events.size() + LookupCount + DuplicateCount + + PayloadCount) / + (TracepointCount * 4) * 100; + }); + } +} + +void TestCorrectness::runTracepointTests() { + test::utils::TableModel Model; + + test::utils::titles_t Columns{"Threads", "Create", "Lookup", + "Duplicate", "Payload", "Pass rate"}; + std::cout << std::setw(25) << "Tracepoint Tests\n"; + Model.setHeaders(Columns); + + if (MThreads.size()) { + int RunNo = 0; + for (auto Thread : MThreads) { + runTracepointTestThreads(RunNo++, Thread, Model); + } + } + + Model.print(); +} + +void TestCorrectness::runNotificationTestThreads( + int RunNo, int NumThreads, test::utils::TableModel &Model) { + xptiReset(); + int TPCount = 30, CallbackCount = TPCount * 30; + std::vector Payloads; + std::vector UIds; + std::vector Events; + Payloads.resize(TPCount); + UIds.resize(TPCount); + Events.resize(TPCount); + + if (!NumThreads) { + + // assumes tp creation is thread safe + std::atomic NotifyCount = {0}; + for (uint64_t i = 0; i < 
TPCount; ++i) { + int Index = (int)i; + std::string fn = "Function" + std::to_string(i); + xpti::payload_t P = xpti::payload_t(fn.c_str(), MSource, Index, + Index % 80, (void *)(i % 10)); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev) { + UIds[Index] = Ev->unique_id; + Payloads[Index] = Ev->reserved.payload; + Events[Index] = Ev; + } + NotifyCount++; + } + + auto &ModelRow = Model.addRow(RunNo, "Serial"); + ModelRow[(int)NColumns::Threads] = NumThreads; + + for (int i = TPCount; i < CallbackCount; ++i) { + int Index = (int)i % TPCount; + void *Address = (void *)(Index % 10); + std::string fn = "Function" + std::to_string(Index); + xpti::payload_t P = xpti::payload_t(fn.c_str(), MSource, (int)Index, + (int)Index % 80, Address); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev && Ev->unique_id == UIds[Index]) { + uint8_t TP = (Index % 10) + 1; + uint16_t TPType = (uint16_t)(TP << 1); + xpti::framework::scoped_notify ev("xpti", TPType, nullptr, Ev, + MInstanceID, nullptr); + NotifyCount++; + } + } + uint64_t Acc = 0; + for (int i = 0; i < TPCount; ++i) { + Acc += Events[i]->instance_id; + } + + // Accumulator contains 'CallbackCount' number of + // instances that are invoked after creation, so + // each event has 101 instances * TPCount = 1010 + // + // total instances = CallbackCount + TPCount; + + ModelRow[(int)NColumns::Notifications] = (long double)Acc; + ModelRow[(int)NColumns::PassRate] = (long double)(Acc) / (NotifyCount)*100; + } else { + tbb::task_arena a(NumThreads); + + a.execute([&]() { + std::atomic NotifyCount = {0}; + tbb::spin_mutex MLock; + tbb::parallel_for( + tbb::blocked_range(0, TPCount), [&](tbb::blocked_range &r) { + for (uint64_t i = r.begin(); i != r.end(); ++i) { + int Index = (int)i; + 
std::string fn = "Function" + std::to_string(i); + xpti::payload_t P = + xpti::payload_t(fn.c_str(), MSource, (int)Index, + (int)Index % 80, (void *)(i % 10)); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev) { + UIds[Index] = Ev->unique_id; + Payloads[Index] = Ev->reserved.payload; + Events[Index] = Ev; + } + ++NotifyCount; + } + }); + + std::string RowTitle = "Threads " + std::to_string(NumThreads); + auto &ModelRow = Model.addRow(RunNo, RowTitle); + ModelRow[(int)NColumns::Threads] = NumThreads; + + tbb::parallel_for( + tbb::blocked_range(TPCount, CallbackCount), + [&](tbb::blocked_range &r) { + for (int i = r.begin(); i != r.end(); ++i) { + int Index = (int)i % TPCount; + void *Address = (void *)(Index % 10); + std::string fn = "Function" + std::to_string(Index); + xpti::payload_t P = xpti::payload_t( + fn.c_str(), MSource, (int)Index, (int)Index % 80, Address); + xpti::trace_event_data_t *Ev = xptiMakeEvent( + fn.c_str(), &P, (uint16_t)xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &MInstanceID); + if (Ev && Ev->unique_id == UIds[Index]) { + uint8_t TP = (Index % 10) + 1; + uint16_t TPType = (uint16_t)(TP << 1); + xpti::framework::scoped_notify ev("xpti", TPType, nullptr, Ev, + MInstanceID, nullptr); + NotifyCount++; + } + } + }); + + uint64_t Acc = 0; + for (int i = 0; i < TPCount; ++i) { + Acc += Events[i]->instance_id; + } + + ModelRow[(int)NColumns::Notifications] = (long double)Acc; + ModelRow[(int)NColumns::PassRate] = (double)(Acc) / (NotifyCount)*100; + }); + } +} + +void TestCorrectness::runNotificationTests() { + test::utils::TableModel Model; + + test::utils::titles_t Columns{"Threads", "Notify", "Pass rate"}; + std::cout << std::setw(25) << "Notification Tests\n"; + Model.setHeaders(Columns); + + uint8_t SID = xptiRegisterStream("xpti"); + // We do not need to register callback for correctness 
tests + + if (MThreads.size()) { + int RunNo = 0; + for (auto Thread : MThreads) { + runNotificationTestThreads(RunNo++, Thread, Model); + } + } + + Model.print(); +} + +} // namespace semantic +} // namespace test diff --git a/xptifw/doc/XPTI_Framework.md b/xptifw/doc/XPTI_Framework.md new file mode 100644 index 0000000000000..8964b5cf88d1c --- /dev/null +++ b/xptifw/doc/XPTI_Framework.md @@ -0,0 +1,950 @@ +# XPTI Tracing Framework + +- [XPTI Tracing Framework](#xpti-tracing-framework) + - [Overview](#overview) + - [Architecture](#architecture) + - [The Dispatcher](#the-dispatcher) + - [The Subscriber](#the-subscriber) + - [Using the Reference Dispatcher and Subscriber](#using-the-reference-dispatcher-and-subscriber) + - [Tracing Framework and Callback APIs](#tracing-framework-and-callback-apis) + - [Brief API Concepts](#brief-api-concepts) + - [`xptiInitialize`](#xptiinitialize) + - [`xptiFinalize`](#xptifinalize) + - [`xptiTraceEnabled`](#xptitraceenabled) + - [APIs and Data Structures Exported by the Tracing Framework](#apis-and-data-structures-exported-by-the-tracing-framework) + - [Trace Point Event](#trace-point-event) + - [Creating the Payload](#creating-the-payload) + - [Creating an Event that Represents the Trace Point](#creating-an-event-that-represents-the-trace-point) + - [`xptiRegisterUserDefinedTracePoint`](#xptiregisteruserdefinedtracepoint) + - [`xptiRegisterUserDefinedEventType`](#xptiregisteruserdefinedeventtype) + - [`xptiMakeEvent`](#xptimakeevent) + - [Notifying the registered listeners](#notifying-the-registered-listeners) + - [`xptiNotifySubscribers`](#xptinotifysubscribers) + - [Performance of the Framework](#performance-of-the-framework) + - [Modeling and projection](#modeling-and-projection) + - [Computing the cost incurred in the framework](#computing-the-cost-incurred-in-the-framework) + +## Overview + +In order to understand different parts of an application or library, the +ability to capture information about the application or 
library is needed. +Using such information, one can create meaningful representations such as +call-graphs, execution trace views, etc. The XPTI tracing framework is one such +framework that allows developers to instrument their code with the XPTI API and +forward interesting or useful events during the application's execution, as +determined by the developer. + +The XPTI tracing framework is a lightweight tracing framework that provides a +simple API for instrumenting code, which allows developers to capture +relationships through nodes and edges and track the execution of the +aforementioned nodes and other functions that may be of interest. The API also +provides the ability to associate each trace point with end-user source code +information such as source file, function name and line number, for example. +The goal of this framework is to provide a low overhead solution that tools +can use to build performance analytical models. This document describes the +different components of this framework and a testing methodology to determine +the cost of using this framework in your applications. + +The current implementation of the framework uses std containers by default. There +is also an implementation that relies on the concurrent containers in +[Threading Building Blocks (TBB)](https://github.com/intel/tbb) and this can be enabled +by passing the option `-DXPTI_ENABLE_TBB=ON` to `cmake`. The std container based +implementation is a thread-safe implementation, but has not been optimized for +performance. Increasing the number of threads accessing the framework will +increase the contention costs in the current implementation and may affect the +performance of the framework. 
+ +To enable the build to use TBB for the framework and tests, use the commands as +shown below: + + ```bash + % cd xptifw + % cmake -DXPTI_ENABLE_TBB=ON -DXPTI_SOURCE_DIR=$SYCL_HOME/xpti ./ + ``` + +> **NOTE:** This document is best viewed with [Markdown Reader](https://chrome.google.com/webstore/detail/markdown-reader/gpoigdifkoadgajcincpilkjmejcaanc) +> plugin for Chrome or the Markdown Preview Extension for Visual Studio Code. + +## Architecture + +The framework consists of a proxy library that is a static library for use +within your applications. However, the proxy library consists of stubs that +forward calls to a dynamic component, if available and if tracing has been +enabled. If tracing has not been enabled or the dynamic component is not found at +run time, the stub implementations return immediately. + +The framework currently employs environment variables to determine if tracing +has been enabled and where to find the dynamic component. Both of these must be set for it to successfully dispatch event streams. + +1. The environment variable that indicates tracing has been enabled for the + run is defined by `XPTI_TRACE_ENABLE`. + + To enable tracing, the possible values are `XPTI_TRACE_ENABLE=1` or + `XPTI_TRACE_ENABLE=true` and to disable, the possible values are + `XPTI_TRACE_ENABLE=0` or `XPTI_TRACE_ENABLE=false`. + + Currently, if the variable is not defined, it is assumed to be `true`. + +2. The environment variable `XPTI_FRAMEWORK_DISPATCHER` points to the XPTI + dispatcher or the dynamic component that allows the static library to load + the shared object into memory and dispatch the event streams to subscribers + of the event streams. The dispatcher or the dynamic component manages the + subscribers to the event streams through an environment variable + `XPTI_SUBSCRIBERS`, but this variable has no bearing on the functionality + supported by the static library portion of the framework. 
+ +![XPTI Architecture](xpti_arch.png) + +The above diagram describes the dependencies and/or the interactions between a +sample application or library that has been instrumented with XPTI API, the +XPTI dispatcher and the subscriber loaded by the dispatcher. All API calls +made by the application and the library go to the static library in this +diagram and if `XPTI_TRACE_ENABLE` is not enabled or if the path to the +dispatcher is not provided in the environment variable +`XPTI_FRAMEWORK_DISPATCHER`, then the calls to the static library +`xptiTraceEnabled()` return immediately. + +In the case both these variables are enabled, then the calls will be forwarded +to the dynamic library which will attempt to load the subscribers pointed to by +the environment variable `XPTI_SUBSCRIBERS`. The hypothetical trace data +captured by the subscriber is shown as well under the `Resulting trace data` +part of the above diagram. + +### The Dispatcher + +The dispatcher is a dynamic library that implements the XPTI API and is +dynamically loaded by the static proxy library, if the static library is used to +link with the application or library. Linking with the dynamic library instead +of the static library is also an option, however the dynamic library will now +have to be shipped with the application. Using the static library allows +instrumented applications or libraries to get around this problem. + +### The Subscriber + +A subscriber in XPTI is a shared object that is dynamically loaded by the +dispatcher. Events generated by an instrumented application or library are +forwarded to the subscriber by the dispatcher through a callback mechanism. The +ownership of the subscriber is controlled by tools or applications that consume +the generated event streams and **must** follow the protocol or handshake +defined for an event stream. 
+ +There are three important things that a subscriber must implement to be +functional: (1) `xptiTraceInit`, (2) `xptiTraceFinish` and (3) callback +handlers. The `xptiTraceInit` and `xptiTraceFinish` API calls are used by the +dispatcher loading the subscriber dynamically to determine if the subscriber +is a valid subscriber. If these entry points are not present, then the +subscriber is not loaded. + +The `xptiTraceInit` callback is called by the dispatcher when the generator of +a new stream of data makes a call to `xptiInitialize` for the new stream. The +implementation of the `xptiTraceInit` function is where the subscriber would +follow the specification or protocol defined for the stream to subscribe to +events from various trace point types. The code snippet below shows an example +of such an implementation for the stream `"foo"`. + +```cpp +#include "xpti_data_types.h" + +XPTI_CALLBACK_API void xptiTraceInit +( + unsigned int major_version, ///< Major version + unsigned int minor_version, ///< Minor version + const char *version_str, ///< Version as a string + const char *stream_name ///< Stream name +) { + if (stream_name) { + // Only register callbacks if the major version is the + // expected version and the stream is the stream type + // you care about + if(std::string("foo") != stream_name && + major_version > 0 && major_version < 3) + return; + g_stream_id = xptiRegisterStream(stream_name); + xptiRegisterCallback(g_stream_id, graph_create, graph_create); + xptiRegisterCallback(g_stream_id, node_create, node_create); + xptiRegisterCallback(g_stream_id, edge_create, edge_create); + xptiRegisterCallback(g_stream_id, region_begin, algorithm_begin); + xptiRegisterCallback(g_stream_id, region_end, algorithm_end); + xptiRegisterCallback(g_stream_id, task_begin, trace_point_begin); + xptiRegisterCallback(g_stream_id, task_end, trace_point_end); + ... 
+ } + else { + // handle the case when a bad stream name has been provided + } +} + +XPTI_CALLBACK_API void xptiTraceFinish(const char *stream_name) { + // Free any dynamically allocated memory for the stream + // and any additional cleanup operation + ... +} +``` + +The above code snippet shows the `xptiTraceFinish` call as well and this +function is used to clean up memory and any other data structures that were +allocated to handle the stream. The `xptiTraceFinish` call is made by the +dispatcher when the instrumented code is winding down a data stream by calling + `xptiFinalize` for the stream. + +The implementation of the callbacks is where attention needs to be given to the +handshake protocol or specification for a given stream the subscriber wants to +attach to and consume the data. The instrumented library may send additional +user data during the notification of each trace point and this data could be of +a different type for each trace point notification. + +```cpp +XPTI_CALLBACK_API void trace_point_begin( + xpti_trace_point_type_t trace_type, + xpti_trace_event_data_t *parent, + xpti_trace_event_data_t *event, + uint64_t instance, + const void *user_data) +{ + /// Capture time here + my_time_t begin = get_my_time(); + // Capture thread id or cpu or whatever else + my_device_t dev = get_my_device_instance(); + + /// If access to the payload is required, it can be done like this. + auto p = xptiQueryPayload(event); + + // Send the data off to some serializing buffer asynchronously + emit_data(begin, dev,...); +} + +``` + +For example, the specification for a given event stream, the trace point type +`graph_create` might send a data structure of a certain type as user_data and +`task_begin` might send a character string. Resolving the `user_data` parameter +requires strict conformance to the specification for the stream. + +In addition to the `user_data`, the unique id that describes the event is +available under `event->unique_id`. 
For most cases, this should be sufficient +to resolve a given event. However, in many cases, a particular event may be +exercised within a loop. Since a trace point event is based on the +instrumentation at a specific location in the code, the `unique_id` of this +event will always remain the same. However, with each instance of this event, an +instance ID may be emitted that keeps track of the instance of this event. The +combined value of the `unique_id` and `instance_id` should always be unique. + +> **NOTE:** A subscriber **must** implement the `xptiTraceInit` and +> `xptiTraceFinish` APIs for the dispatcher to successfully load the subscriber. + +> **NOTE:** The specification for a given event stream **must** be consulted +> before implementing the callback handlers for various trace types. + +### Using the Reference Dispatcher and Subscriber + +The XPTI framework package provides a reference implementation of the XPTI +dispatcher and a sample subscriber that can be used to see what is being emitted +by any stream generated using XPTI. If you wish to skip the rest of the +document and inspect the generated stream, you can follow the steps outlined +below. + +1. **Build the XPTI framework dispatcher:** The instructions below show how to + build the library with standard containers. If you have access to TBB, you + can enable it by adding the option `-DXPTI_ENABLE_TBB=ON` to the cmake command. + + ```bash + % cd xptifw + % cmake -DXPTI_SOURCE_DIR=$SYCL_HOME/xpti ./ + % make + ``` + + The binaries will be built and installed in `lib/Release`. These include the + dispatcher, a sample subscriber that prints the contents of the stream, the + unit test and a performance characterization application for the framework. + +2. **Run an instrumented SYCL application:** + To enable the dispatcher and subscriber, set the following environment + variables. 
The commands for enabling the environment variables are provided + for Linux environments in the example below: + + ```bash + % export XPTI_TRACE_ENABLE=1 + % export XPTI_FRAMEWORK_DISPATCHER=/path/to/libxptifw.so + % export XPTI_SUBSCRIBERS=/path/to/libbasic_collector.so + ``` + + You can now run a SYCL application that has been linked with a runtime that + supports the XPTI instrumentation and inspect the resulting stream. + +3. **Running the unit tests:** The unit tests included cover the exported API + and incorporate some correctness tests. + + ```bash + % ./lib/Release/xpti_tests + ``` +4. **Understanding the throughput of the framework:** This document discusses + the performance of the framework in detail in the sections [Performance of the Framework](#performance-of-the-framework) and [Modeling and projection](#modeling-and-projection). For details on the command line arguments, + please refer to these sections. + + > **NOTE:** These tests rely on the availability of TBB for creating the + multi-threaded tests and will not be created if TBB has not been enabled + during the build process. + + ```bash + % ./lib/Release/run_test --trace-points 1000 --type performance --overhead 1.5 --num-threads 0,1,2,3 --test-id 1,2 --tp-frequency 50 + ``` + + The above command will run the performance tests in which 1000 trace points + are created and each trace point is visited twice. The trace point creation and + notification costs are measured in single-threaded and multi-threaded + scenarios and the output shows the throughput projection of the framework + using the events/sec metric at 1.5% overheads to the application runtime. + +## Tracing Framework and Callback APIs + +The current version of the instrumentation API adopts a model where traces are +generated in pairs for a given trace scope and a scoped class is made available +that assists developers instrumenting their code. 
The APIs are divided into two parts: (1) the public API exported by the tracing framework which is +implemented by the static library and the dispatcher and (2) the callback API +that tools will implement to create a subscriber. + +### Brief API Concepts + +The XPTI framework exports a small set of API functions that are a part of the +static library and dispatcher exports and are deemed sufficient for the use cases +that have been considered so far. Since the primary goal is to gather execution +traces of compute elements in an application, the APIs address this scope for +now. Currently, they allow developers to capture relationship information as +nodes and edges in a graph, where the nodes represent a compute element or an +action with a latency associated with it and the edges represent the +dependencies between the compute elements which may be events or memory +objects. In addition to such relationship events, the API allows you to trace +arbitrary regions of code similar to conventional tracing methods. + +For each interesting trace point in an application, a notification can be sent +out by the framework. However, if there are no subscribers to consume this +notification event, the framework returns immediately. This allows developers +that want to instrument applications or run-times to limit the overheads +considerably. + +The API is documented in the file `xpti_trace_framework.h` that can be found +under `xpti/doc`. Some of the API functions and concepts that warrant +additional insight are discussed further. + +### `xptiInitialize` + +When instrumenting an application, developers can decide how many streams they +want to generate. In some cases, organizing trace data by streams may be +preferable. The `xptiInitialize` function facilitates the organization of data +streams by allowing each component that generates a stream of data to make a +call to `xptiInitialize`. 
The types of events captured by the instrumentation +and the protocol for the handshake between the trace generator and its +subscribers must be defined for a given stream as a contract or specification. +This allows subscribers to rely on this specification to implement a tool that +can consume this data and do something useful with it. + +The `xptiInitialize` function reports to all the subscribers that a new stream +of data is about to be generated and the name of the stream along with some +version information of the stream is sent to the subscriber. + +The version information is primarily provided to ensure that subscribers to the +event stream can choose not to handle an event stream if it is an unsupported +version. The up-to-date documentation of the XPTI API is always maintained in +the `xpti_trace_framework.h` header file. The application that is instrumented +**must** attempt to send the initialization only once, but the subscriber must +be prepared to handle the case when multiple initialization callbacks occur for +a given stream. + +### `xptiFinalize` + +The application or library being instrumented to generate a stream of data must +attempt to finalize the stream by making this call. This allows the dispatcher +to notify all the subscribers that a stream is about to end. + +### `xptiTraceEnabled` + +To recap some of the discussion in the [Architecture](#architecture) section, +this API call returns `true` in the following situations: + +1. When `XPTI_TRACE_ENABLE` is not set, but the `XPTI_FRAMEWORK_DISPATCHER` +and `XPTI_SUBSCRIBERS` variables are set to valid libraries. This assumes that +a tool has been created and pointed to by `XPTI_SUBSCRIBERS` and the tool has +been linked against the dynamic component or dispatcher. In general, the +dynamic component or the dispatcher and the tool component or the subscriber +are owned by the tool attempting to listen to the instrumented stream of data. + +2. 
When using the static library for linking in the instrumented application +or library, this call returns `true` only if `XPTI_FRAMEWORK_DISPATCHER` is +set to a valid library and `XPTI_TRACE_ENABLE` is not set to `false`. + +### APIs and Data Structures Exported by the Tracing Framework + +We will begin our discussion by detailing the various public APIs that are +exported by the framework and when they are meant to be used. The framework API +is what will be used by developers instrumenting their code. The primary goal +of the API is to support the instrumentation of code that may or may not fall +into function boundaries. + +* First, the places in the code where instrumentation is warranted should be + identified. Each trace point is unique and will be associated with a + `payload` data structure that encapsulates: (1) a unique name, such as a + function or kernel name or something meaningful if the trace point marks a + section of code, (2) the source file it is located in, (3) the line number + where this interesting event occurs and (4) the column number of the + interesting event. Compilers such as `gcc`, `clang` and the Intel compilers + can generate all of this information easily through builtin functions. +* Secondly, an event must be created for this trace point region and this + process of creating an event will use the `payload` information and create + an event. If the payload has already been registered, then the previously + registered and associated event will be returned. This process will also + create a `unique_id` for the event. +* Thirdly, the scope of this trace point must be determined and for a given + scope, as in a related pair of events, the `unique_id` created at the + begin trace point **must** be preserved and used for the end trace point as + well. +* Finally, the callbacks registered for these types of events must be notified. 
+ +### Trace Point Event + +The trace point event describes the event used to notify the subscriber and is +usually associated with a payload that describes the event. Since application +code is being instrumented with XPTI, the payload may consist of a `function name`, +`source file name` and `line number`, which form a unique combination +of strings and numbers that is used to create the `unique_id` associated with +an event. Using the `event` or the `unique_id`, one should be able to query the +`payload` information. When a notification occurs for a trace point, the trace +point event and trace point type information are sent to the subscriber. A given +event may be used to notify subscribers as multiple trace point types. For +example, a node may represent a computational entity and an event created for +the node may be emitted as `node_create`, `task_begin` and `task_end` +notifications to record the creation, the beginning of the execution of an +instance of the node and when the execution of that instance has completed. + +#### Creating the Payload + +We will first look at the `xpti::payload_t` data structure that +is defined in `xpti_data_types.h`. + +```cpp + +xpti::payload_t p("function1", "main.cpp", 104, 5, function1); + +``` + +The payload data structure can be created with a set of unique descriptors for +the region of code being instrumented, such as a function name, source file +name and line number, for example. However, it can also take in a function +name and pointer to the function or just a pointer to the function that +uniquely describes the payload that will be used to create a trace point +event. This information is used by the `xptiMakeEvent` function to create a +`unique_id` for the trace point event. + +The next section looks at using the payload information to create a trace point +event. Each trace point is unique, from a language or code section standpoint. 
+A trace point may be visited multiple times, but the payload and the event +describing the trace point will always be the same. The tracing framework must +guarantee that when a trace point is visited, the same `unique_id` is +retrieved for it. For frequent visits to the same trace point site, we must be +able to look up the `unique_id` of the payload efficiently or we cache the +information at the trace point location. + +#### Creating an Event that Represents the Trace Point + +Once a payload structure has been created, it is used to associate the trace +point that this payload represents to an event that captures additional +information about the trace point. The framework has a list of predefined trace +point types that may be used to mark various trace points with an appropriate +type. They are declared in the header file `xpti_data_types.h` and are used to +describe the creation of a graph, node, edges or the instantiation of a node as +a `task_begin` and `task_end` pair of trace point notifications. +These trace points represent the types of actions commonly associated with +instrumenting a library or application. However, in cases where there is no +direct mapping from the predefined trace point types to a language structure or +if one needs to describe an action that is orthogonal to code structure such as +diagnostic information for executing code, the framework allows each +instrumentation stream to extend the available trace point types with new trace +point types that map to these unsupported constructs. + +#### `xptiRegisterUserDefinedTracePoint` + +This API allows streams to extend the existing trace point types and generate +new types that map to the action about to be described by this API. The +description of the API is followed by a code example that shows the use of +this API. 
+ +```cpp + uint16_t xptiRegisterUserDefinedTracePoint( + const char *vendor_name, + uint8_t user_defined_tp); +``` + +| Argument | Description | +| -------- | ----------- | +|`vendor_name` | The name of the tool or vendor implementing the tool that is describing this extension to the trace points. | +| `user_defined_tp`| The user defined trace point which is of type `uint16_t`. The 8 most significant bits of the 16-bit value encode the `vendor_name` and 8 least significant bits are used to encode the extensions for this tool. A maximum of **128** new user defined trace points can be created per `vendor_name`. | + +The code example of extending the default or pre-defined trace point types is +shown below. As you can see in the example, the user defined trace point types +are initialized in the `enum` using the macros `XPTI_TRACE_POINT_BEGIN` and +`XPTI_TRACE_POINT_END` for the same trace point type `0`. By default, the +trace point types are designed to define the scope of the action being described. +This requires 1-bit to represent begin or end, which leaves the remaining +7-bits to describe 128 unique trace point extensions for a given `vendor_name`. + +```cpp +typedef enum { + my_read_begin = XPTI_TRACE_POINT_BEGIN(0), + my_read_end = XPTI_TRACE_POINT_END(0), + my_allocate_begin = XPTI_TRACE_POINT_BEGIN(1), + my_allocate_end = XPTI_TRACE_POINT_END(1) +}tp_extension_t; +... +uint16_t tp1_start = xptiRegisterUserDefinedTracePoint("myTest", + my_read_begin); +uint16_t tp1_end = xptiRegisterUserDefinedTracePoint("myTest", + my_read_end); +uint16_t tp2_start = xptiRegisterUserDefinedTracePoint("myTest", + my_allocate_begin); +uint16_t tp2_end = xptiRegisterUserDefinedTracePoint("myTest", + my_allocate_end); +... +xptiNotifySubscribers(stream_id, tp1_start, parent, event, instance, + nullptr); +``` + +If the callback handler for this stream needs to know if this is an extension +or a predefined type, they can use the following macros to decipher the trace +point type. 
+ +```cpp +uint8_t tool_vendor_id = XPTI_TOOL_ID(tp1_start); +uint8_t tp_type = XPTI_EXTRACT_USER_DEFINED_ID(tp1_start); + +if(tool_vendor_id == 0) { + // Default pre-defined type +} +else { + // User-defined trace type + // Here: tp_type will be tp_extension_t::my_read_begin +} +``` + +This mechanism allows different kinds of information to be captured and the +trace point type describes the type of information expected by the +notification. The trace point type is only used when notifying the subscribers +of an event with the trace point type acting as a qualifier for the event. + +#### `xptiRegisterUserDefinedEventType` + +This API allows streams to extend the existing trace point event types and +generate new types that map to the semantic description of the trace event +being created. The description of the API is followed by a code example that +shows the use of this API. + +```cpp + uint16_t xptiRegisterUserDefinedEventType( + const char *vendor_name, + uint8_t user_defined_event); +``` +| Argument | Description | +| -------- | ----------- | +|`vendor_name` | The name of the tool or vendor implementing the tool that is describing this extension to the event types. | +| `user_defined_event`| The user defined event which is of type `uint16_t`. The 8 most significant bits of the 16-bit value encode the `vendor_name` and 8 least significant bits are used to encode the extensions for this tool. A maximum of **128** new user defined event types can be created per `vendor_name`. | + +Similar to trace point types, the `xpti::trace_event_type_t` can also be +extended. The events that are predefined by the framework fall under `{graph,` +`algorithm, barrier, scheduler, async, lock, offload_read, offload_write,` +`user_defined}`. Let's take the example of having to extend the event types to include a diagnostic category. + +```cpp +typedef enum { + my_diagnostic_A = XPTI_EVENT(0), + my_diagnostic_B = XPTI_EVENT(1) +} event_extension_t; +... 
+uint16_t my_ev1 = xptiRegisterUserDefinedEventType("myTest", my_diagnostic_A) +... +uint64_t InstanceNo; +MyEvent = xptiMakeEvent("application_foo", &Payload, + my_ev1, xpti::trace_activity_type_t::active, + &InstanceNo); +``` + +When this information is provided to the callback handlers in subscribers +through notifications, the handler can decide what it wants to do with the +extended types. If it is not designed to handle it, it can choose to ignore +the event. On the other hand, a subscriber that is designed to handle it must +conform to the specifications defined by the stream that is generating the +extended type events. + +```cpp +uint8_t tool_vendor_id = XPTI_TOOL_ID(tp1_start); +uint8_t ev_type = XPTI_EXTRACT_USER_DEFINED_ID(tp1_start); + +if(tool_vendor_id == 0) { + // Default pre-defined type +} +else { + // User-defined event type + // Here: tp_type will be event_extension_t::my_diagnostic_A +} +``` + +#### `xptiMakeEvent` + +The `xptiMakeEvent` combines the payload information with information about the trace point being defined to create an `xpti::trace_event_data_t`. + +```cpp + xpti::trace_event_data_t *xptiMakeEvent(const char *name, + xpti::payload_t *payload, uint16_t event, + xpti::trace_activity_type_t activity, + uint64_t *instance_no); +``` + +| Argument | Description | +| -------- | ----------- | +|`name` | Name of the event, which is typically a function or kernel name. | +| `payload`| The payload that this trace event represents. The payload in `XPTI` represents the source file, function name and line number, if available. If the source information is not available, it may contain a function name and code pointer virtual address or just the virtual address. This allows one to get the payload as meta-data for a given trace point. | +| `event` | The event type this trace point represents. It could be one of the predefined types or an extended type. | +| `activity` | Describes the activity type, as in active time or overhead time etc. 
| +| `instance_no` | If `xptiMakeEvent` is used each time this code location is visited to create or look up a previously created event, the `instance_no` parameter is incremented to indicate the instance ID of the current visit. | + +The created trace event data type is returned. In case the payload information +is the same, a previously created event is returned. If global `user_data` +needs to be specified for this trace event that may be used by tools, it can +be allocated and stored in `xpti::trace_event_data_t` structure under +`user_data`. + +The code sample below shows a sample code snippet that creates such a trace +point event using a payload and uses the created event to notify all +subscribers of the event qualified by a trace point type. + +```cpp +if ( xptiTraceEnabled() ) { + // example + uint64_t instance_no; + auto stream_id = xptiRegisterStream("myStream"); + xptiInitialize("myStream", 1, 0, "myStream 1.0"); + xpti::payload_t p("application_graph"); + auto event = xptiMakeEvent( "app", &p, + xpti::trace_event_type_t::graph, + xpti::trace_activity_type_t::active, + &instance_no); +} +... +if (event && xptiTraceEnabled()) { + // If the event has been created, then notify + // all subscribers about the graph creation + xptiNotifySubscribers(stream_id, + xpti::trace_point_type_t::graph_create, + nullptr, // no parent + event, + instance_no, + nullptr); // no user data +} + +``` + +#### Notifying the registered listeners + +As discussed in previous sections, creating a trace point is only one part of a trace point definition. The part that actually lets a tool know that such a trace event occurred is through a notification of the aforementioned event. In this section, we will describe the API and its use. 
+ +#### `xptiNotifySubscribers` + +```cpp + xpti::result_t xptiNotifySubscribers(uint8_t stream_id, + uint16_t trace_type, + xpti::trace_event_data_t *parent, + xpti::trace_event_data_t *event, + uint64_t instance, + const void *temporal_user_data); +``` + +| Argument | Description | +| -------- | ----------- | +|`stream_id` | The stream that this notification belongs to. The stream ID is obtained from `xptiRegisterStream`. | +| `trace_type`| The trace point type that describes the current notification. It could be one of the pre-defined types or a user-extension. | +|`parent`| A parent trace event, if present. | +|`event` | The current trace event for which the notification is being sent out. | +|`instance` | This value indicates the instance of the current trace event. If this is being used to monitor functions, this value should indicate the call count at that time. | +| `temporal_user_data` | This is a field that holds per instance user data and is valid for just that value of `instance`| + +The code example below shows an example 'C' code that is instrumented with the +framework API and this will generate traces for the functions in the program. +However, in this example, we use the helper scoped class provided by the +framework to emit notifications for the begin and end of the scope through +the `xpti::trace_point_type_t::task_begin` and +`xpti::trace_point_type_t::task_end` automatically. In this example, the per instance user data is not sent and the `scoped_notify` defaults that to `nullptr`. 
+ +```cpp +void function1() { + uint64_t instance_id = 0; + xpt::trace_event_data_t event; + if (xptiTraceEnabled()) { + xpti::payload_t p("function1","main.cpp",104, 2,function1); + event = xptiMakeEvent("function1",&p, + xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, &instance_id); + } + xpti::framework::scoped_notify ev("myStream", + xpti::trace_point_type_t::region_begin, nullptr, &event, + instance_id); + for(int i = 0; i < 5; ++i ) { + function2(); + } +} +``` + +## Performance of the Framework + +In order to estimate the overheads one could experience by using the framework, +this section will outline a couple of scenarios. Some of the key operations +that would result in overheads are listed below. For each of these operations, +we will construct scenarios that will provide us with measurements to determine +how many events/sec we can process before the overheads start to become a +problem. We will use an overhead limit of 1% as this data will be used to +build an analytical model in the future. + +| Data structure | Operation | Description | +| -------------- | ------------- |------------ | +| String table | String insert | Every string that is exposed to the framework is registered with the string table and only the string IDs used | +|| String lookup | The lookup of a string in the string table can be implicit or explicit. For example, when a previously defined payload is submitted again, the strings present will be registered. However, there is an early exit if the string already exists in the table due to an implicit lookup. | +| Payload| Creation| A payload constructor may take in `function name`, `source file name`, line number, column number and an address or a `function name` and an address or just an address. Whenever strings are presented, they are first registered with the string table and the string IDs are used in the creation of a hash that is used to generate a `unique_id` for the event. 
| +| Trace point event | Metadata add| One can add arbitrary number of pairs to an event as strings. These strings will be registered with the string table and the metadata is maintained as a map of two string IDs. | +|| Metadata lookup | The metadata associated with a trace point event is looked up explicitly in the string table. | +|| Creation | Includes string table registration for the strings contained in the payload, the hash generation for a payload and insertion of the payload and event into a map.| +|| Framework lookup | If a trace point has already been created, the event can be looked up using its `unique_id`. | +|| Notification | Events that are created are notified to all subscribers| +|Subscriber | Callback handler | When a notification is received by the subscriber, the event is handled. The callback handlers must be efficient to minimize the overheads.| + +Using the operations described in the table above, a set of tests are designed +to evaluate the performance of these operations in near real-world scenarios. +The tests are present under `xptifw/basic_test` and report these results in +tabular form. The performance test in particular accepts various configuration +parameters from the command line. An example command is shown in the section +below. + +```bash +run_test --trace-points 1000 --type performance --overhead 1.5 --num-threads 0,1,2,3 --test-id 1,2 --tp-frequency 50 +``` + +The above command will run the test for 1000 trace points and compile the +performance measurements for the set of threads given as input with a maximum +overhead at 1.5% and for every trace point created, it will be visited twice. +A description of the command line arguments is provided in detail below: + +> **--type, -y [`required`]** +> - This flag takes in the type of tests that need to be run. The allowed +options are **[semantic, performance]**. 
+> - **semantic**: Runs semantic tests that test the correctness of the +framework operations and they are split into three separate tests. +> 1. Performs string table tests on a 1000 strings +> 2. Performs tests on trace point events by checking to see if the same +event is returned for the same payload and so on. +> 3. Performs notification tests to see if trace events are notified +correctly and record the instances of notifications per events. +> - **performance**: Runs performance tests on the provided input +configuration and these tests measure the cost of various operations used in +the framework. These tests are split into two separate tests. +> 1. Data structure tests that capture the average cost of string table +inserts, lookups, trace point event creation and lookup using the same +payload or `unique_id` for the event and notification. +> 2. Runs instrumentation tests and projects the number of events that can +be serviced per second using the configuration provided on the command line. +These tests are where the **--overhead** and **--tp-frequency** arguments are +used. + +> **--trace-points, -t [`required`]** +> - Number of trace point events to create and use for the test. The expected +range is **[10-100000]**. + +> **--test-id, -i [`required`]** +> - Takes in a list of tests that are comma separated and runs the requested +tests. This command line argument takes in a range as well and the format is +described below: +> 1. Comma separated sequence: --test-id 1,2,3 +> 2. Range description: --test-id 1:3:1 + +> **--num-threads, -n [`required`]** +> - Takes in a list of thread counts to use for the requested tests. +> 1. Comma separated sequence: --num-threads 0,1,2,4,8,12,16 +> 2. Range description: --num-threads 0:2:1,4:16:4 + +> **--tp-frequency, --f** +> - The trace point creation frequency basically allows the test to +determine the total number of trace point visits to perform for every trace +point event that is created. 
If the trace point creation frequency is 10%, +then every trace point event that is created must be visited 10 times. Since +we know how many trace point events were requested from the command line +(**--trace-points N**), we multiply this value (N) by 100/f where f = trace +point frequency in percent to get the total number of trace point visits. +> - So, if number of trace points is 5000 and trace point frequency is 10%, +the total number of trace point visits the test is going to perform is 5000 x +1/0.1 = 50000 + +>**--overhead** +> - The overhead input allows the test framework to use the measured +performance of the trace point creation and notification to come up with an +estimate of how many events can be serviced per second with the given +configuration. +> - The default overheads for which the events/sec are computed is **1%** +> - If the overheads desired is 1%, then the following formula is used to +> compute the events/sec: +>

total cost of instrumentation (I) = (cost of trace point +> creation + cost of notification)

+>

So, if --trace-points 5000 --tp-frequency 10, this will be:

+>

I = 5000xCost(TP Create) + 50000xCost(Notify)

+>

Average cost (A) = I/50000, for 50000 events notified

+>

This cost A does not take into account the cost of the callback +> handler. In our projections, we use a handler cost of 10ns, 100ns and +> 500ns to get the events/sec that can be serviced. On an average, the +> handler costs for real-world cases will be somewhere between 80ns-400ns. +>

So, if the average cost is A and this is 1% overhead, the total run +> time must be 100xA ns

+>

Events/second E = 1,000,000,000 ns/(100xA)ns

+> + +Using the metrics described above, we run the tests with varying overheads and +trace point creation frequencies to determine the maximum number of events +that can be serviced for that configuration. Some sample configurations are +shown below: + +- Configuration where each trace point event created is only visited **once** + ```bash + run_test --trace-points 5000 --type performance --num-threads 0,1,2,4 --test-id 1,2 --tp-frequency 100 + ``` +- Configuration where each trace point event is visited **twice** + ```bash + run_test --trace-points 5000 --type performance --num-threads 0,1,2,4 --test-id 1,2 --tp-frequency 50 + ``` +- Configuration where each trace point event is visited **ten** times + ```bash + run_test --trace-points 5000 --type performance --num-threads 0,1,2,4 --test-id 1,2 --tp-frequency 10 + ``` + +## Modeling and projection + +In order to determine the number of events that the framework can service in a +second, the performance tests use the following approach. If the total +instrumentation cost is 1µs and for this cost to be under 1% total +overhead, the amount of work that needs to be accomplished for every trace +event would be 1µs x 100 = 100µs. In this case, the maximum number +of events that can be notified/serviced would be: + + 1 sec/100µs = 1000000µs/100µs = 10000 events/sec + +The total instrumentation cost would include *some of the time in the +infrastructure in the framework* and the *cost of handling each notification +through callbacks* in the subscriber. + +### Computing the cost incurred in the framework + +On an average, some trace points are visited only once and others 10s-100s of +times. We assume that each trace point created will be visited at least 10 +times. The command line arguments that would test such a configuration is +shown below. 
+ + ```bash + run_test --trace-points 10000 --type performance --num-threads 0,1,2,4 --test-id 1,2 --tp-frequency 10 + ``` + +We take the average cost of a trace point event creation and the cost of 10 +notifications for each such event as it is visited 10 times to form the basis +of the cost incurred within the framework. This information is reported by +the performance test. The total instrumentation cost as discussed in the +previous section comprises a framework cost and a callback handler cost in +the subscriber. + +Framework cost **FW*****cost*** = Avg{TP*create* + 10 x +TP*notify*} + +Subscriber cost **Callback*****cost*** = **C*t*** which +could be anywhere in the range [10-10000]ns + +Total cost **Cost*****total*** = **FW*****cost*** + +**C*t*** + +Using the information from the report or one such instance captured in the +table below, we know that: + +**FW*****cost*** = ~55ns + +Using different values for **C*t*** = [10, 100, 500, 1000]ns, we +get the table that shows the events/sec that can be serviced for total +instrumentation cost for the configuration. It can be noticed that as the +callback handler costs increase, the events/sec is inversely proportional to +the callback handler costs. The work unit cost for determining the number of +events/sec is given by: + +**W*****cost*** = **100** x [**FW*****cost*** + +**C*t***] for the configuration that limits overheads to 1%. + +The more times a trace point event is visited, the more events per second can +be serviced by the framework as the cost of a trace point event creation can +be amortized over all the visits to the same trace point. However, +**C*t*** will eventually limit the events/sec when they get to be +significantly larger than **FW*****cost***. + +> **NOTE:** All measurements reported in this document were measured on an NUC +> form-factor machine with Intel® Core™ i7-8559U @ 2.7 GHz processor +> running Ubuntu 18.04. 
The tests were compiled to use Threading Building +> Blocks concurrent containers for these runs. + +| Operation | Statistic | Scenario |Count| Framework Cost(ns) | +|-----------|-----------|----------|-----|------| +| String table insertion| Cost/insertion| Create a large number of strings (>1000) and insert them into the table. Measure the average cost of multi-threaded insertion.|10000|~**150-500**ns| +|String table lookup| Cost/lookup| Look up the strings added in the insertion test in random order and measure the average cost of the lookup.|20000| ~**40**ns| +|String table insert/lookup| Cost of insert/lookup | Strings that are added to the string table may be looked up multiple times. On an average, we assume that every string added to the string table is looked up twice. If strings are looked up more often than the twice assumed in this test, then the average cost of insertion/lookup will be lower.|30000|~**130**ns| +| Trace point creation | Cost/creation| Create unique trace points and measure the average cost for each creation. |10000|~**1100**ns| +| Trace point creation | Cost/creation| Attempt to create previously created trace points again and measure the average cost for each creation. Since the payloads will be the same, there should be an early exit after it is determined that they are the same.|100000|~**275**ns| +| Trace point lookup using `unique_id`(FW lookup)| Cost/lookup| This time will purely be the cost of finding the trace point event, given its `unique_id`. |100000|~**35**ns| +|Trace point event caching| Cost/lookup| If the trace point event is cached at the event site, then this cost is 0. This is the most efficient mode of using it and amortizes the cost of trace point event creation the best.|100000|~**0**ns| +| Trace event notification| Cost/notification| Measure the average cost of notification. Here the callback handler registered will return immediately. 
The callback handler overheads are modeled separately when the maximum number of events that can be serviced per sec are computed.|100000|~**10**ns| +|Trace event composite cost [**FW*****cost***]|Average cost/trace point| Create N unique trace points and MxN trace point lookups + MxN notifications. Measure the total time and get the average using MxN as the denominator.|100000|~**55**ns| + +> **NOTE:** The trace point, as implemented in the code block in the previous +> section can lead to significant runtime overheads as the framework has to +> lookup the `unique_id` of the payload provided with the trace point and this +> lookup can be costly. It is recommended that events created for each trace +> point are cached locally for subsequent use. An example implementation of +> this optimization is is shown in the code block below using a static +> variable, for example. If the instrumentation is a part of a class, then the +> event can be saved as a member variable. + +```c++ +void function1() { + uint64_t instance_id = 0; + static xpti::trace_event_data_t *f1_event = nullptr; + // Only create the event if it hasn't already been created. + // When the data structure f1_event is initialized, the + // unique id is set to invalid_id. + if (xptiTraceEnabled() && !f1_event) { + xpti::payload_t p("function1","main.cpp",104, 2,function1); + f1_event = xptiMakeEvent("function1", &p, + xpti::trace_event_type_t::algorithm, + xpti::trace_activity_type_t::active, + &instance_id); + } + xpti::framework::scoped_notify ev("myStream", + xpti::trace_point_type_t::region_begin, + nullptr, f1_event, instance_id); + for(int i = 0; i < 5; ++i ) { + function2(); + } +} +``` + +> **NOTE:** Using the framework cost, **FW*****cost*** = ~55ns, and +> a set of callback handler costs, we get the following table that shows the +> events/sec that can be handled by XPTI for a given configuration. 
+ +| Trace points | Threads | Overhead|Events/sec @ **C*t***=10ns | Events/sec @ **C*t***=100ns | Events/sec @ **C*t***=500ns| Events/sec @ **C*t***=1000ns | +| ------------- | -------- | ------- | ------------ |------| -----|---| +|10000 | Serial | 1% | ~150K | ~64K | ~18K | ~9.5K| +| | 4 | 1% | ~150K | ~64K | ~18K |~9.5K| +| | Serial | 2% | ~300K | ~127K | ~36K|~19K| +| | 4 | 2% | ~290K | ~125K | ~36K|~19K| +|1000 | Serial | 1% | ~165K | ~66K | ~18K | ~9.5K| +| | 4 | 1% | ~165K | ~66K | ~18K |~9.5K| +| | Serial | 2% | ~360K | ~137K | ~37K|~19K| +| | 4 | 2% | ~345K | ~135K | ~37K|~19K| + +The above data from the table is only provided as a guideline on what to +expect with the instrumentation. The events/second can be quite high if the +callback handlers are written efficiently. So the range of events that can be +serviced can be ~300,000 to ~10,000 per second depending on the cost of handling +the callbacks. diff --git a/xptifw/doc/xpti_arch.png b/xptifw/doc/xpti_arch.png new file mode 100644 index 0000000000000..9855a33b4b6ba Binary files /dev/null and b/xptifw/doc/xpti_arch.png differ diff --git a/xptifw/include/xpti_int64_hash_table.hpp b/xptifw/include/xpti_int64_hash_table.hpp new file mode 100644 index 0000000000000..909348753cac6 --- /dev/null +++ b/xptifw/include/xpti_int64_hash_table.hpp @@ -0,0 +1,280 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +#pragma once +#include "xpti_data_types.h" + +#include +#include +#include + +#ifdef XPTI_STATISTICS +#include +#endif + +#ifdef XPTI_USE_TBB +#include +#include + +namespace xpti { +/// \brief A class for mapping one 64-bit value to another 64-bit value +/// \details With each payload, a kernel/function name and the source file name +/// may be passed and we need to ensure that the payload can be cached in a hash +/// map that maps a unique value from the payload to a universal ID. We could +/// use the payload hash for this purpose, but the numbers are non-monotonic and +/// can be harder to debug. This implementation of the hash table uses Threading +/// Building Blocks concurrent containers for multi-threaded efficiency. +class Hash64x64Table { +public: + using ht_lut_t = tbb::concurrent_hash_map; + + Hash64x64Table(int size = 1024) + : MForward(size), MReverse(size), MTableSize(size) { +#ifdef XPTI_STATISTICS + MInsertions = 0; + MRetrievals = 0; +#endif + } + + ~Hash64x64Table() { + MForward.clear(); + MReverse.clear(); + } + + // Clear all the contents of this hash table and get it ready for re-use + void clear() { + MForward.clear(); + MReverse.clear(); + MForward.rehash(MTableSize); + MReverse.rehash(MTableSize); +#ifdef XPTI_STATISTICS + MInsertions = 0; + MRetrievals = 0; +#endif + } + + // Check to see if a particular Key is already present in the table; + // + // On success, the value for the Key will be returned. If not, + // xpti::invalid_id will be returned. + int64_t find(int64_t Key) { + // Try to read it, if already present + ht_lut_t::const_accessor e; + if (MForward.find(e, Key)) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + return e->second; // We found it, so we return the value + } else + return xpti::invalid_id; + } + + // Add a pair to the hash table. If the Key already exists, this + // call returns even if the value happens to be different this time. 
+ // + // If the Key does not exist, then the Key is inserted into the hash map and + // the reverse lookup populated with the pair. + void add(int64_t Key, int64_t Value) { + // Try to read it, if already present + ht_lut_t::const_accessor e; + if (MForward.find(e, Key)) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + } else { // Multiple threads could fall through here + // Release the reader lock held; + e.release(); + { + // Employ a double-check pattern here + tbb::spin_mutex::scoped_lock dc(MMutex); + ht_lut_t::accessor f; + if (MForward.insert(f, Key)) { + // The Key does not exist, so we will add the Key-Value pair to the + // hash map + f->second = Value; +#ifdef XPTI_STATISTICS + MInsertions++; +#endif + // When we insert a new entry into the table, we also need to build + // the reverse lookup; + { + ht_lut_t::accessor r; + if (MReverse.insert(r, Value)) { + // An entry does not exist, so we will add it to the reverse + // lookup. + r->second = Key; + f.release(); + r.release(); + } + } + } + // else, we do not add the Key-Value pair as the Key already exists in + // the table! + } + } + } + + // The reverse query allows one to get the Value from the Key that may have + // been cached somewhere. 
+ int64_t reverseFind(int64_t Value) { + ht_lut_t::const_accessor e; + if (MReverse.find(e, Value)) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + return e->second; + } else + return xpti::invalid_id; + } + + void printStatistics() { +#ifdef XPTI_STATISTICS + printf("Hash table inserts : [%llu]\n", MInsertions.load()); + printf("Hash table lookups : [%llu]\n", MRetrievals.load()); +#endif + } + +private: + ht_lut_t MForward; ///< Forward lookup hash map + ht_lut_t MReverse; ///< Reverse lookup hash map + int32_t MTableSize; ///< Initial size of the hash map + tbb::spin_mutex + MMutex; ///< Mutex required to implement a double-check pattern +#ifdef XPTI_STATISTICS + safe_uint64_t MInsertions, ///< Thread-safe tracking of insertions + MRetrievals; ///< Thread-safe tracking of lookups +#endif +}; + +#else +namespace xpti { +/// \brief A class for mapping one 64-bit value to another 64-bit value +/// \details With each payload, a kernel/function name and the source file name +/// may be passed and we need to ensure that the payload can be cached in a hash +/// map that maps a unique value from the payload to a universal ID. We could +/// use the payload hash for this purpose, but the numbers are non-monotonic and +/// can be harder to debug. This implementation of the hash table uses std +/// library containers. +class Hash64x64Table { +public: + using ht_lut_t = std::unordered_map; + + Hash64x64Table(int size = 1024) + : MForward(size), MReverse(size), MTableSize(size) { +#ifdef XPTI_STATISTICS + MInsertions = 0; + MRetrievals = 0; +#endif + } + + ~Hash64x64Table() { + MForward.clear(); + MReverse.clear(); + } + + // Clear all the contents of this hash table and get it ready for re-use + void clear() { + MForward.clear(); + MReverse.clear(); +#ifdef XPTI_STATISTICS + MInsertions = 0; + MRetrievals = 0; +#endif + } + + // Check to see if a particular Key is already present in the table; + // + // On success, the Value for the Key will be returned. 
If not, + // xpti::invalid_id will be returned. + int64_t find(int64_t Key) { + std::lock_guard Lock(MMutex); + // Try to read it, if already present + auto KeyLoc = MForward.find(Key); + if (KeyLoc != MForward.end()) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + return KeyLoc->second; // We found it, so we return the Value + } else + return xpti::invalid_id; + } + + // Add a pair to the hash table. If the Key already exists, this + // call returns even if the Value happens to be different this time. + // + // If the Key does not exist, then the Key is inserted into the hash map and + // the reverse lookup populated with the pair. + void add(int64_t Key, int64_t Value) { + // Try to read it, if already present + auto KeyLoc = MForward.find(Key); + if (KeyLoc != MForward.end()) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + } else { // Multiple threads could fall through here + { + // Employ a double-check pattern here + std::lock_guard Lock(MMutex); + auto KeyLoc = MForward.find(Key); + if (KeyLoc == MForward.end()) { + // The Key does not exist, so we will add the Key-Value pair to the + // hash map + MForward[Key] = Value; + KeyLoc = MForward.find(Key); +#ifdef XPTI_STATISTICS + MInsertions++; +#endif + // When we insert a new entry into the table, we also need to build + // the reverse lookup; + { + auto ValLoc = MReverse.find(Value); + if (ValLoc == MReverse.end()) { + // An entry does not exist, so we will add it to the reverse + // lookup. + MReverse[Value] = Key; + } else { + MForward.erase(KeyLoc); + } + } + } + // else, we do not add the Key-Value pair as the Key already exists in + // the table! + } + } + } + + // The reverse query allows one to get the Value from the Key that may have + // been cached somewhere. 
+ int64_t reverseFind(int64_t Value) { + std::lock_guard Lock(MMutex); + auto ValLoc = MReverse.find(Value); + if (ValLoc != MReverse.end()) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + return ValLoc->second; + } else + return xpti::invalid_id; + } + + void printStatistics() { +#ifdef XPTI_STATISTICS + printf("Hash table inserts : [%llu]\n", MInsertions.load()); + printf("Hash table lookups : [%llu]\n", MRetrievals.load()); +#endif + } + +private: + ht_lut_t MForward; ///< Forward lookup hash map + ht_lut_t MReverse; ///< Reverse lookup hash map + int32_t MTableSize; ///< Initial size of the hash map + std::mutex MMutex; ///< Mutex required to implement a double-check pattern +#ifdef XPTI_STATISTICS + safe_uint64_t MInsertions, ///< Thread-safe tracking of insertions + MRetrievals; ///< Thread-safe tracking of lookups +#endif +}; +#endif +} // namespace xpti diff --git a/xptifw/include/xpti_string_table.hpp b/xptifw/include/xpti_string_table.hpp new file mode 100644 index 0000000000000..03bdb08722822 --- /dev/null +++ b/xptifw/include/xpti_string_table.hpp @@ -0,0 +1,343 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +#pragma once +#include "xpti_data_types.h" + +#include +#include +#include + +#ifdef XPTI_STATISTICS +#include +#endif + +#ifdef XPTI_USE_TBB + +#include +#include + +#pragma message("Using TBB concurrent containers...") +namespace xpti { +/// \brief A string table class to support the payload handling +/// \details With each payload, a kernel/function name and the source file name +/// may be passed and we need to ensure that the incoming strings are copied and +/// represented in a string table as the incoming strings are guaranteed to be +/// valid only for the duration of the call that handles the payload. 
This +/// implementation used Threading Building Blocks concurrent containers. +class StringTable { +public: + using st_forward_t = tbb::concurrent_hash_map; + using st_reverse_t = tbb::concurrent_hash_map; + + StringTable(int size = 4096) + : MStringToID(size), MIDToString(size), MTableSize(size) { + MIds = 1; +#ifdef XPTI_STATISTICS + MInsertions = 0; + MRetrievals = 0; +#endif + } + + // Clear all the contents of this string table and get it ready for re-use + void clear() { + MIds = {1}; + MIDToString.clear(); + MStringToID.clear(); + + MIDToString.rehash(MTableSize); + MStringToID.rehash(MTableSize); +#ifdef XPTI_STATISTICS + MInsertions = 0; + MRetrievals = 0; +#endif + } + + // If the string being added to the string table is empty or invalid, then + // the returned string id = invalid_id; + // + // On success, the string will be inserted into two tables - one that maps + // string to string ID and another that maps from string ID to string. If a + // reference string pointer is made available, then the address of the string + // in the string table is returned through the default argument + xpti::string_id_t add(const char *str, const char **ref_str = nullptr) { + if (!str) + return xpti::invalid_id; + + std::string LocalStr = str; + return add(LocalStr, ref_str); + } + + xpti::string_id_t add(std::string str, const char **ref_str = nullptr) { + if (str.empty()) + return xpti::invalid_id; + + // Try to see if the string is already present in the string table + st_forward_t::const_accessor e; + if (MStringToID.find(e, str)) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + if (ref_str) + *ref_str = e->first.c_str(); + + // We found it, so we return the string ID + return e->second; + } else { + // Multiple threads could fall through here Release the reader lock held + e.release(); + string_id_t id; + { + // Employ a double-check pattern here + tbb::spin_mutex::scoped_lock dc(MMutex); + st_forward_t::accessor f; + if (MStringToID.insert(f, str)) { + // If 
the string does not exist, then insert() returns true. Here we + // create an ID for it + id = MIds++; + f->second = id; +#ifdef XPTI_STATISTICS + MInsertions++; +#endif + // When we insert a new entry into the table, we also need to build + // the reverse lookup; + { + st_reverse_t::accessor r; + if (MIDToString.insert(r, id)) { + // An entry does not exist, so we will add it to the reverse + // lookup. + r->second = f->first.c_str(); + // Cache the saved string address and send it to the caller + if (ref_str) + *ref_str = r->second; + f.release(); + r.release(); + MStrings++; + return id; + } else { + // We cannot have a case where a string is not present in the + // forward lookup and present in the reverse lookup + MStringToID.erase(f); + if (ref_str) + *ref_str = nullptr; + + return xpti::invalid_id; + } + } + + } else { + // The string has already been added, so we return the stored ID + id = f->second; +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + if (ref_str) + *ref_str = f->first.c_str(); + return id; + } + // Both the accessor and MMutex will be released here! + } + } + return xpti::invalid_id; + } + + // The reverse query allows one to get the string from the string_id_t that + // may have been cached somewhere. 
+ const char *query(xpti::string_id_t id) { + st_reverse_t::const_accessor e; + if (MIDToString.find(e, id)) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + return e->second; + } else + return nullptr; + } + + int32_t count() { return (int32_t)MStrings; } + + const st_reverse_t &table() { return MIDToString; } + + void printStatistics() { +#ifdef XPTI_STATISTICS + printf("String table inserts: [%llu]\n", MInsertions.load()); + printf("String table lookups: [%llu]\n", MRetrievals.load()); +#endif + } + +private: + safe_int32_t MIds; ///< Thread-safe ID generator + st_forward_t MStringToID; ///< Forward lookup hash map + st_reverse_t MIDToString; ///< Reverse lookup hash map + int32_t MTableSize; ///< Initial table size of the hash-map + tbb::spin_mutex MMutex; ///< Mutex required for double-check pattern + safe_int32_t MStrings; ///< The count of strings in the table +#ifdef XPTI_STATISTICS + safe_uint64_t MInsertions, ///< Thread-safe tracking of insertions + MRetrievals; ///< Thread-safe tracking of lookups +#endif +}; +} // namespace xpti +#else // Non-TBB implementation follows + +namespace xpti { +/// \brief A string table class to support the payload handling +/// \details With each payload, a kernel/function name and the source file name +/// may be passed and we need to ensure that the incoming strings are copied and +/// represented in a string table as the incoming strings are guaranteed to be +/// valid only for the duration of the call that handles the payload. This +/// implementation used STL containers protected with std::mutex. 
+class StringTable { +public: + using st_forward_t = std::unordered_map; + using st_reverse_t = std::unordered_map; + + StringTable(int size = 4096) + : MStringToID(size), MIDToString(size), MTableSize(size) { + MIds = 1; +#ifdef XPTI_STATISTICS + MInsertions = 0; + MRetrievals = 0; +#endif + } + + // Clear all the contents of this string table and get it ready for re-use + void clear() { + MIds = {1}; + MIDToString.clear(); + MStringToID.clear(); + +#ifdef XPTI_STATISTICS + MInsertions = 0; + MRetrievals = 0; +#endif + } + + // If the string being added to the string table is empty or invalid, then + // the returned string id = invalid_id; + // + // On success, the string will be inserted into two tables - one that maps + // string to string ID and another that maps from string ID to string. If a + // reference string pointer is made available, then the address of the string + // in the string table is returned through the default argument + xpti::string_id_t add(const char *str, const char **ref_str = nullptr) { + if (!str) + return xpti::invalid_id; + + std::string LocalStr = str; + return add(LocalStr, ref_str); + } + + xpti::string_id_t add(std::string str, const char **ref_str = nullptr) { + if (str.empty()) + return xpti::invalid_id; + + // Try to see if the string is already present in the string table + auto Loc = MStringToID.find(str); + if (Loc != MStringToID.end()) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + if (ref_str) + *ref_str = Loc->first.c_str(); + + // We found it, so we return the string ID + return Loc->second; + } else { + // String not in the table + // Multiple threads could fall through here + string_id_t StrID; + { + // Employ a double-check pattern here + std::lock_guard lock(MMutex); + auto Loc = MStringToID.find(str); + // String not present in the table + if (Loc == MStringToID.end()) { + // Add it + StrID = MIds++; + auto Entry = MStringToID.insert(st_forward_t::value_type(str, StrID)); + if (ref_str) + *ref_str = 
Entry.first->first.c_str(); +#ifdef XPTI_STATISTICS + MInsertions++; +#endif + // When we insert a new entry into the table, we also need to build + // the reverse lookup; + { + auto IDLoc = MIDToString.find(StrID); + if (IDLoc == MIDToString.end()) { + // An entry does not exist, so we will add it to the reverse + // lookup. + MIDToString[StrID] = Entry.first->first.c_str(); + // Cache the saved string address and send it to the caller + MStrings++; + return StrID; + } else { + // We cannot have a case where a string is not present in the + // forward lookup and present in the reverse lookup + MStringToID.erase(Loc); + if (ref_str) + *ref_str = nullptr; + + return xpti::invalid_id; + } + } + + } else { + // The string has already been added, so we return the stored ID + StrID = Loc->second; +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + if (ref_str) + *ref_str = Loc->first.c_str(); + return StrID; + } + // The MMutex will be released here! + } + } + return xpti::invalid_id; + } + + // The reverse query allows one to get the string from the string_id_t that + // may have been cached somewhere. 
+ const char *query(xpti::string_id_t id) { + std::lock_guard lock(MMutex); + auto Loc = MIDToString.find(id); + if (Loc != MIDToString.end()) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif + return Loc->second; + } else + return nullptr; + } + + int32_t count() { return (int32_t)MStrings; } + + const st_reverse_t &table() { return MIDToString; } + + void printStatistics() { +#ifdef XPTI_STATISTICS + printf("String table inserts: [%llu]\n", MInsertions.load()); + printf("String table lookups: [%llu]\n", MRetrievals.load()); +#endif + } + +private: + safe_int32_t MIds; ///< Thread-safe ID generator + st_forward_t MStringToID; ///< Forward lookup hash map + st_reverse_t MIDToString; ///< Reverse lookup hash map + int32_t MTableSize; ///< Initial table size of the hash-map + std::mutex MMutex; ///< Mutex required for double-check pattern + ///< Replace with reader-writer lock in C++14 + safe_int32_t MStrings; ///< The count of strings in the table +#ifdef XPTI_STATISTICS + safe_uint64_t MInsertions, ///< Thread-safe tracking of insertions + MRetrievals; ///< Thread-safe tracking of lookups +#endif +}; +} // namespace xpti +#endif diff --git a/xptifw/samples/basic_collector/CMakeLists.txt b/xptifw/samples/basic_collector/CMakeLists.txt new file mode 100644 index 0000000000000..735ed7deed27c --- /dev/null +++ b/xptifw/samples/basic_collector/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 2.8.9) +project (basic_collector) + +file(GLOB SOURCES *.cpp) +include_directories(${XPTIFW_DIR}/include) +include_directories(${XPTI_DIR}/include) +include_directories(${XPTIFW_DIR}/samples/include) + +remove_definitions(-DXPTI_STATIC_LIBRARY) +add_definitions(-DXPTI_API_EXPORTS) +add_library(basic_collector SHARED ${SOURCES}) +add_dependencies(basic_collector xptifw) +target_link_libraries(basic_collector PRIVATE xptifw) +if(UNIX) + target_link_libraries(basic_collector PRIVATE dl) +endif() + +if (XPTI_ENABLE_TBB) + target_link_libraries(basic_collector PRIVATE 
tbb) +endif() +# Set the location of the library installation +install(TARGETS basic_collector DESTINATION ${CMAKE_BINARY_DIR}) diff --git a/xptifw/samples/basic_collector/README.md b/xptifw/samples/basic_collector/README.md new file mode 100644 index 0000000000000..2040db580b640 --- /dev/null +++ b/xptifw/samples/basic_collector/README.md @@ -0,0 +1,26 @@ +# Basic collector + +The basic collector demonstrates the creation of a subscriber and prints of the +data received from various streams. In order to obtain the data from an application instrumented with XPTI, the following steps must be performed. + +1. Set the environment variable that indicates that tracing has been enabled. + + This is defined by the variable `XPTI_TRACE_ENABLE`. The possible + values taken by this environment variable are: + + To enable: `XPTI_TRACE_ENABLE=1` or `XPTI_TRACE_ENABLE=true` + + To disable: `XPTI_TRACE_ENABLE=0` or `XPTI_TRACE_ENABLE=false` + +2. Set the environment variable that points to the XPTI framework dispatcher so + the stub library can dynamically load it and dispatch the calls to the + dispatcher. + `XPTI_FRAMEWORK_DISPATCHER=/path/to/libxptifw.[so,dll,dylib]` + +3. Set the environment variable that points to the subscriber, which in this + case is `libbasic_collector.[so,dll,dylib]`. + + `XPTI_SUBSCRIBERS=/path/to/libbasic_collector.[so,dll,dylib]` + +For more detail on the framework, the tests that are provided and their usage, +please consult the [XPTI Framework library documentation](doc/XPTI_Framework.md). diff --git a/xptifw/samples/basic_collector/basic_collector.cpp b/xptifw/samples/basic_collector/basic_collector.cpp new file mode 100644 index 0000000000000..f8d455d26cb73 --- /dev/null +++ b/xptifw/samples/basic_collector/basic_collector.cpp @@ -0,0 +1,184 @@ +// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +#include "xpti_timers.hpp" +#include "xpti_trace_framework.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +static uint8_t GStreamID = 0; +std::mutex GIOMutex; +xpti::ThreadID GThreadIDEnum; + +static const char *TPTypes[] = { + "unknown", "graph_create", "node_create", "edge_create", + "region_", "task_", "barrier_", "lock_", + "signal ", "transfer_", "thread_", "wait_", + 0}; + +// The lone callback function we are going to use to demonstrate how to attach +// the collector to the running executable +XPTI_CALLBACK_API void tpCallback(uint16_t trace_type, + xpti::trace_event_data_t *parent, + xpti::trace_event_data_t *event, + uint64_t instance, const void *user_data); + +// Based on the documentation, every subscriber MUST implement the +// xptiTraceInit() and xptiTraceFinish() APIs for their subscriber collector to +// be loaded successfully. +XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, + unsigned int minor_version, + const char *version_str, + const char *stream_name) { + // The basic collector will take in streams from anyone as we are just + // printing out the stream data + if (stream_name) { + char *tstr; + // Register this stream to get the stream ID; This stream may already have + // been registered by the framework and will return the previously + // registered stream ID + GStreamID = xptiRegisterStream(stream_name); + xpti::string_id_t dev_id = xptiRegisterString("sycl_device", &tstr); + + // Register our lone callback to all pre-defined trace point types + xptiRegisterCallback(GStreamID, + (uint16_t)xpti::trace_point_type_t::graph_create, + tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::node_create, tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::edge_create, tpCallback); + xptiRegisterCallback(GStreamID, + 
(uint16_t)xpti::trace_point_type_t::region_begin, + tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::region_end, tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::task_begin, tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::task_end, tpCallback); + xptiRegisterCallback(GStreamID, + (uint16_t)xpti::trace_point_type_t::barrier_begin, + tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::barrier_end, tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::lock_begin, tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::lock_end, tpCallback); + xptiRegisterCallback(GStreamID, + (uint16_t)xpti::trace_point_type_t::transfer_begin, + tpCallback); + xptiRegisterCallback(GStreamID, + (uint16_t)xpti::trace_point_type_t::transfer_end, + tpCallback); + xptiRegisterCallback(GStreamID, + (uint16_t)xpti::trace_point_type_t::thread_begin, + tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::thread_end, tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::wait_begin, tpCallback); + xptiRegisterCallback( + GStreamID, (uint16_t)xpti::trace_point_type_t::wait_end, tpCallback); + xptiRegisterCallback(GStreamID, (uint16_t)xpti::trace_point_type_t::signal, + tpCallback); + printf("Registered all callbacks\n"); + } else { + // handle the case when a bad stream name has been provided + std::cerr << "Invalid stream - no callbacks registered!\n"; + } +} + +// +std::string truncate(std::string Name) { + size_t Pos = Name.find_last_of(":"); + if (Pos != std::string::npos) { + return Name.substr(Pos + 1); + } else { + return Name; + } +} + +XPTI_CALLBACK_API void xptiTraceFinish(const char *stream_name) { + // We do nothing here +} + +XPTI_CALLBACK_API void tpCallback(uint16_t TraceType, + xpti::trace_event_data_t *Parent, + 
xpti::trace_event_data_t *Event, + uint64_t Instance, const void *UserData) { + auto Payload = xptiQueryPayload(Event); + xpti::timer::tick_t Time = xpti::timer::rdtsc(); + auto TID = xpti::timer::getThreadID(); + uint32_t CPU = GThreadIDEnum.enumID(TID); + std::string Name; + + if (Payload->name_sid != xpti::invalid_id) { + Name = truncate(Payload->name); + } else { + Name = ""; + } + + uint64_t ID = Event ? Event->unique_id : 0; + // Lock while we print information + std::lock_guard Lock(GIOMutex); + // Print the record information + printf("%-25lu: name=%-35s cpu=%3d event_id=%10lu\n", Time, Name.c_str(), CPU, + ID); + // Go through all available meta-data for an event and print it out + xpti::metadata_t *Metadata = xptiQueryMetadata(Event); + for (auto &Item : *Metadata) { + printf(" %-25s:%s\n", xptiLookupString(Item.first), + xptiLookupString(Item.second)); + } + + if (Payload->source_file_sid != xpti::invalid_id && Payload->line_no > 0) { + printf("---[Source file:line no] %s:%d\n", Payload->source_file, + Payload->line_no); + } +} + +#if (defined(_WIN32) || defined(_WIN64)) + +#include +#include + +BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fwdReason, LPVOID lpvReserved) { + switch (fwdReason) { + case DLL_PROCESS_ATTACH: + // printf("Framework initialization\n"); + break; + case DLL_PROCESS_DETACH: + // + // We cannot unload all subscribers here... + // + // printf("Framework finalization\n"); + break; + } + + return TRUE; +} + +#else // Linux (possibly macOS?) 
+ +__attribute__((constructor)) static void framework_init() { + // printf("Framework initialization\n"); +} + +__attribute__((destructor)) static void framework_fini() { + // printf("Framework finalization\n"); +} + +#endif diff --git a/xptifw/samples/include/xpti_timers.hpp b/xptifw/samples/include/xpti_timers.hpp new file mode 100644 index 0000000000000..cf0f1593c5510 --- /dev/null +++ b/xptifw/samples/include/xpti_timers.hpp @@ -0,0 +1,92 @@ +// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +#pragma once + +#include +#include +#include +#include + +namespace xpti { +class ThreadID { +public: + using thread_lut_t = std::unordered_map; + + inline uint32_t enumID(std::thread::id &curr) { + std::stringstream s; + s << curr; + std::string str(s.str()); + + if (m_thread_lookup.count(str)) { + return m_thread_lookup[str]; + } else { + uint32_t enumID = m_tid++; + m_thread_lookup[str] = enumID; + return enumID; + } + } + + inline uint32_t enumID(const std::string &curr) { + if (m_thread_lookup.count(curr)) { + return m_thread_lookup[curr]; + } else { + uint32_t enumID = m_tid++; + m_thread_lookup[curr] = enumID; + return enumID; + } + } + +private: + std::atomic m_tid = {0}; + thread_lut_t m_thread_lookup; +}; + +namespace timer { +#include +using tick_t = uint64_t; +#if defined(_WIN32) || defined(_WIN64) +#include "windows.h" +inline xpti::timer::tick_t rdtsc() { + LARGE_INTEGER qpcnt; + int rval = QueryPerformanceCounter(&qpcnt); + return qpcnt.QuadPart; +} +inline uint64_t getTSFrequency() { + LARGE_INTEGER freq; + QueryPerformanceFrequency(&freq); + return freq.QuadPart * 1000; +} +inline uint64_t getCPU() { return GetCurrentProcessorNumber(); } +#else +#include +#include +#if __x86_64__ || __i386__ || __i386 +inline xpti::timer::tick_t rdtsc() { + struct timespec ts; + int status = 
clock_gettime(CLOCK_REALTIME, &ts); + return (static_cast(1000000000UL) * static_cast(ts.tv_sec) + + static_cast(ts.tv_nsec)); +} + +inline uint64_t getTSFrequency() { return static_cast(1E9); } + +inline uint64_t getCPU() { +#ifdef __linux__ + return sched_getcpu(); +#else + return 0; +#endif +} +#else +#error Unsupported ISA +#endif + +inline std::thread::id getThreadID() { return std::this_thread::get_id(); } +#endif +} // namespace timer +} // namespace xpti diff --git a/xptifw/src/CMakeLists.txt b/xptifw/src/CMakeLists.txt new file mode 100644 index 0000000000000..31a5be3352217 --- /dev/null +++ b/xptifw/src/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 2.8.9) +project (xptifw) + +file(GLOB SOURCES *.cpp) +include_directories(${XPTIFW_DIR}/include) +include_directories(${XPTI_DIR}/include) + +remove_definitions(-DXPTI_STATIC_LIBRARY) +add_definitions(-DXPTI_API_EXPORTS) +add_library(xptifw SHARED ${SOURCES}) +if(UNIX) + target_link_libraries(xptifw PRIVATE dl) +endif() + +if (XPTI_ENABLE_TBB) + add_dependencies(xptifw tbb) + target_compile_definitions(xptifw PRIVATE XPTI_USE_TBB) + target_link_libraries(xptifw PRIVATE tbb) +endif() + +# Set the location of the library installation +install(TARGETS xptifw DESTINATION ${CMAKE_BINARY_DIR}) diff --git a/xptifw/src/xpti_trace_framework.cpp b/xptifw/src/xpti_trace_framework.cpp new file mode 100644 index 0000000000000..1d24cd1d255e0 --- /dev/null +++ b/xptifw/src/xpti_trace_framework.cpp @@ -0,0 +1,1199 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +#include "xpti_trace_framework.hpp" +#include "xpti_int64_hash_table.hpp" +#include "xpti_string_table.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef XPTI_USE_TBB +#include +#include +#include +#include +#endif + +#define XPTI_USER_DEFINED_TRACE_TYPE16(value) \ + ((uint16_t)xpti::trace_point_type_t::user_defined | (uint16_t)value) +#define XPTI_USER_DEFINED_EVENT_TYPE16(value) \ + ((uint16_t)xpti::trace_event_type_t::user_defined | (uint16_t)value) +#define XPTI_EXTRACT_MSB16(val) (val >> 16) +#define XPTI_EXTRACT_LSB16(val) (val & 0x0000ffff) + +#define XPTI_VENDOR_DEFINED_TRACE_TYPE16(vendor_id, trace_type) \ + ((uint16_t)vendor_id << 8 | XPTI_USER_DEFINED_TRACE_TYPE16(trace_type)) +#define XPTI_VENDOR_DEFINED_EVENT_TYPE16(vendor_id, event_type) \ + ((uint16_t)vendor_id << 8 | XPTI_USER_DEFINED_EVENT_TYPE16(event_type)) + +namespace xpti { +constexpr const char *env_subscribers = "XPTI_SUBSCRIBERS"; +xpti::utils::PlatformHelper g_helper; +// This class is a helper class to load all the listed subscribers provided by +// the user in XPTI_SUBSCRIBERS environment variable. 
+class Subscribers { +public: + // Data structure to hold the plugin related information, including the + // initialization and finalization functions + struct plugin_data_t { + /// The handle of the loaded shared object + xpti_plugin_handle_t handle = nullptr; + /// The initialization entry point + xpti::plugin_init_t init = nullptr; + /// The finalization entry point + xpti::plugin_fini_t fini = nullptr; + /// The name of the shared object (in UTF8?)) + std::string name; + /// indicates whether the data structure is valid + bool valid = false; + }; + // Data structures defined to hold the plugin data that can be looked up by + // plugin name or the handle + // + using plugin_handle_lut_t = std::map; + using plugin_name_lut_t = std::map; + + // We unload all loaded shared objects in the destructor; Must not be invoked + // in the DLLMain() function and possibly the __fini() function in Linux + ~Subscribers() { unloadAllPlugins(); } + // Method to query the plugin data information using the handle. If there's no + // information present for the handle provided, the method returns a structure + // with the valid attribute set to 'false' + plugin_data_t queryPlugin(xpti_plugin_handle_t Handle) { + plugin_data_t PData; +#ifdef XPTI_USE_TBB + tbb::spin_mutex::scoped_lock MyLock(MMutex); +#else + std::lock_guard Lock(MMutex); +#endif + if (MHandleLUT.count(Handle)) + return MHandleLUT[Handle]; + else + return PData; // return invalid plugin data + } + + // Load the provided shared object file name using the explicit load API. If + // the load is successful, a test is performed to see if the shared object has + // the required entry points for it to be considered a trace plugin + // subscriber. If so, the internal data structures are updated and a valid + // handle is returned. + // + // If not, the shared object is unloaded and a NULL handle is returned. 
+ xpti_plugin_handle_t loadPlugin(const char *Path) { + xpti_plugin_handle_t Handle = 0; + std::string Error; + // Check to see if the subscriber has already been loaded; if so, return the + // handle from the previously loaded library + if (MNameLUT.count(Path)) { +#ifdef XPTI_USE_TBB + tbb::spin_mutex::scoped_lock MyLock(MMutex); +#else + std::lock_guard Lock(MMutex); +#endif + // This plugin has already been loaded, so let's return previously + // recorded handle + plugin_data_t &Data = MNameLUT[Path]; + assert(Data.valid && "Lookup is invalid!"); + if (Data.valid) + return Data.handle; + } + + Handle = g_helper.loadLibrary(Path, Error); + if (Handle) { + // The tracing framework requires the tool plugins to implement the + // xptiTraceInit() and xptiTraceFinish() functions. If these are not + // present, then the plugin will be ruled an invalid plugin and unloaded + // from the process. + xpti::plugin_init_t InitFunc = + (xpti::plugin_init_t)g_helper.findFunction(Handle, "xptiTraceInit"); + xpti::plugin_fini_t FiniFunc = + (xpti::plugin_fini_t)g_helper.findFunction(Handle, "xptiTraceFinish"); + if (InitFunc && FiniFunc) { + // We appear to have loaded a valid plugin, so we will insert the + // plugin information into the two maps guarded by a lock + plugin_data_t Data; + Data.valid = true; + Data.handle = Handle; + Data.name = Path; + Data.init = InitFunc; + Data.fini = FiniFunc; +#ifdef XPTI_USE_TBB + tbb::spin_mutex::scoped_lock MyLock(MMutex); +#else + std::lock_guard Lock(MMutex); +#endif + MNameLUT[Path] = Data; + MHandleLUT[Handle] = Data; + } else { + // We may have loaded another shared object that is not a tool plugin + // for the tracing framework, so we'll unload it now + unloadPlugin(Handle); + Handle = nullptr; + } + } else { + // Get error from errno + if (!Error.empty()) + printf("[%s]: %s\n", Path, Error.c_str()); + } + return Handle; + } + + // Unloads the shared object identified by the handle provided. 
If + // successful, returns a success code, else a failure code. + xpti::result_t unloadPlugin(xpti_plugin_handle_t PluginHandle) { + xpti::result_t Res = g_helper.unloadLibrary(PluginHandle); + if (xpti::result_t::XPTI_RESULT_SUCCESS == Res) { + auto Loc = MHandleLUT.find(PluginHandle); + if (Loc != MHandleLUT.end()) { + MHandleLUT.erase(PluginHandle); + } + } + return Res; + } + + // Quick test to see if there are registered subscribers + bool hasValidSubscribers() { return (MHandleLUT.size() > 0); } + + void initializeForStream(const char *Stream, uint32_t major_revision, + uint32_t minor_revision, + const char *version_string) { + // If there are subscribers registered, then initialize the subscribers + // with the new stream information. + if (MHandleLUT.size()) { + for (auto &Handle : MHandleLUT) { + Handle.second.init(major_revision, minor_revision, version_string, + Stream); + } + } + } + + void finalizeForStream(const char *Stream) { + // If there are subscribers registered, then finalize the subscribers for + // the stream + if (MHandleLUT.size()) { + for (auto &Handle : MHandleLUT) { + Handle.second.fini(Stream); + } + } + } + + void loadFromEnvironmentVariable() { + if (!g_helper.checkTraceEnv()) + return; + // Load all registered Listeners by scanning the environment variable in + // "Env"; The environment variable, if set, extract the comma separated + // tokens into a vector. 
+ std::string Token, Env = g_helper.getEnvironmentVariable(env_subscribers); + std::vector Listeners; + std::stringstream Stream(Env); + + // Split the environment variable value by ',' and build a vector of the + // tokens (subscribers) + while (std::getline(Stream, Token, ',')) { + Listeners.push_back(Token); + } + + size_t ValidSubscribers = Listeners.size(); + if (ValidSubscribers) { + // Let's go through the subscribers and load these plugins; + for (auto &Path : Listeners) { + // Load the plugins listed in the environment variable +#ifdef XPTI_USE_TBB + tbb::spin_mutex::scoped_lock MyLock(MLoader); +#else + std::lock_guard Lock(MLoader); +#endif + auto SubscriberHandle = loadPlugin(Path.c_str()); + if (!SubscriberHandle) { + ValidSubscribers--; + printf("Failed to load %s successfully...\n", Path.c_str()); + } + } + } + } + + void unloadAllPlugins() { + for (auto &Item : MNameLUT) { + unloadPlugin(Item.second.handle); + } + MHandleLUT.clear(); + MNameLUT.clear(); + } + +private: + /// Hash map that maps shared object name to the plugin data + plugin_name_lut_t MNameLUT; + /// Hash map that maps shared object handle to the plugin data + plugin_handle_lut_t MHandleLUT; +#ifdef XPTI_USE_TBB + /// Lock to ensure the operation on these maps are safe + tbb::spin_mutex MMutex; + /// Lock to ensure that only one load happens at a time + tbb::spin_mutex MLoader; +#else + /// Lock to ensure the operation on these maps are safe + std::mutex MMutex; + /// Lock to ensure that only one load happens at a time + std::mutex MLoader; +#endif +}; + +/// \brief Helper class to create and manage tracepoints +/// \details The class uses the global string table to register the strings it +/// encounters in various payloads and builds internal hash maps to manage them. +/// This is a single point for managing tracepoints. 
+class Tracepoints { +public: +#ifdef XPTI_USE_TBB + using va_uid_t = tbb::concurrent_unordered_map; + using uid_payload_t = tbb::concurrent_unordered_map; + using uid_event_t = + tbb::concurrent_unordered_map; +#else + using va_uid_t = std::unordered_map; + using uid_payload_t = std::unordered_map; + using uid_event_t = std::unordered_map; +#endif + + Tracepoints(xpti::StringTable &st) + : MUId(1), MInsertions(0), MRetrievals(0), MStringTableRef(st) { + // Nothing requires to be done at construction time + } + + ~Tracepoints() { clear(); } + + void clear() { + MStringTableRef.clear(); + // We will always start our ID + // stream from 1. 0 is null_id + // and -1 is invalid_id + MUId = {1}; + MPayloadLUT.clear(); + MInsertions = MRetrievals = {0}; + MPayloads.clear(); + MEvents.clear(); + MCodePtrLUT.clear(); + } + + inline uint64_t makeUniqueID() { return MUId++; } + + // Create an event with the payload information. If one already exists, the + // retrieve the previously added event. If not, we register the provided + // payload as we are seeing it for the first time. We will register all of + // the strings in the payload and used the string ids for generating a hash + // for the payload. + // + // In the case the event already exists, the instance_no will return the + // instance ID of the event. If the event is created for the first time, the + // instance_id will always be 1. + // + // If the string information like the name, source file etc is not available, + // we will use the code pointer to generate an universal id. + // + // At the end of the function, the following tasks will be complete: + // 1. Create a hash for the payload and cache it + // 2. Create a mapping from hash <--> Universal ID + // 3. Create a mapping from code_ptr <--> Universal ID + // 4. Create a mapping from Universal ID <--> Payload + // 5. 
Create a mapping from Universal ID <--> Event + xpti::trace_event_data_t *create(const xpti::payload_t *Payload, + uint64_t *InstanceNo) { + return register_event(Payload, InstanceNo); + } + // Method to get the payload information from the event structure. This method + // uses the Universal ID in the event structure to lookup the payload + // information and returns the payload if available. + // + // This method is thread-safe + const xpti::payload_t *payloadData(xpti::trace_event_data_t *Event) { + if (!Event || Event->unique_id == xpti::invalid_id) + return nullptr; +#ifndef XPTI_USE_TBB + std::lock_guard Lock(MMutex); +#endif + if (Event->reserved.payload) + return Event->reserved.payload; + else { + // Cache it in case it is not already cached + Event->reserved.payload = &MPayloads[Event->unique_id]; + return Event->reserved.payload; + } + } + + const xpti::trace_event_data_t *eventData(int64_t UId) { + if (UId == xpti::invalid_id) + return nullptr; + +#ifndef XPTI_USE_TBB + std::lock_guard Lock(MMutex); +#endif + auto EvLoc = MEvents.find(UId); + if (EvLoc != MEvents.end()) + return &(EvLoc->second); + else + return nullptr; + } + + // Sometimes, the user may want to add key-value pairs as metadata associated + // with an event; this would be in addition to the source_file, line_no and + // column_no fields that may already be present. Since we are not sure of the + // data types, we will allow them to add these pairs as strings. Internally, + // we will store key-value pairs as a map of string ids. 
+ xpti::result_t addMetadata(xpti::trace_event_data_t *Event, const char *Key, + const char *Value) { + if (!Event || !Key || !Value) + return xpti::result_t::XPTI_RESULT_INVALIDARG; + + string_id_t KeyID = MStringTableRef.add(Key); + if (KeyID == xpti::invalid_id) { + return xpti::result_t::XPTI_RESULT_INVALIDARG; + } + string_id_t ValueID = MStringTableRef.add(Value); + if (ValueID == xpti::invalid_id) { + return xpti::result_t::XPTI_RESULT_INVALIDARG; + } + // Protect simultaneous insert operations on the metadata tables +#ifdef XPTI_USE_TBB + tbb::spin_mutex::scoped_lock HashLock(MMetadataMutex); +#else + std::lock_guard HashLock(MMetadataMutex); +#endif + + if (Event->reserved.metadata.count(KeyID)) { + return xpti::result_t::XPTI_RESULT_DUPLICATE; + } + Event->reserved.metadata[KeyID] = ValueID; + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + // Method to get the access statistics of the tracepoints. + // It will print the number of insertions vs lookups that were + // performed. + // + void printStatistics() { +#ifdef XPTI_STATISTICS + printf("Tracepoint inserts : [%lu] \n", MInsertions.load()); + printf("Tracepoint lookups : [%lu]\n", MRetrievals.load()); + printf("Tracepoint Hashmap :\n"); + MPayloadLUT.printStatistics(); +#endif + } + +private: + /// Goals: To create a hash value from payload + /// 1. Check the payload structure to see if it is valid. If valid, then + /// check to see if any strings are provided and add them to the string + /// table. + /// 2. Generate a payload reference using the string information, if + /// present or the code pointer information, otherwise + /// 3. Add the payload and generate a unique ID + /// 4. 
Cache the computed hash in the payload + int64_t makeHash(xpti::payload_t *Payload) { + // Initialize to invalid hash value + int64_t HashValue = xpti::invalid_id; + // If no flags are set, then the payload is not valid + if (Payload->flags == 0) + return HashValue; + // If the hash value has been cached, return and bail early + if (Payload->flags & (uint64_t)payload_flag_t::HashAvailable) + return Payload->internal; + + // Add the string information to the string table and use the string IDs + // (in addition to any unique addresses) to create a hash value + if ((Payload->flags & (uint64_t)payload_flag_t::NameAvailable)) { + // Add the kernel name to the string table; if the add() returns the + // address to the string in the string table, we can avoid a query [TBD] + Payload->name_sid = MStringTableRef.add(Payload->name, &Payload->name); + // Payload->name = MStringTableRef.query(Payload->name_sid); + if (Payload->flags & (uint64_t)payload_flag_t::SourceFileAvailable) { + // Add source file information ot string table + Payload->source_file_sid = + MStringTableRef.add(Payload->source_file, &Payload->source_file); + // Payload->source_file = + // MStringTableRef.query(Payload->source_file_sid); + if (Payload->flags & (uint64_t)payload_flag_t::CodePointerAvailable) { + // We have source file, kernel name info and kernel address; + // so we combine all of them to make it unique: + // + // <32-bits of address bits 5-36><16-bit source_file_sid><16-bit + // kernel name sid> + // + // Using the code pointer address works better than using the line + // number and column number as the column numbers are not set in all + // compilers that support builtin functions. If two objects are + // declared on the same line, then the line numbers, function name, + // source file are all the same and it would be hard to disambiguate + // them. 
However, if we use the address, which would be the object + // address, they both will have different addresses even if they + // happen to be on the same line. + uint16_t NamePack = (uint16_t)(Payload->name_sid & 0x0000ffff); + uint16_t SrcFileNamePack = + (uint16_t)(Payload->source_file_sid & 0x0000ffff); + uint32_t KernelIDPack = XPTI_PACK16_RET32(SrcFileNamePack, NamePack); + uint32_t Address = (uint32_t)( + ((uint64_t)Payload->code_ptr_va & 0x0000000ffffffff0) >> 4); + HashValue = XPTI_PACK32_RET64(Address, KernelIDPack); + // Cache the hash once it is computed + Payload->flags |= (uint64_t)payload_flag_t::HashAvailable; + Payload->internal = HashValue; + return HashValue; + } else { + // We have both source file and kernel name info + // + // If we happen to have the line number, then we will combine all + // three integer values (22-bits) to form a 64-bit hash. If not, we + // will use 22 bits of the source file and kernel name ids and form a + // 64-bit value with the middle 22-bits being zero representing the + // line number. 
+ uint64_t LeftPart = 0, MiddlePart = 0, RightPart = 0, + Mask22Bits = 0x00000000003fffff; + // If line number info is available, extract 22-bits of it + if (Payload->flags & (uint64_t)payload_flag_t::LineInfoAvailable) { + MiddlePart = Payload->line_no & Mask22Bits; + MiddlePart = MiddlePart << 22; + } + // The leftmost 22-bits will represent the file name string id + LeftPart = Payload->source_file_sid & Mask22Bits; + LeftPart = LeftPart << 44; + // The rightmost 22-bits will represent the kernel name string id + RightPart = Payload->name_sid & Mask22Bits; + HashValue = LeftPart | MiddlePart | RightPart; + Payload->flags |= (uint64_t)payload_flag_t::HashAvailable; + Payload->internal = HashValue; + return HashValue; + } + } else if (Payload->flags & + (uint64_t)payload_flag_t::CodePointerAvailable) { + // We have both kernel name and kernel address; we use bits 5-36 from + // the address and combine it with the kernel name string ID + uint32_t Address = (uint32_t)( + ((uint64_t)Payload->code_ptr_va & 0x0000000ffffffff0) >> 4); + HashValue = XPTI_PACK32_RET64(Address, Payload->name_sid); + Payload->flags |= (uint64_t)payload_flag_t::HashAvailable; + Payload->internal = HashValue; + return HashValue; + } else { + // We only have kernel name and this is suspect if the kernel names are + // not unique and will replace any previously stored payload information + if (Payload->name_sid != xpti::invalid_id) { + HashValue = XPTI_PACK32_RET64(0, Payload->name_sid); + Payload->flags |= (uint64_t)payload_flag_t::HashAvailable; + Payload->internal = HashValue; + return HashValue; + } + } + } else if (Payload->flags & + (uint64_t)payload_flag_t::CodePointerAvailable) { + // We are only going to look at Kernel address when kernel name is not + // available. 
+ HashValue = (uint64_t)Payload->code_ptr_va; + Payload->flags |= (uint64_t)payload_flag_t::HashAvailable; + Payload->internal = HashValue; + return HashValue; + } + return HashValue; + } + + // Register the payload and generate a universal ID for it. + // Once registered, the payload is accessible through the + // Universal ID that corresponds to the payload. + // + // This method is thread-safe + xpti::trace_event_data_t *register_event(const xpti::payload_t *Payload, + uint64_t *InstanceNo) { + xpti::payload_t TempPayload = *Payload; + // Initialize to invalid + // We need an explicit lock for the rest of the operations as the same + // payload could be registered from multiple-threads. + // + // 1. makeHash(p) is invariant, although the hash may be created twice and + // written to the same field in the structure. If we have a lock guard, we + // may be spinning and wasting time instead. We will just compute this in + // parallel. + // 2. MPayloadLUT is queried by two threads and and both queries return + // "not found" + // 3. This takes both threads to the else clause both threads will create a + // unique_id for the payload being registered and add them to the hash table + // [with DIFFERENT IDs] and MPayloads[unique_id] gets updated twice for the + // same payload with different IDs + // 4. ev.unique_id is undefined as it could be one of the two IDs generated + // for the payload + // + int64_t UId = xpti::invalid_id; + // Make a hash value from the payload. 
If the hash value created is + // invalid, return immediately + int64_t HashValue = makeHash(&TempPayload); + if (HashValue == xpti::invalid_id) + return nullptr; + // If it's valid, we check to see if we can retrieve the previously added + // event structure; we do this as a critical section +#ifdef XPTI_USE_TBB + tbb::speculative_spin_mutex::scoped_lock HashLock(MHashLock); +#else + std::lock_guard HashLock(MHashLock); +#endif + UId = MPayloadLUT.find(HashValue); + if (UId != xpti::invalid_id) { +#ifdef XPTI_STATISTICS + MRetrievals++; +#endif +#ifndef XPTI_USE_TBB + std::lock_guard Lock(MMutex); +#endif + auto EvLoc = MEvents.find(UId); + if (EvLoc != MEvents.end()) { + EvLoc->second.instance_id++; + // Guarantees that the returned instance ID will be accurate as + // it is on the stack + if (InstanceNo) + *InstanceNo = EvLoc->second.instance_id; + return &(EvLoc->second); + } else + return nullptr; // we have a problem! + } else { +#ifdef XPTI_STATISTICS + MInsertions++; +#endif + // Create a new unique ID + // + UId = MUId++; + // And add it as a pair + // + MPayloadLUT.add(HashValue, UId); + // The API allows you to query a Universal ID from the kernel address; so + // build the necessary data structures for this. 
+ if (TempPayload.flags & (uint64_t)payload_flag_t::HashAvailable) { +#ifndef XPTI_USE_TBB + std::lock_guard Lock(MCodePtrMutex); +#endif + MCodePtrLUT[(uint64_t)TempPayload.code_ptr_va] = UId; + } + // We also want to query the payload by universal ID that has been + // generated +#ifndef XPTI_USE_TBB + std::lock_guard Lock(MMutex); +#endif + MPayloads[UId] = TempPayload; // when it uses tbb, should be thread-safe + { + xpti::trace_event_data_t *Event = &MEvents[UId]; + // We are seeing this unique ID for the first time, so we will + // initialize the event structure with defaults and set the unique_id to + // the newly generated unique id (uid) + Event->unique_id = UId; + Event->unused = 0; + Event->reserved.payload = &MPayloads[UId]; + Event->data_id = Event->source_id = Event->target_id = 0; + Event->instance_id = 1; + Event->user_data = nullptr; + Event->event_type = (uint16_t)xpti::trace_event_type_t::unknown_event; + Event->activity_type = + (uint16_t)xpti::trace_activity_type_t::unknown_activity; + *InstanceNo = Event->instance_id; + return Event; + } + } + return nullptr; + } + + xpti::safe_int64_t MUId; + xpti::Hash64x64Table MPayloadLUT; + xpti::StringTable &MStringTableRef; + xpti::safe_uint64_t MInsertions, MRetrievals; + uid_payload_t MPayloads; + uid_event_t MEvents; + va_uid_t MCodePtrLUT; +#ifdef XPTI_USE_TBB + tbb::spin_mutex MMetadataMutex; + tbb::speculative_spin_mutex MHashLock; +#else + std::mutex MMetadataMutex; + std::mutex MHashLock; + std::mutex MMutex; + std::mutex MCodePtrMutex; +#endif +}; + +/// \brief Helper class to manage subscriber callbacks for a given tracepoint +/// \details This class provides a thread-safe way to register and unregister +/// callbacks for a given stream. This will be used by tool plugins. +/// +/// The class also provided a way to notify registered callbacks for a given +/// stream and trace point type. This will be used by framework to trigger +/// notifications are instrumentation points. 
+/// +class Notifications { +public: + using cb_entry_t = std::pair; +#ifdef XPTI_USE_TBB + using cb_entries_t = tbb::concurrent_vector; + using cb_t = tbb::concurrent_hash_map; + using stream_cb_t = tbb::concurrent_unordered_map; + using statistics_t = tbb::concurrent_unordered_map; +#else + using cb_entries_t = std::vector; + using cb_t = std::unordered_map; + using stream_cb_t = std::unordered_map; + using statistics_t = std::unordered_map; +#endif + + xpti::result_t registerCallback(uint8_t StreamID, uint16_t TraceType, + xpti::tracepoint_callback_api_t cbFunc) { + if (!cbFunc) + return xpti::result_t::XPTI_RESULT_INVALIDARG; + +#ifdef XPTI_STATISTICS + // Initialize first encountered trace + // type statistics counters + { +#ifdef XPTI_USE_TBB + tbb::spin_mutex::scoped_lock Lock(MStatsLock); +#else + std::lock_guard Lock(MStatsLock); +#endif + auto InstanceNo = MStats.find(TraceType); + if (InstanceNo == MStats.end()) { + MStats[TraceType] = 0; + } + } +#endif +#ifndef XPTI_USE_TBB + std::lock_guard Lock(MCBsLock); +#endif + auto &StreamCBs = + MCallbacksByStream[StreamID]; // thread-safe + // What we get is a concurrent_hash_map + // of vectors holding the callbacks we + // need access to; +#ifdef XPTI_USE_TBB + cb_t::accessor Acc; + StreamCBs.insert(Acc, TraceType); +#else + auto Acc = StreamCBs.find(TraceType); + if (Acc == StreamCBs.end()) { + // Create a new slot and return the accessor for the trace type + auto Tmp = StreamCBs[TraceType]; + Acc = StreamCBs.find(TraceType); + } +#endif + // If the key does not exist, a new entry is created and an accessor to it + // is returned. If it exists, we have access to the previous entry. + // + // Before we add this element, we scan all existing elements to see if it + // has already been registered. If so, we return XPTI_RESULT_DUPLICATE. + // + // If not, we set the first element of new entry to 'true' indicating that + // it is valid. 
Unregister will just set this flag to false, indicating that + // it is no longer valid and is unregistered. + for (auto &Ele : Acc->second) { + if (Ele.second == cbFunc) { + if (Ele.first) // Already here and active + return xpti::result_t::XPTI_RESULT_DUPLICATE; + else { // it has been unregistered before, re-enable + Ele.first = true; + return xpti::result_t::XPTI_RESULT_UNDELETE; + } + } + } + // If we come here, then we did not find the callback being registered + // already in the framework. So, we insert it. + Acc->second.push_back(std::make_pair(true, cbFunc)); + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + xpti::result_t unregisterCallback(uint8_t StreamID, uint16_t TraceType, + xpti::tracepoint_callback_api_t cbFunc) { + if (!cbFunc) + return xpti::result_t::XPTI_RESULT_INVALIDARG; + +#ifndef XPTI_USE_TBB + std::lock_guard Lock(MCBsLock); +#endif + auto &StreamCBs = + MCallbacksByStream[StreamID]; // thread-safe + // What we get is a concurrent_hash_map + // of vectors holding the callbacks we + // need access to; +#ifdef XPTI_USE_TBB + cb_t::accessor Acc; + bool Success = StreamCBs.find(Acc, TraceType); +#else + auto Acc = StreamCBs.find(TraceType); + bool Success = (Acc != StreamCBs.end()); +#endif + if (Success) { + for (auto &Ele : Acc->second) { + if (Ele.second == cbFunc) { + if (Ele.first) { // Already here and active + // unregister, since delete and simultaneous + // iterations by other threads are unsafe + Ele.first = false; + // releases the accessor + return xpti::result_t::XPTI_RESULT_SUCCESS; + } else { + // releases the accessor + return xpti::result_t::XPTI_RESULT_DUPLICATE; + } + } + } + } + // Not here, so nothing to unregister + return xpti::result_t::XPTI_RESULT_NOTFOUND; + } + + xpti::result_t unregisterStream(uint8_t StreamID) { + // If there are no callbacks registered for the requested stream ID, we + // return not found +#ifndef XPTI_USE_TBB + std::lock_guard Lock(MCBsLock); +#endif + if (MCallbacksByStream.count(StreamID) 
== 0) + return xpti::result_t::XPTI_RESULT_NOTFOUND; + + auto &StreamCBs = MCallbacksByStream[StreamID]; // thread-safe + // Disable all callbacks registered for the stream represented by StreamID + for (auto &Item : StreamCBs) { + for (auto &Ele : Item.second) { + Ele.first = false; + } + } + // Return success + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + xpti::result_t notifySubscribers(uint16_t StreamID, uint16_t TraceType, + xpti::trace_event_data_t *Parent, + xpti::trace_event_data_t *Object, + uint64_t InstanceNo, const void *UserData) { + { +#ifndef XPTI_USE_TBB + std::lock_guard Lock(MCBsLock); +#endif + cb_t &Stream = MCallbacksByStream[StreamID]; // Thread-safe +#ifdef XPTI_USE_TBB + cb_t::const_accessor Acc; // read-only accessor + bool Success = Stream.find(Acc, TraceType); +#else + auto Acc = Stream.find(TraceType); + bool Success = (Acc != Stream.end()); +#endif + + if (Success) { + // Go through all registered callbacks and invoke them + for (auto &Ele : Acc->second) { + if (Ele.first) + (Ele.second)(TraceType, Parent, Object, InstanceNo, UserData); + } + } + } +#ifdef XPTI_STATISTICS + auto &Counter = MStats[TraceType]; + { +#ifdef XPTI_USE_TBB + tbb::spin_mutex::scoped_lock Lock(MStatsLock); +#else + std::lock_guard Lock(MStatsLock); +#endif + Counter++; + } +#endif + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + void printStatistics() { +#ifdef XPTI_STATISTICS + printf("Notification statistics:\n"); + for (auto &s : MStats) { + printf("%19s: [%llu] \n", + stringify_trace_type((xpti_trace_point_type_t)s.first).c_str(), + s.second); + } +#endif + } + +private: +#ifdef XPTI_STATISTICS + std::string stringify_trace_type(xpti_trace_point_type_t TraceType) { + switch (TraceType) { + case graph_create: + return "graph_create"; + case node_create: + return "node_create"; + case edge_create: + return "edge_create"; + case region_begin: + return "region_begin"; + case region_end: + return "region_end"; + case task_begin: + return "task_begin"; 
+ case task_end: + return "task_end"; + case barrier_begin: + return "barrier_begin"; + case barrier_end: + return "barrier_end"; + case lock_begin: + return "lock_begin"; + case lock_end: + return "lock_end"; + case signal: + return "signal"; + case transfer_begin: + return "transfer_begin"; + case transfer_end: + return "transfer_end"; + case thread_begin: + return "thread_begin"; + case thread_end: + return "thread_end"; + case wait_begin: + return "wait_begin"; + case wait_end: + return "wait_end"; + break; + default: + if (TraceType & user_defined_trace_point) { + std::string str = + "user_defined[" + + std::to_string(XPTI_EXTRACT_USER_DEFINED_ID(TraceType)) + "]"; + return str; + } else { + std::string str = + "unknown[" + + std::to_string(XPTI_EXTRACT_USER_DEFINED_ID(TraceType)) + "]"; + return str; + } + } + } +#endif + stream_cb_t MCallbacksByStream; +#ifdef XPTI_USE_TBB + tbb::spin_mutex MStatsLock; +#else + std::mutex MCBsLock; + std::mutex MStatsLock; +#endif + statistics_t MStats; +}; + +class Framework { +public: + Framework() + : MTracepoints(MStringTableRef), MUniversalIDs(0), MTraceEnabled(false) { + // Load all subscribers on construction + MSubscribers.loadFromEnvironmentVariable(); + MTraceEnabled = + (g_helper.checkTraceEnv() && MSubscribers.hasValidSubscribers()); + } + + void clear() { + MUniversalIDs = {1}; + MTracepoints.clear(); + MStringTableRef.clear(); + } + + inline void setTraceEnabled(bool yesOrNo = true) { MTraceEnabled = yesOrNo; } + + inline bool traceEnabled() { return MTraceEnabled; } + + inline uint64_t makeUniqueID() { return MTracepoints.makeUniqueID(); } + + xpti::result_t addMetadata(xpti::trace_event_data_t *Event, const char *Key, + const char *Value) { + return MTracepoints.addMetadata(Event, Key, Value); + } + + xpti::trace_event_data_t * + createEvent(const xpti::payload_t *Payload, uint16_t EventType, + xpti::trace_activity_type_t ActivityType, uint64_t *InstanceNo) { + if (!Payload || !InstanceNo) + return nullptr; + 
+ if (Payload->flags == 0) + return nullptr; + + xpti::trace_event_data_t *Event = MTracepoints.create(Payload, InstanceNo); + + // Event is not managed by anyone. The unique_id that is a part of the event + // structure can be used to determine the payload that forms the event. The + // attribute 'ev.UserData' and 'ev.reserved' can be used to store user + // defined and system defined data respectively. Currently the 'reserved' + // field is not used, but object lifetime management must be employed once + // this is active. + // + // On the other hand, the 'UserData' field is for user data and should be + // managed by the user code. The framework will NOT free any memory + // allocated to this pointer + Event->event_type = EventType; + Event->activity_type = (uint16_t)ActivityType; + return Event; + } + + inline const xpti::trace_event_data_t *findEvent(int64_t UniversalID) { + return MTracepoints.eventData(UniversalID); + } + + xpti::result_t initializeStream(const char *Stream, uint32_t MajorRevision, + uint32_t MinorRevision, + const char *VersionString) { + if (!Stream || !VersionString) + return xpti::result_t::XPTI_RESULT_INVALIDARG; + + MSubscribers.initializeForStream(Stream, MajorRevision, MinorRevision, + VersionString); + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + uint8_t registerStream(const char *StreamName) { + return (uint8_t)MStreamStringTable.add(StreamName); + } + + void closeAllStreams() { + auto Table = MStreamStringTable.table(); + StringTable::st_reverse_t::iterator it; + for (it = Table.begin(); it != Table.end(); ++it) { + xptiFinalize(it->second); + } + } + + xpti::result_t unregisterStream(const char *StreamName) { + return finalizeStream(StreamName); + } + + uint8_t registerVendor(const char *StreamName) { + return (uint8_t)MVendorStringTable.add(StreamName); + } + + string_id_t registerString(const char *String, char **TableString) { + if (!TableString || !String) + return xpti::invalid_id; + + *TableString = 0; + + const char 
*RefStr; + auto ID = MStringTableRef.add(String, &RefStr); + *TableString = const_cast(RefStr); + + return ID; + } + + const char *lookupString(string_id_t ID) { + if (ID < 0) + return nullptr; + return MStringTableRef.query(ID); + } + + xpti::result_t registerCallback(uint8_t StreamID, uint16_t TraceType, + xpti::tracepoint_callback_api_t cbFunc) { + return MNotifier.registerCallback(StreamID, TraceType, cbFunc); + } + + xpti::result_t unregisterCallback(uint8_t StreamID, uint16_t TraceType, + xpti::tracepoint_callback_api_t cbFunc) { + return MNotifier.unregisterCallback(StreamID, TraceType, cbFunc); + } + + xpti::result_t notifySubscribers(uint8_t StreamID, uint16_t TraceType, + xpti::trace_event_data_t *Parent, + xpti::trace_event_data_t *Object, + uint64_t InstanceNo, const void *UserData) { + if (!MTraceEnabled) + return xpti::result_t::XPTI_RESULT_FALSE; + if (!Object) + return xpti::result_t::XPTI_RESULT_INVALIDARG; + // + // Notify all subscribers for the stream 'StreamID' + // + return MNotifier.notifySubscribers(StreamID, TraceType, Parent, Object, + InstanceNo, UserData); + } + + bool hasSubscribers() { return MSubscribers.hasValidSubscribers(); } + + xpti::result_t finalizeStream(const char *Stream) { + if (!Stream) + return xpti::result_t::XPTI_RESULT_INVALIDARG; + MSubscribers.finalizeForStream(Stream); + return MNotifier.unregisterStream(MStreamStringTable.add(Stream)); + } + + const xpti::payload_t *queryPayload(xpti::trace_event_data_t *Event) { + return MTracepoints.payloadData(Event); + } + + void printStatistics() { + MNotifier.printStatistics(); + MStringTableRef.printStatistics(); + MTracepoints.printStatistics(); + } + +private: + /// Thread-safe counter used for generating universal IDs + xpti::safe_uint64_t MUniversalIDs; + /// Manages loading the subscribers and calling their init() functions + xpti::Subscribers MSubscribers; + /// Used to send event notification to subscribers + xpti::Notifications MNotifier; + /// Thread-safe string 
table + xpti::StringTable MStringTableRef; + /// Thread-safe string table, used for stream IDs + xpti::StringTable MStreamStringTable; + /// Thread-safe string table, used for vendor IDs + xpti::StringTable MVendorStringTable; + /// Manages the tracepoints - framework caching + xpti::Tracepoints MTracepoints; + /// Flag indicates whether tracing should be enabled + bool MTraceEnabled; +}; + +static Framework GXPTIFramework; +} // namespace xpti + +extern "C" { +XPTI_EXPORT_API uint16_t +xptiRegisterUserDefinedTracePoint(const char *ToolName, uint8_t UserDefinedTP) { + uint8_t ToolID = xpti::GXPTIFramework.registerVendor(ToolName); + UserDefinedTP |= (uint8_t)xpti::trace_point_type_t::user_defined; + uint16_t UserDefTracepoint = XPTI_PACK08_RET16(ToolID, UserDefinedTP); + + return UserDefTracepoint; +} + +XPTI_EXPORT_API uint16_t xptiRegisterUserDefinedEventType( + const char *ToolName, uint8_t UserDefinedEvent) { + uint8_t ToolID = xpti::GXPTIFramework.registerVendor(ToolName); + UserDefinedEvent |= (uint8_t)xpti::trace_event_type_t::user_defined; + uint16_t UserDefEventType = XPTI_PACK08_RET16(ToolID, UserDefinedEvent); + return UserDefEventType; +} + +XPTI_EXPORT_API xpti::result_t xptiInitialize(const char *Stream, uint32_t maj, + uint32_t min, + const char *version) { + return xpti::GXPTIFramework.initializeStream(Stream, maj, min, version); +} + +XPTI_EXPORT_API void xptiFinalize(const char *Stream) { + xpti::GXPTIFramework.finalizeStream(Stream); +} + +XPTI_EXPORT_API uint64_t xptiGetUniqueId() { + return xpti::GXPTIFramework.makeUniqueID(); +} + +XPTI_EXPORT_API xpti::string_id_t xptiRegisterString(const char *String, + char **RefTableStr) { + return xpti::GXPTIFramework.registerString(String, RefTableStr); +} + +XPTI_EXPORT_API const char *xptiLookupString(xpti::string_id_t ID) { + return xpti::GXPTIFramework.lookupString(ID); +} + +XPTI_EXPORT_API uint8_t xptiRegisterStream(const char *StreamName) { + return xpti::GXPTIFramework.registerStream(StreamName); 
+} + +XPTI_EXPORT_API xpti::result_t xptiUnregisterStream(const char *StreamName) { + return xpti::GXPTIFramework.unregisterStream(StreamName); +} +XPTI_EXPORT_API xpti::trace_event_data_t * +xptiMakeEvent(const char * /*Name*/, xpti::payload_t *Payload, uint16_t Event, + xpti::trace_activity_type_t Activity, uint64_t *InstanceNo) { + return xpti::GXPTIFramework.createEvent(Payload, Event, Activity, InstanceNo); +} + +XPTI_EXPORT_API void xptiReset() { xpti::GXPTIFramework.clear(); } + +XPTI_EXPORT_API const xpti::trace_event_data_t *xptiFindEvent(int64_t UId) { + return xpti::GXPTIFramework.findEvent(UId); +} + +XPTI_EXPORT_API const xpti::payload_t * +xptiQueryPayload(xpti::trace_event_data_t *LookupObject) { + return xpti::GXPTIFramework.queryPayload(LookupObject); +} + +XPTI_EXPORT_API xpti::result_t +xptiRegisterCallback(uint8_t StreamID, uint16_t TraceType, + xpti::tracepoint_callback_api_t cbFunc) { + return xpti::GXPTIFramework.registerCallback(StreamID, TraceType, cbFunc); +} + +XPTI_EXPORT_API xpti::result_t +xptiUnregisterCallback(uint8_t StreamID, uint16_t TraceType, + xpti::tracepoint_callback_api_t cbFunc) { + return xpti::GXPTIFramework.unregisterCallback(StreamID, TraceType, cbFunc); +} + +XPTI_EXPORT_API xpti::result_t +xptiNotifySubscribers(uint8_t StreamID, uint16_t TraceType, + xpti::trace_event_data_t *Parent, + xpti::trace_event_data_t *Object, uint64_t InstanceNo, + const void *TemporalUserData) { + return xpti::GXPTIFramework.notifySubscribers( + StreamID, TraceType, Parent, Object, InstanceNo, TemporalUserData); +} + +XPTI_EXPORT_API bool xptiTraceEnabled() { + return xpti::GXPTIFramework.traceEnabled(); +} + +XPTI_EXPORT_API xpti::result_t xptiAddMetadata(xpti::trace_event_data_t *Event, + const char *Key, + const char *Value) { + return xpti::GXPTIFramework.addMetadata(Event, Key, Value); +} + +XPTI_EXPORT_API xpti::metadata_t * +xptiQueryMetadata(xpti::trace_event_data_t *Event) { + return &Event->reserved.metadata; +} + +XPTI_EXPORT_API 
void xptiForceSetTraceEnabled(bool YesOrNo) { + xpti::GXPTIFramework.setTraceEnabled(YesOrNo); +} +} // extern "C" + +#if (defined(_WIN32) || defined(_WIN64)) + +#include +#include + +BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fwdReason, LPVOID lpvReserved) { + switch (fwdReason) { + case DLL_PROCESS_ATTACH: + break; + case DLL_PROCESS_DETACH: + // + // We cannot unload all subscribers here... + // +#ifdef XPTI_STATISTICS + __g_framework.printStatistics(); +#endif + break; + } + + return TRUE; +} + +#else // Linux (possibly macOS?) + +__attribute__((constructor)) static void framework_init() {} + +__attribute__((destructor)) static void framework_fini() { +#ifdef XPTI_STATISTICS + __g_framework.printStatistics(); +#endif +} + +#endif diff --git a/xptifw/unit_test/CMakeLists.txt b/xptifw/unit_test/CMakeLists.txt new file mode 100644 index 0000000000000..aab3f0b247eff --- /dev/null +++ b/xptifw/unit_test/CMakeLists.txt @@ -0,0 +1,42 @@ +if (NOT EXISTS ${XPTI_SOURCE_DIR}) + message (FATAL_ERROR "Undefined XPTI_SOURCE_DIR variable: Must be set for tests to work!") +endif() +include_directories(${XPTI_SOURCE_DIR}/include) + +# Download and unpack googletest at configure time +configure_file(../CMakeLists.txt.in googletest-download/CMakeLists.txt) +execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download ) +if(result) + message(FATAL_ERROR "CMake step for googletest failed: ${result}") +endif() +execute_process(COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download ) +if(result) + message(FATAL_ERROR "Build step for googletest failed: ${result}") +endif() + +# Prevent overriding the parent project's compiler/linker +# settings on Windows +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + +# Add googletest directly to our build. This defines +# the gtest and gtest_main targets. 
+add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src + ${CMAKE_CURRENT_BINARY_DIR}/googletest-build + EXCLUDE_FROM_ALL) + +# The gtest/gtest_main targets carry header search path +# dependencies automatically when using CMake 2.8.11 or +# later. Otherwise we have to add them here ourselves. +if (CMAKE_VERSION VERSION_LESS 2.8.11) + include_directories("${gtest_SOURCE_DIR}/include") +endif() + +# Now simply link against gtest or gtest_main as needed. Eg +add_executable(xpti_tests xpti_api_tests.cpp xpti_correctness_tests.cpp) +target_link_libraries(xpti_tests gtest) +target_link_libraries(xpti_tests gtest_main xptifw) +add_test(NAME example_test COMMAND xpti_tests) diff --git a/xptifw/unit_test/README.md b/xptifw/unit_test/README.md new file mode 100644 index 0000000000000..d843d195cdd64 --- /dev/null +++ b/xptifw/unit_test/README.md @@ -0,0 +1,7 @@ +# Unit tests + +Unit tests in XPTI use the googletest framework and test the APIs for +correctness. + +For more detail on the framework, the tests that are provided and their usage, +please consult the [XPTI Framework library documentation](doc/XPTI_Framework.md). diff --git a/xptifw/unit_test/xpti_api_tests.cpp b/xptifw/unit_test/xpti_api_tests.cpp new file mode 100644 index 0000000000000..5420ab40bf6e1 --- /dev/null +++ b/xptifw/unit_test/xpti_api_tests.cpp @@ -0,0 +1,318 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +#include "xpti_trace_framework.hpp" + +#include +#include +#include + +TEST(xptiApiTest, xptiInitializeBadInput) { + auto Result = xptiInitialize(nullptr, 0, 0, nullptr); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_INVALIDARG); +} + +TEST(xptiApiTest, xptiRegisterStringBadInput) { + char *TStr; + + auto ID = xptiRegisterString(nullptr, nullptr); + EXPECT_EQ(ID, xpti::invalid_id); + ID = xptiRegisterString(nullptr, &TStr); + EXPECT_EQ(ID, xpti::invalid_id); + ID = xptiRegisterString("foo", nullptr); + EXPECT_EQ(ID, xpti::invalid_id); +} + +TEST(xptiApiTest, xptiRegisterStringGoodInput) { + char *TStr = nullptr; + + auto ID = xptiRegisterString("foo", &TStr); + EXPECT_NE(ID, xpti::invalid_id); + EXPECT_NE(TStr, nullptr); + EXPECT_STREQ("foo", TStr); +} + +TEST(xptiApiTest, xptiLookupStringBadInput) { + const char *TStr; + xptiReset(); + TStr = xptiLookupString(-1); + EXPECT_EQ(TStr, nullptr); +} + +TEST(xptiApiTest, xptiLookupStringGoodInput) { + char *TStr = nullptr; + + auto ID = xptiRegisterString("foo", &TStr); + EXPECT_NE(ID, xpti::invalid_id); + EXPECT_NE(TStr, nullptr); + EXPECT_STREQ("foo", TStr); + + const char *LookUpString = xptiLookupString(ID); + EXPECT_EQ(LookUpString, TStr); + EXPECT_STREQ(LookUpString, TStr); + EXPECT_STREQ("foo", LookUpString); +} + +TEST(xptiApiTest, xptiGetUniqueId) { + std::set IDs; + for (int i = 0; i < 10; ++i) { + auto ID = xptiGetUniqueId(); + auto Loc = IDs.find(ID); + EXPECT_EQ(Loc, IDs.end()); + IDs.insert(ID); + } +} + +TEST(xptiApiTest, xptiRegisterStreamBadInput) { + auto ID = xptiRegisterStream(nullptr); + EXPECT_EQ(ID, (uint8_t)xpti::invalid_id); +} + +TEST(xptiApiTest, xptiRegisterStreamGoodInput) { + auto ID = xptiRegisterStream("foo"); + EXPECT_NE(ID, xpti::invalid_id); + auto NewID = xptiRegisterStream("foo"); + EXPECT_EQ(ID, NewID); +} + +TEST(xptiApiTest, xptiUnregisterStreamBadInput) { + auto Result = xptiUnregisterStream(nullptr); + 
EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_INVALIDARG); +} + +TEST(xptiApiTest, xptiUnregisterStreamGoodInput) { + auto ID = xptiRegisterStream("foo"); + EXPECT_NE(ID, xpti::invalid_id); + auto Result = xptiUnregisterStream("NoSuchStream"); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_NOTFOUND); + // Event though stream exists, no callbacks registered + auto NewResult = xptiUnregisterStream("foo"); + EXPECT_EQ(NewResult, xpti::result_t::XPTI_RESULT_NOTFOUND); +} + +TEST(xptiApiTest, xptiMakeEventBadInput) { + xpti::payload_t P; + auto Result = + xptiMakeEvent(nullptr, &P, 0, (xpti::trace_activity_type_t)1, nullptr); + EXPECT_EQ(Result, nullptr); + P = xpti::payload_t("foo", "foo.cpp", 1, 0, (void *)13); + EXPECT_NE(P.flags, 0); + Result = + xptiMakeEvent(nullptr, &P, 0, (xpti::trace_activity_type_t)1, nullptr); + EXPECT_EQ(Result, nullptr); + Result = xptiMakeEvent("foo", &P, 0, (xpti::trace_activity_type_t)1, nullptr); + EXPECT_EQ(Result, nullptr); +} + +TEST(xptiApiTest, xptiMakeEventGoodInput) { + uint64_t instance; + xpti::payload_t Payload("foo", "foo.cpp", 1, 0, (void *)13); + auto Result = xptiMakeEvent("foo", &Payload, 0, + (xpti::trace_activity_type_t)1, &instance); + EXPECT_NE(Result, nullptr); + EXPECT_EQ(instance, 1); + Payload = xpti::payload_t("foo", "foo.cpp", 1, 0, (void *)13); + auto NewResult = xptiMakeEvent("foo", &Payload, 0, + (xpti::trace_activity_type_t)1, &instance); + EXPECT_EQ(Result, NewResult); + EXPECT_EQ(instance, 2); +} + +TEST(xptiApiTest, xptiFindEventBadInput) { + auto Result = xptiFindEvent(0); + EXPECT_EQ(Result, nullptr); + Result = xptiFindEvent(1000000); + EXPECT_EQ(Result, nullptr); +} + +TEST(xptiApiTest, xptiFindEventGoodInput) { + uint64_t Instance; + xpti::payload_t Payload("foo", "foo.cpp", 1, 0, (void *)13); + + auto Result = xptiMakeEvent("foo", &Payload, 0, + (xpti::trace_activity_type_t)1, &Instance); + EXPECT_NE(Result, nullptr); + EXPECT_GT(Instance, 1); + auto NewResult = xptiFindEvent(Result->unique_id); + 
EXPECT_EQ(Result, NewResult); +} + +TEST(xptiApiTest, xptiQueryPayloadBadInput) { + auto Result = xptiQueryPayload(nullptr); + EXPECT_EQ(Result, nullptr); +} + +TEST(xptiApiTest, xptiQueryPayloadGoodInput) { + uint64_t instance; + xpti::payload_t Payload("foo", "foo.cpp", 1, 0, (void *)13); + auto Result = xptiMakeEvent("foo", &Payload, 0, + (xpti::trace_activity_type_t)1, &instance); + EXPECT_NE(Result, nullptr); + EXPECT_GT(instance, 1); + auto NewResult = xptiQueryPayload(Result); + EXPECT_STREQ(Payload.name, NewResult->name); + EXPECT_STREQ(Payload.source_file, NewResult->source_file); + // NewResult->name_sid will have a string ID whereas 'Payload' will not + EXPECT_NE(Payload.name_sid, NewResult->name_sid); + EXPECT_NE(Payload.source_file_sid, NewResult->source_file_sid); + EXPECT_EQ(Payload.line_no, NewResult->line_no); +} + +TEST(xptiApiTest, xptiTraceEnabled) { + // If no env is set, this should be false + // The state is determined at app startup + // XPTI_TRACE_ENABLE=1 or 0 and XPTI_FRAMEWORK_DISPATCHER= + // Result false + auto Result = xptiTraceEnabled(); + EXPECT_EQ(Result, false); +} + +XPTI_CALLBACK_API void trace_point_callback(uint16_t trace_type, + xpti::trace_event_data_t *parent, + xpti::trace_event_data_t *event, + uint64_t instance, + const void *user_data) { + + if (user_data) + (*(int *)user_data) = 1; +} + +XPTI_CALLBACK_API void trace_point_callback2(uint16_t trace_type, + xpti::trace_event_data_t *parent, + xpti::trace_event_data_t *event, + uint64_t instance, + const void *user_data) { + if (user_data) + (*(int *)user_data) = 1; +} + +TEST(xptiApiTest, xptiRegisterCallbackBadInput) { + uint8_t StreamID = xptiRegisterStream("foo"); + auto Result = xptiRegisterCallback(StreamID, 1, nullptr); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_INVALIDARG); +} + +TEST(xptiApiTest, xptiRegisterCallbackGoodInput) { + uint64_t instance; + xpti::payload_t Payload("foo", "foo.cpp", 1, 0, (void *)13); + + auto Event = xptiMakeEvent("foo", &Payload, 
0, (xpti::trace_activity_type_t)1, + &instance); + EXPECT_NE(Event, nullptr); + + uint8_t StreamID = xptiRegisterStream("foo"); + auto Result = xptiRegisterCallback(StreamID, 1, trace_point_callback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback(StreamID, 1, trace_point_callback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_DUPLICATE); +} + +TEST(xptiApiTest, xptiUnregisterCallbackBadInput) { + uint8_t StreamID = xptiRegisterStream("foo"); + auto Result = xptiUnregisterCallback(StreamID, 1, nullptr); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_INVALIDARG); +} + +TEST(xptiApiTest, xptiUnregisterCallbackGoodInput) { + uint64_t instance; + xpti::payload_t Payload("foo", "foo.cpp", 1, 0, (void *)13); + + auto Event = xptiMakeEvent("foo", &Payload, 0, (xpti::trace_activity_type_t)1, + &instance); + EXPECT_NE(Event, nullptr); + + uint8_t StreamID = xptiRegisterStream("foo"); + auto Result = xptiUnregisterCallback(StreamID, 1, trace_point_callback2); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_NOTFOUND); + Result = xptiRegisterCallback(StreamID, 1, trace_point_callback2); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiUnregisterCallback(StreamID, 1, trace_point_callback2); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiUnregisterCallback(StreamID, 1, trace_point_callback2); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_DUPLICATE); + Result = xptiRegisterCallback(StreamID, 1, trace_point_callback2); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_UNDELETE); +} + +TEST(xptiApiTest, xptiNotifySubscribersBadInput) { + uint8_t StreamID = xptiRegisterStream("foo"); + auto Result = + xptiNotifySubscribers(StreamID, 1, nullptr, nullptr, 0, nullptr); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_FALSE); + xptiForceSetTraceEnabled(true); + Result = xptiNotifySubscribers(StreamID, 1, nullptr, nullptr, 0, nullptr); + EXPECT_EQ(Result, 
xpti::result_t::XPTI_RESULT_INVALIDARG); +} + +TEST(xptiApiTest, xptiNotifySubscribersGoodInput) { + uint64_t instance; + xpti::payload_t Payload("foo", "foo.cpp", 1, 0, (void *)13); + + auto Event = xptiMakeEvent("foo", &Payload, 0, (xpti::trace_activity_type_t)1, + &instance); + EXPECT_NE(Event, nullptr); + + uint8_t StreamID = xptiRegisterStream("foo"); + xptiForceSetTraceEnabled(true); + int foo_return = 0; + auto Result = xptiRegisterCallback(StreamID, 1, trace_point_callback2); + Result = xptiNotifySubscribers(StreamID, 1, nullptr, Event, 0, + (void *)(&foo_return)); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + EXPECT_EQ(foo_return, 1); +} + +TEST(xptiApiTest, xptiAddMetadataBadInput) { + uint64_t instance; + xpti::payload_t Payload("foo", "foo.cpp", 1, 0, (void *)13); + + auto Event = xptiMakeEvent("foo", &Payload, 0, (xpti::trace_activity_type_t)1, + &instance); + EXPECT_NE(Event, nullptr); + + auto Result = xptiAddMetadata(nullptr, nullptr, nullptr); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_INVALIDARG); + Result = xptiAddMetadata(Event, nullptr, nullptr); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_INVALIDARG); + Result = xptiAddMetadata(Event, "foo", nullptr); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_INVALIDARG); + Result = xptiAddMetadata(Event, nullptr, "bar"); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_INVALIDARG); +} + +TEST(xptiApiTest, xptiAddMetadataGoodInput) { + uint64_t instance; + xpti::payload_t Payload("foo", "foo.cpp", 1, 0, (void *)13); + + auto Event = xptiMakeEvent("foo", &Payload, 0, (xpti::trace_activity_type_t)1, + &instance); + EXPECT_NE(Event, nullptr); + + auto Result = xptiAddMetadata(Event, "foo", "bar"); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiAddMetadata(Event, "foo", "bar"); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_DUPLICATE); +} + +TEST(xptiApiTest, xptiQueryMetadata) { + uint64_t instance; + xpti::payload_t Payload("foo", "foo.cpp", 1, 0, (void 
*)13); + + auto Event = xptiMakeEvent("foo", &Payload, 0, (xpti::trace_activity_type_t)1, + &instance); + EXPECT_NE(Event, nullptr); + + auto md = xptiQueryMetadata(Event); + EXPECT_NE(md, nullptr); + + auto Result = xptiAddMetadata(Event, "foo1", "bar1"); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + + char *ts; + EXPECT_TRUE(md->size() > 1); + auto ID = (*md)[xptiRegisterString("foo1", &ts)]; + auto str = xptiLookupString(ID); + EXPECT_STREQ(str, "bar1"); +} diff --git a/xptifw/unit_test/xpti_correctness_tests.cpp b/xptifw/unit_test/xpti_correctness_tests.cpp new file mode 100644 index 0000000000000..9f8b7ece34bd7 --- /dev/null +++ b/xptifw/unit_test/xpti_correctness_tests.cpp @@ -0,0 +1,329 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +#include "xpti_trace_framework.h" +#include "xpti_trace_framework.hpp" + +#include +#include +#include + +XPTI_CALLBACK_API void tpCallback(uint16_t trace_type, + xpti::trace_event_data_t *parent, + xpti::trace_event_data_t *event, + uint64_t instance, const void *user_data) { + + if (user_data) + (*(int *)user_data) = trace_type; +} + +#define NOTIFY(stream, tt, event, retval) \ + { \ + xpti::result_t Result = xptiNotifySubscribers(stream, tt, nullptr, event, \ + 0, (void *)(&retval)); \ + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); \ + EXPECT_EQ(retval, tt); \ + } + +TEST(xptiCorrectnessTest, xptiMakeEvent) { + uint64_t Instance = 0; + xpti::payload_t p("foo", "foo.cpp", 1, 0, (void *)13); + auto Result = + xptiMakeEvent("foo", &p, 0, (xpti::trace_activity_type_t)1, &Instance); + EXPECT_NE(Result, nullptr); + p = xpti::payload_t("foo", "foo.cpp", 1, 0, (void *)13); + auto NewResult = + xptiMakeEvent("foo", &p, 0, (xpti::trace_activity_type_t)1, &Instance); + EXPECT_EQ(Result, NewResult); + EXPECT_EQ(Result->unique_id, 
NewResult->unique_id); + EXPECT_EQ(Result->reserved.payload, NewResult->reserved.payload); + EXPECT_STREQ(Result->reserved.payload->name, "foo"); + EXPECT_STREQ(Result->reserved.payload->source_file, "foo.cpp"); + EXPECT_EQ(Result->reserved.payload->line_no, 1); +} + +TEST(xptiCorrectnessTest, xptiRegisterString) { + char *TStr = nullptr; + auto ID = xptiRegisterString("foo", &TStr); + EXPECT_NE(ID, xpti::invalid_id); + EXPECT_NE(TStr, nullptr); + EXPECT_STREQ("foo", TStr); + + const char *LUTStr = xptiLookupString(ID); + EXPECT_EQ(TStr, LUTStr); + EXPECT_STREQ(LUTStr, TStr); +} + +TEST(xptiCorrectnessTest, xptiInitializeForDefaultTracePointTypes) { + // We will test functionality of a subscriber + // without actually creating a plugin + uint8_t StreamID = xptiRegisterStream("test_foo"); + auto Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::graph_create, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::node_create, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::edge_create, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::region_begin, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::region_end, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::task_begin, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::task_end, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + 
StreamID, (uint16_t)xpti::trace_point_type_t::barrier_begin, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::barrier_end, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::lock_begin, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::lock_end, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::transfer_begin, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::transfer_end, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::thread_begin, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::thread_end, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::wait_begin, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::wait_end, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::signal, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); +} + +TEST(xptiCorrectnessTest, xptiNotifySubscribersForDefaultTracePointTypes) { + uint64_t Instance; + xpti::payload_t p("foo", "foo.cpp", 1, 0, (void *)13); + xptiForceSetTraceEnabled(true); + + uint8_t StreamID = 
xptiRegisterStream("test_foo"); + auto Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::graph_create, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::node_create, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::edge_create, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::region_begin, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::region_end, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::task_begin, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::task_end, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::barrier_begin, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::barrier_end, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::lock_begin, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::lock_end, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::transfer_begin, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::transfer_end, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::thread_begin, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::thread_end, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::wait_begin, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::wait_end, tpCallback); + Result = xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::signal, tpCallback); + + auto GE = + xptiMakeEvent("foo", &p, 0, 
(xpti::trace_activity_type_t)1, &Instance); + EXPECT_NE(GE, nullptr); + + int FooReturn = 0; + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::graph_create, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::node_create, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::edge_create, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::region_begin, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::region_end, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::task_begin, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::task_end, GE, FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::barrier_begin, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::barrier_end, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::lock_begin, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::lock_end, GE, FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::transfer_begin, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::transfer_end, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::thread_begin, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::thread_end, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::wait_begin, GE, + FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::wait_end, GE, FooReturn); + NOTIFY(StreamID, (uint16_t)xpti::trace_point_type_t::signal, GE, FooReturn); +} + +TEST(xptiCorrectnessTest, xptiInitializeForUserDefinedTracePointTypes) { + // We will test functionality of a subscriber + // without actually creating a plugin + uint8_t StreamID = xptiRegisterStream("test_foo"); + typedef enum { + extn1_begin = XPTI_TRACE_POINT_BEGIN(0), + extn1_end = XPTI_TRACE_POINT_END(0), + extn2_begin = XPTI_TRACE_POINT_BEGIN(1), + extn2_end = 
XPTI_TRACE_POINT_END(1) + } tp_extension_t; + + auto TTType = xptiRegisterUserDefinedTracePoint("test_foo_tool", extn1_begin); + auto Result = xptiRegisterCallback(StreamID, TTType, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + TTType = xptiRegisterUserDefinedTracePoint("test_foo_tool", extn1_end); + Result = xptiRegisterCallback(StreamID, TTType, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + TTType = xptiRegisterUserDefinedTracePoint("test_foo_tool", extn2_begin); + Result = xptiRegisterCallback(StreamID, TTType, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); + TTType = xptiRegisterUserDefinedTracePoint("test_foo_tool", extn2_end); + Result = xptiRegisterCallback(StreamID, TTType, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_SUCCESS); +} + +TEST(xptiCorrectnessTest, xptiNotifySubscribersForUserDefinedTracePointTypes) { + uint64_t Instance; + xpti::payload_t p("foo", "foo.cpp", 1, 0, (void *)13); + xptiForceSetTraceEnabled(true); + + uint8_t StreamID = xptiRegisterStream("test_foo"); + typedef enum { + extn1_begin = XPTI_TRACE_POINT_BEGIN(0), + extn1_end = XPTI_TRACE_POINT_END(0), + extn2_begin = XPTI_TRACE_POINT_BEGIN(1), + extn2_end = XPTI_TRACE_POINT_END(1) + } tp_extension_t; + + auto TTType1 = + xptiRegisterUserDefinedTracePoint("test_foo_tool", extn1_begin); + auto Result = xptiRegisterCallback(StreamID, TTType1, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_DUPLICATE); + auto TTType2 = xptiRegisterUserDefinedTracePoint("test_foo_tool", extn1_end); + Result = xptiRegisterCallback(StreamID, TTType2, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_DUPLICATE); + auto TTType3 = + xptiRegisterUserDefinedTracePoint("test_foo_tool", extn2_begin); + Result = xptiRegisterCallback(StreamID, TTType3, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_DUPLICATE); + auto TTType4 = xptiRegisterUserDefinedTracePoint("test_foo_tool", extn2_end); 
+ Result = xptiRegisterCallback(StreamID, TTType4, tpCallback); + EXPECT_EQ(Result, xpti::result_t::XPTI_RESULT_DUPLICATE); + + auto GE = + xptiMakeEvent("foo", &p, 0, (xpti::trace_activity_type_t)1, &Instance); + EXPECT_NE(GE, nullptr); + + int FooReturn = 0; + NOTIFY(StreamID, TTType1, GE, FooReturn); + NOTIFY(StreamID, TTType2, GE, FooReturn); + NOTIFY(StreamID, TTType3, GE, FooReturn); + NOTIFY(StreamID, TTType4, GE, FooReturn); + + auto ToolID1 = XPTI_TOOL_ID(TTType1); + auto ToolID2 = XPTI_TOOL_ID(TTType2); + auto ToolID3 = XPTI_TOOL_ID(TTType3); + auto ToolID4 = XPTI_TOOL_ID(TTType4); + EXPECT_EQ(ToolID1, ToolID2); + EXPECT_EQ(ToolID2, ToolID3); + EXPECT_EQ(ToolID3, ToolID4); + EXPECT_EQ(ToolID4, ToolID1); + + auto TpID1 = XPTI_EXTRACT_USER_DEFINED_ID(TTType1); + auto TpID2 = XPTI_EXTRACT_USER_DEFINED_ID(TTType2); + auto TpID3 = XPTI_EXTRACT_USER_DEFINED_ID(TTType3); + auto TpID4 = XPTI_EXTRACT_USER_DEFINED_ID(TTType4); + EXPECT_NE(TpID1, TpID2); + EXPECT_NE(TpID2, TpID3); + EXPECT_NE(TpID3, TpID4); + EXPECT_NE(TpID4, TpID1); +} + +TEST(xptiCorrectnessTest, xptiGetUniqueId) { + auto Result = xptiGetUniqueId(); + EXPECT_NE(Result, 0); + auto Result1 = xptiGetUniqueId(); + EXPECT_NE(Result, Result1); +} + +TEST(xptiCorrectnessTest, xptiUserDefinedEventTypes) { + uint64_t Instance; + xpti::payload_t p("foo", "foo.cpp", 1, 0, (void *)13); + xptiForceSetTraceEnabled(true); + + uint8_t StreamID = xptiRegisterStream("test_foo"); + typedef enum { + extn_ev1 = XPTI_EVENT(0), + extn_ev2 = XPTI_EVENT(1), + extn_ev3 = XPTI_EVENT(2), + extn_ev4 = XPTI_EVENT(3) + } event_extension_t; + + auto EventType1 = xptiRegisterUserDefinedEventType("test_foo_tool", extn_ev1); + auto EventType2 = xptiRegisterUserDefinedEventType("test_foo_tool", extn_ev2); + auto EventType3 = xptiRegisterUserDefinedEventType("test_foo_tool", extn_ev3); + auto EventType4 = xptiRegisterUserDefinedEventType("test_foo_tool", extn_ev4); + EXPECT_NE(EventType1, EventType2); + EXPECT_NE(EventType2, 
EventType3); + EXPECT_NE(EventType3, EventType4); + EXPECT_NE(EventType4, EventType1); + + auto ToolID1 = XPTI_TOOL_ID(EventType1); + auto ToolID2 = XPTI_TOOL_ID(EventType2); + auto ToolID3 = XPTI_TOOL_ID(EventType3); + auto ToolID4 = XPTI_TOOL_ID(EventType4); + EXPECT_EQ(ToolID1, ToolID2); + EXPECT_EQ(ToolID2, ToolID3); + EXPECT_EQ(ToolID3, ToolID4); + EXPECT_EQ(ToolID4, ToolID1); + + auto TpID1 = XPTI_EXTRACT_USER_DEFINED_ID(EventType1); + auto TpID2 = XPTI_EXTRACT_USER_DEFINED_ID(EventType2); + auto TpID3 = XPTI_EXTRACT_USER_DEFINED_ID(EventType3); + auto TpID4 = XPTI_EXTRACT_USER_DEFINED_ID(EventType4); + EXPECT_NE(TpID1, TpID2); + EXPECT_NE(TpID2, TpID3); + EXPECT_NE(TpID3, TpID4); + EXPECT_NE(TpID4, TpID1); +}