Skip to content

[SYCL] Add runtime support for device code argument elimination #2315

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -581,8 +581,9 @@ using _pi_offload_entry = _pi_offload_entry_struct *;
// A type of a binary image property.
typedef enum {
PI_PROPERTY_TYPE_UNKNOWN,
PI_PROPERTY_TYPE_UINT32, // 32-bit integer
PI_PROPERTY_TYPE_STRING // null-terminated string
PI_PROPERTY_TYPE_UINT32, // 32-bit integer
PI_PROPERTY_TYPE_BYTE_ARRAY, // byte array
PI_PROPERTY_TYPE_STRING // null-terminated string
} pi_property_type;

// Device binary image property.
Expand Down Expand Up @@ -652,6 +653,8 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4;
#define PI_PROPERTY_SET_SPEC_CONST_MAP "SYCL/specialization constants"
/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h
#define PI_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask"
/// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h
#define PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO "SYCL/kernel param opt"

/// This struct is a record of the device binary information. If the Kind field
/// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec
Expand Down
23 changes: 23 additions & 0 deletions sycl/include/CL/sycl/detail/pi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@
#include <CL/sycl/detail/pi.h>

#include <cassert>
#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

#ifdef XPTI_ENABLE_INSTRUMENTATION
// Forward declarations
Expand Down Expand Up @@ -197,13 +199,30 @@ void printArgs(Arg0 arg0, Args... args) {
pi::printArgs(std::forward<Args>(args)...);
}

// A wrapper for passing around byte array properties
class ByteArray {
public:
using ConstIterator = const std::uint8_t *;

ByteArray(const std::uint8_t *Ptr, std::size_t Size) : Ptr{Ptr}, Size{Size} {}
const std::uint8_t &operator[](std::size_t Idx) const { return Ptr[Idx]; }
std::size_t size() const { return Size; }
ConstIterator begin() const { return Ptr; }
ConstIterator end() const { return Ptr + Size; }

private:
const std::uint8_t *Ptr;
const std::size_t Size;
};

// C++ wrapper over the _pi_device_binary_property_struct structure.
class DeviceBinaryProperty {
public:
DeviceBinaryProperty(const _pi_device_binary_property_struct *Prop)
: Prop(Prop) {}

pi_uint32 asUint32() const;
ByteArray asByteArray() const;
const char *asCString() const;

protected:
Expand Down Expand Up @@ -300,6 +319,9 @@ class DeviceBinaryImage {
/// value is 32-bit unsigned integer ID.
const PropertyRange &getSpecConstants() const { return SpecConstIDMap; }
const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; }
const PropertyRange &getKernelParamOptInfo() const {
return KernelParamOptInfo;
}
virtual ~DeviceBinaryImage() {}

protected:
Expand All @@ -310,6 +332,7 @@ class DeviceBinaryImage {
pi::PiDeviceBinaryType Format = PI_DEVICE_BINARY_TYPE_NONE;
DeviceBinaryImage::PropertyRange SpecConstIDMap;
DeviceBinaryImage::PropertyRange DeviceLibReqMask;
DeviceBinaryImage::PropertyRange KernelParamOptInfo;
};

/// Tries to determine the device binary image foramat. Returns
Expand Down
23 changes: 22 additions & 1 deletion sycl/source/detail/pi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,9 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) {
case PI_PROPERTY_TYPE_UINT32:
Out << "[UINT32] ";
break;
case PI_PROPERTY_TYPE_BYTE_ARRAY:
Out << "[Byte array] ";
break;
case PI_PROPERTY_TYPE_STRING:
Out << "[String] ";
break;
Expand All @@ -429,11 +432,21 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) {
case PI_PROPERTY_TYPE_UINT32:
Out << P.asUint32();
break;
case PI_PROPERTY_TYPE_BYTE_ARRAY: {
ByteArray BA = P.asByteArray();
std::ios_base::fmtflags FlagsBackup = Out.flags();
Out << std::hex;
for (const auto &Byte : BA) {
Out << "0x" << Byte << " ";
}
Out.flags(FlagsBackup);
break;
}
case PI_PROPERTY_TYPE_STRING:
Out << P.asCString();
break;
default:
assert("unsupported property");
assert(false && "Unsupported property");
return Out;
}
return Out;
Expand Down Expand Up @@ -491,6 +504,13 @@ pi_uint32 DeviceBinaryProperty::asUint32() const {
return sycl::detail::pi::asUint32(&Prop->ValSize);
}

ByteArray DeviceBinaryProperty::asByteArray() const {
assert(Prop->Type == PI_PROPERTY_TYPE_BYTE_ARRAY && "property type mismatch");
assert(Prop->ValSize > 0 && "property size mismatch");
const auto *Data = pi::cast<const std::uint8_t *>(Prop->ValAddr);
return {Data, Prop->ValSize};
}

const char *DeviceBinaryProperty::asCString() const {
assert(Prop->Type == PI_PROPERTY_TYPE_STRING && "property type mismatch");
assert(Prop->ValSize > 0 && "property size mismatch");
Expand Down Expand Up @@ -550,6 +570,7 @@ void DeviceBinaryImage::init(pi_device_binary Bin) {

SpecConstIDMap.init(Bin, PI_PROPERTY_SET_SPEC_CONST_MAP);
DeviceLibReqMask.init(Bin, PI_PROPERTY_SET_DEVICELIB_REQ_MASK);
KernelParamOptInfo.init(Bin, PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO);
}

} // namespace pi
Expand Down
82 changes: 82 additions & 0 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
Expand Down Expand Up @@ -397,6 +398,10 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M,
Img.getLinkOptions(), PiDevices,
ContextImpl->getCachedLibPrograms(), DeviceLibReqMask);

{
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
NativePrograms[BuiltProgram.get()] = &Img;
}
return BuiltProgram.release();
};

Expand Down Expand Up @@ -851,6 +856,23 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context,
return Program;
}

static ProgramManager::KernelArgMask
createKernelArgMask(const pi::ByteArray &Bytes) {
const int NBytesForSize = 8;
const int NBitsInElement = 8;
std::uint64_t SizeInBits = 0;
for (int I = 0; I < NBytesForSize; ++I)
SizeInBits |= static_cast<std::uint64_t>(Bytes[I]) << I * NBitsInElement;

ProgramManager::KernelArgMask Result;
for (std::uint64_t I = 0; I < SizeInBits; ++I) {
std::uint8_t Byte = Bytes[NBytesForSize + (I / NBitsInElement)];
Result.push_back(Byte & (1 << (I % NBitsInElement)));
}

return Result;
}

void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());

Expand All @@ -860,6 +882,17 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) {
const _pi_offload_entry EntriesB = RawImg->EntriesBegin;
const _pi_offload_entry EntriesE = RawImg->EntriesEnd;
auto Img = make_unique_ptr<RTDeviceBinaryImage>(RawImg, M);

// Fill the kernel argument mask map
const pi::DeviceBinaryImage::PropertyRange &KPOIRange =
Img->getKernelParamOptInfo();
if (KPOIRange.isAvailable()) {
KernelNameToArgMaskMap &ArgMaskMap =
m_EliminatedKernelArgMasks[Img.get()];
for (const auto &Info : KPOIRange)
ArgMaskMap[Info->Name] =
createKernelArgMask(pi::DeviceBinaryProperty(Info).asByteArray());
}
// Use the entry information if it's available
if (EntriesB != EntriesE) {
// The kernel sets for any pair of images are either disjoint or
Expand Down Expand Up @@ -1018,6 +1051,55 @@ uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) {
return 0xFFFFFFFF;
}

// TODO consider another approach with storing the masks in the integration
// header instead.
ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask(
OSModuleHandle M, const context &Context, pi::PiProgram NativePrg,
const string_class &KernelName, bool KnownProgram) {
// If instructed to use a spv file, assume no eliminated arguments.
if (m_UseSpvFile && M == OSUtil::ExeModuleHandle)
return {};

{
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
auto ImgIt = NativePrograms.find(NativePrg);
if (ImgIt != NativePrograms.end()) {
auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second);
if (MapIt != m_EliminatedKernelArgMasks.end())
return MapIt->second[KernelName];
return {};
}
}

if (KnownProgram)
throw runtime_error("Program is not associated with a binary image",
PI_INVALID_VALUE);

// If not sure whether the program was built with one of the images, try
// finding the binary.
// TODO this can backfire in some extreme edge cases where there's a kernel
// name collision between our binaries and user-created native programs.
KernelSetId KSId;
try {
KSId = getKernelSetId(M, KernelName);
} catch (sycl::runtime_error &e) {
// If the kernel name wasn't found, assume that the program wasn't created
// from one of our device binary images.
if (e.get_cl_code() == PI_INVALID_KERNEL_NAME)
return {};
std::rethrow_exception(std::current_exception());
}
RTDeviceBinaryImage &Img = getDeviceImage(M, KSId, Context);
{
std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
NativePrograms[NativePrg] = &Img;
}
auto MapIt = m_EliminatedKernelArgMasks.find(&Img);
if (MapIt != m_EliminatedKernelArgMasks.end())
return MapIt->second[KernelName];
return {};
}

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
Expand Down
29 changes: 29 additions & 0 deletions sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ enum class DeviceLibExt : std::uint32_t {
// that is necessary for no interoperability cases with lambda.
class ProgramManager {
public:
// TODO use a custom dynamic bitset instead to make initialization simpler.
using KernelArgMask = std::vector<bool>;

// Returns the single instance of the program manager for the entire
// process. Can only be called after staticInit is done.
static ProgramManager &getInstance();
Expand Down Expand Up @@ -110,6 +113,22 @@ class ProgramManager {
const RTDeviceBinaryImage *Img = nullptr);
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);

/// Returns the mask for eliminated kernel arguments for the requested kernel
/// within the native program.
/// \param M identifies the OS module the kernel comes from (multiple OS
/// modules may have kernels with the same name).
/// \param Context the context associated with the kernel.
/// \param NativePrg the PI program associated with the kernel.
/// \param KernelName the name of the kernel.
/// \param KnownProgram indicates whether the PI program is guaranteed to
/// be known to program manager (built with its API) or not (not
/// cacheable or constructed with interoperability).
KernelArgMask getEliminatedKernelArgMask(OSModuleHandle M,
const context &Context,
pi::PiProgram NativePrg,
const string_class &KernelName,
bool KnownProgram);

private:
ProgramManager();
~ProgramManager() = default;
Expand Down Expand Up @@ -175,6 +194,8 @@ class ProgramManager {
// - knowing which specialization constants are used in the program and
// injecting their current values before compiling the SPIRV; the binary
// image object has info about all spec constants used in the module
// - finding kernel argument masks for kernels associated with each
// pi_program
// NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
// referenced from outside SYCL runtime and RTDeviceBinaryImage object
// lifetime matches program manager's one.
Expand All @@ -186,6 +207,14 @@ class ProgramManager {

/// Protects NativePrograms that can be changed by class' methods.
std::mutex MNativeProgramsMutex;

using KernelNameToArgMaskMap =
std::unordered_map<string_class, KernelArgMask>;
/// Maps binary image and kernel name pairs to kernel argument masks which
/// specify which arguments were eliminated during device code optimization.
std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
m_EliminatedKernelArgMasks;

/// True iff a SPIRV file has been specified with an environment variable
bool m_UseSpvFile = false;
};
Expand Down
Loading