diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index ea07d8dd2d02..9536fd85fed5 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -168,7 +168,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__PTX__"); Builder.defineMacro("__NVPTX__"); - if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice) { + if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice || Opts.SYCLIsDevice) { // Set __CUDA_ARCH__ for the GPU specified. std::string CUDAArchCode = [this] { switch (GPU) { diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 63450ee7e7cf..e6626b807c10 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -101,6 +101,8 @@ #include // ::getenv #include #include +#include +#include #include #if LLVM_ON_UNIX #include // getpid @@ -5064,6 +5066,76 @@ class OffloadingActionBuilder final { } } + // Return whether to use native bfloat16 library. + bool selectBfloatLibs(const ToolChain *TC, bool &useNative) { + const OptTable &Opts = C.getDriver().getOpts(); + const char *TargetOpt = nullptr; + const char *DeviceOpt = nullptr; + bool needLibs = false; + for (auto *A : Args) { + llvm::Triple *TargetBE = nullptr; + + auto GetTripleIt = [&, this](llvm::StringRef Triple) { + llvm::Triple TargetTriple{Triple}; + auto TripleIt = llvm::find_if(SYCLTripleList, [&](auto &SYCLTriple) { + return SYCLTriple == TargetTriple; + }); + return TripleIt != SYCLTripleList.end() ? &*TripleIt : nullptr; + }; + + if (A->getOption().matches(options::OPT_fsycl_targets_EQ)) { + // spir64 target is actually JIT compilation, so we defer selection of + // bfloat16 libraries to runtime. For AOT we need libraries. + needLibs = TC->getTriple().getSubArch() != llvm::Triple::NoSubArch; + TargetBE = GetTripleIt(A->getValue(0)); + if (TargetBE) + TargetOpt = A->getValue(0); + else + continue; + } else if (A->getOption().matches(options::OPT_Xsycl_backend_EQ)) { + // Passing device args: -Xsycl-target-backend= + TargetBE = GetTripleIt(A->getValue(0)); + if (TargetBE) + DeviceOpt = A->getValue(1); + else + continue; + } else if (A->getOption().matches(options::OPT_Xsycl_backend)) { + // Passing device args: -Xsycl-target-backend + TargetBE = &SYCLTripleList.front(); + DeviceOpt = A->getValue(0); + } else if (A->getOption().matches(options::OPT_Xs_separate)) { + // Passing device args: -Xs + DeviceOpt = A->getValue(0); + } else { + continue; + }; + } + useNative = false; + if (needLibs) + if (TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen && + TargetOpt && DeviceOpt) { + + auto checkBF = [=](std::string &Dev) { + static const std::regex BFFs("pvc.*|ats.*"); + return std::regex_match(Dev, BFFs); + }; + + needLibs = true; + std::string Params{DeviceOpt}; + size_t DevicesPos = Params.find("-device "); + useNative = false; + if (DevicesPos != std::string::npos) { + useNative = true; + std::istringstream Devices(Params.substr(DevicesPos + 8)); + for (std::string S; std::getline(Devices, S, ',');) { + useNative &= checkBF(S); + } + } + } + + return needLibs; + } + bool addSYCLDeviceLibs(const ToolChain *TC, ActionList &DeviceLinkObjects, bool isSpirvAOT, bool isMSVCEnv) { struct DeviceLibOptInfo { @@ -5139,6 +5211,10 @@ class OffloadingActionBuilder final { {"libsycl-fallback-imf", "libimf-fp32"}, {"libsycl-fallback-imf-fp64", "libimf-fp64"}, {"libsycl-fallback-imf-bf16", "libimf-bf16"}}; + const SYCLDeviceLibsList sycl_device_bfloat16_fallback_lib = { + {"libsycl-fallback-bfloat16", "libm-bfloat16"}}; + const SYCLDeviceLibsList sycl_device_bfloat16_native_lib = { + {"libsycl-native-bfloat16", "libm-bfloat16"}}; // ITT annotation libraries are linked in separately whenever the device // code instrumentation is enabled. const SYCLDeviceLibsList sycl_device_annotation_libs = { @@ -5188,6 +5264,17 @@ class OffloadingActionBuilder final { addInputs(sycl_device_wrapper_libs); if (isSpirvAOT || TC->getTriple().isNVPTX()) addInputs(sycl_device_fallback_libs); + + bool nativeBfloatLibs; + bool needBfloatLibs = selectBfloatLibs(TC, nativeBfloatLibs); + if (needBfloatLibs) { + // Add native or fallback bfloat16 library. + if (nativeBfloatLibs) + addInputs(sycl_device_bfloat16_native_lib); + else + addInputs(sycl_device_bfloat16_fallback_lib); + } + if (Args.hasFlag(options::OPT_fsycl_instrument_device_code, options::OPT_fno_sycl_instrument_device_code, true)) addInputs(sycl_device_annotation_libs); diff --git a/clang/test/Driver/sycl-bfloat16-lib-win.cpp b/clang/test/Driver/sycl-bfloat16-lib-win.cpp new file mode 100755 index 000000000000..f5f9c8c97d69 --- /dev/null +++ b/clang/test/Driver/sycl-bfloat16-lib-win.cpp @@ -0,0 +1,283 @@ +/// +/// Check if bfloat16 native and fallback libraries are added on Windows +/// + +// REQUIRES: windows +// REQUIRES: opencl-aot, ocloc, cpu, gpu +// UNSUPPORTED: cuda + +/// ########################################################################### +/// test that no bfloat16 libraries are added in JIT mode +// RUN: %clangxx -fsycl %s --sysroot=%S/Inputs/SYCL -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16 + +// test that fallback bfloat16 libraries are added in JIT mode with generic target +// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK + +// test that a PVC AOT compilation uses the native library +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE + +// test that unless all targets support bfloat16, AOT compilation uses the fallback library +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc,gen9" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK + +// test that when all targets support bfloat16, AOT compilation uses the native library +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc-sdv,ats-m75" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE + +// test that a gen9 AOT compilation uses the fallback library +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK + +// test that a generic AOT compilation uses the fallback library +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK + +// test that a mixed JIT + AOT-PVC compilation uses no libs + fallback libs +// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NONE-NATIVE + +// test that a mixed JIT + AOT-Gen9 compilation uses no libs + native libs +// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NONE-FALLBACK + +// test that an AOT-CPU + AOT-PVC compilation fallback + fallback libs +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NATIVE + +// test that an AOT-CPU + AOT-Gen9 compilation uses fallback + native libs +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK + +// BFLOAT16: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NOT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-{{fallback|native}}-bfloat16.obj" "-output={{.*}}libsycl-{{fallback|native}}-{{.*}}.o" "-unbundle" + +// BFLOAT16-NATIVE: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-native-bfloat16.obj" + +// BFLOAT16-FALLBACK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-bfloat16.obj" + +// BFLOAT16-NONE-NATIVE: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-mscv-math-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-user-wrappers.obj" "-output={{.*}}libsycl-itt-user-wrappers-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-compiler-wrappers.obj" "-output={{.*}}libsycl-itt-compiler-wrappers-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-stubs.obj" "-output={{.*}}libsycl-itt-stubs-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: llvm-link{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: sycl-post-link{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: file-table-tform{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: llvm-foreach{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: file-table-tform{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-wrapper{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: llc{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: clang-16{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: llvm-link{{.*}} +// BFLOAT16-NONE-NATIVE: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-mscv-math-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-native-bfloat16.obj" + +// BFLOAT16-NONE-FALLBACK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-mscv-math-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-user-wrappers.obj" "-output={{.*}}libsycl-itt-user-wrappers-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-compiler-wrappers.obj" "-output={{.*}}libsycl-itt-compiler-wrappers-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-stubs.obj" "-output={{.*}}libsycl-itt-stubs-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: llvm-link{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: sycl-post-link{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: file-table-tform{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: llvm-foreach{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: file-table-tform{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-wrapper{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: llc{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: clang-16{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: llvm-link{{.*}} +// BFLOAT16-NONE-FALLBACK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-mscv-math-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-bfloat16.obj" + +// BFLOAT16-FALLBACK-NATIVE: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-mscv-math-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-bfloat16.obj" "-output={{.*}}libsycl-fallback-bfloat16-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-user-wrappers.obj" "-output={{.*}}libsycl-itt-user-wrappers-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-compiler-wrappers.obj" "-output={{.*}}libsycl-itt-compiler-wrappers-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-stubs.obj" "-output={{.*}}libsycl-itt-stubs-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: llvm-link{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: sycl-post-link{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: file-table-tform{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: llvm-foreach{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: llvm-foreach{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: file-table-tform{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-wrapper{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: llc{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-16{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: llvm-link{{.*}} +// BFLOAT16-FALLBACK-NATIVE: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-mscv-math-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-native-bfloat16.obj" + +// BFLOAT16-FALLBACK-FALLBACK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-mscv-math-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-bfloat16.obj" "-output={{.*}}libsycl-fallback-bfloat16-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-user-wrappers.obj" "-output={{.*}}libsycl-itt-user-wrappers-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-compiler-wrappers.obj" "-output={{.*}}libsycl-itt-compiler-wrappers-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-stubs.obj" "-output={{.*}}libsycl-itt-stubs-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: llvm-link{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: sycl-post-link{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: file-table-tform{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: llvm-foreach{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: llvm-foreach{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: file-table-tform{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-wrapper{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: llc{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-16{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: llvm-link{{.*}} +// BFLOAT16-FALLBACK-FALLBACK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-mscv-math.obj" "-output={{.*}}libsycl-mscv-math-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.obj" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.obj" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.obj" "-output={{.*}}libsycl-fallback-complex-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.obj" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.obj" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.obj" "-output={{.*}}libsycl-fallback-imf-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.obj" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.obj" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-bfloat16.obj" \ No newline at end of file diff --git a/clang/test/Driver/sycl-bfloat16-lib.cpp b/clang/test/Driver/sycl-bfloat16-lib.cpp new file mode 100755 index 000000000000..54066a3c5694 --- /dev/null +++ b/clang/test/Driver/sycl-bfloat16-lib.cpp @@ -0,0 +1,283 @@ +/// +/// Check if bfloat16 native and fallback libraries are added on Linux +/// + +// UNSUPPORTED: system-windows +// REQUIRES: opencl-aot, ocloc, cpu, gpu +// UNSUPPORTED: cuda + +/// ########################################################################### +/// test that no bfloat16 libraries are added in JIT mode +// RUN: %clangxx -fsycl %s --sysroot=%S/Inputs/SYCL -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16 + +// test that no bfloat16 libraries are added in JIT mode with generic target +// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16 + +// test that a PVC AOT compilation uses the native library +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE + +// test that unless all targets support bfloat16, AOT compilation uses the fallback library +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc,gen9" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK + +// test that when all targets support bfloat16, AOT compilation uses the native library +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc-sdv,ats-m75" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE + +// test that a gen9 AOT compilation uses the fallback library +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK + +// test that a generic AOT compilation uses the fallback library +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK + +// test that a mixed JIT + AOT-PVC compilation uses no libs + fallback libs +// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NONE-NATIVE + +// test that a mixed JIT + AOT-Gen9 compilation uses no libs + native libs +// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NONE-FALLBACK + +// test that an AOT-CPU + AOT-PVC compilation fallback + fallback libs +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NATIVE + +// test that an AOT-CPU + AOT-Gen9 compilation uses fallback + native libs +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK + +// BFLOAT16: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NOT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-{{fallback|native}}-bfloat16.o" "-output={{.*}}libsycl-{{fallback|native}}-{{.*}}.o" "-unbundle" + +// BFLOAT16-NATIVE: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-native-bfloat16.o" + +// BFLOAT16-FALLBACK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-{{spir64_gen-|spir64-}}unknown-unknown" "-input={{.*}}libsycl-fallback-bfloat16.o" + +// BFLOAT16-NONE-NATIVE: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-user-wrappers.o" "-output={{.*}}libsycl-itt-user-wrappers-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-compiler-wrappers.o" "-output={{.*}}libsycl-itt-compiler-wrappers-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-stubs.o" "-output={{.*}}libsycl-itt-stubs-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: llvm-link{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: sycl-post-link{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: file-table-tform{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: llvm-foreach{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: file-table-tform{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-wrapper{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: llc{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: clang-16{{.*}} +// BFLOAT16-NONE-NATIVE-NEXT: llvm-link{{.*}} +// BFLOAT16-NONE-NATIVE: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-native-bfloat16.o" + +// BFLOAT16-NONE-FALLBACK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-user-wrappers.o" "-output={{.*}}libsycl-itt-user-wrappers-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-compiler-wrappers.o" "-output={{.*}}libsycl-itt-compiler-wrappers-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-itt-stubs.o" "-output={{.*}}libsycl-itt-stubs-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: llvm-link{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: sycl-post-link{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: file-table-tform{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: llvm-foreach{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: file-table-tform{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-wrapper{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: llc{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: clang-16{{.*}} +// BFLOAT16-NONE-FALLBACK-NEXT: llvm-link{{.*}} +// BFLOAT16-NONE-FALLBACK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-NONE-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-bfloat16.o" + +// BFLOAT16-FALLBACK-NATIVE: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-bfloat16.o" "-output={{.*}}libsycl-fallback-bfloat16-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-user-wrappers.o" "-output={{.*}}libsycl-itt-user-wrappers-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-compiler-wrappers.o" "-output={{.*}}libsycl-itt-compiler-wrappers-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-stubs.o" "-output={{.*}}libsycl-itt-stubs-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: llvm-link{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: sycl-post-link{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: file-table-tform{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: llvm-foreach{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: llvm-foreach{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: file-table-tform{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-wrapper{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: llc{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-16{{.*}} +// BFLOAT16-FALLBACK-NATIVE-NEXT: llvm-link{{.*}} +// BFLOAT16-FALLBACK-NATIVE: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-NATIVE-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-native-bfloat16.o" + +// BFLOAT16-FALLBACK-FALLBACK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-fallback-bfloat16.o" "-output={{.*}}libsycl-fallback-bfloat16-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-user-wrappers.o" "-output={{.*}}libsycl-itt-user-wrappers-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-compiler-wrappers.o" "-output={{.*}}libsycl-itt-compiler-wrappers-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_x86_64-unknown-unknown" "-input={{.*}}libsycl-itt-stubs.o" "-output={{.*}}libsycl-itt-stubs-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: llvm-link{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: sycl-post-link{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: file-table-tform{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: llvm-foreach{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: llvm-foreach{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: file-table-tform{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-wrapper{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: llc{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-16{{.*}} +// BFLOAT16-FALLBACK-FALLBACK-NEXT: llvm-link{{.*}} +// BFLOAT16-FALLBACK-FALLBACK: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cassert.o" "-output={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cstring.o" "-output={{.*}}libsycl-fallback-cstring-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex.o" "-output={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-complex-fp64.o" "-output={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath.o" "-output={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-cmath-fp64.o" "-output={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf.o" "-output={{.*}}libsycl-fallback-imf-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-imf-fp64.o" "-output={{.*}}libsycl-fallback-imf-fp64-{{.*}}.o" "-unbundle" +// BFLOAT16-FALLBACK-FALLBACK-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_gen-unknown-unknown" "-input={{.*}}libsycl-fallback-bfloat16.o" diff --git a/clang/test/Preprocessor/sycl-macro.cpp b/clang/test/Preprocessor/sycl-macro.cpp index b2af292db71d..ba4708d6e7e9 100644 --- a/clang/test/Preprocessor/sycl-macro.cpp +++ b/clang/test/Preprocessor/sycl-macro.cpp @@ -9,6 +9,7 @@ // RUNx: %clang_cc1 %s -fsycl-id-queries-fit-in-int -fsycl-is-device -E -dM -fms-compatibility | FileCheck --check-prefix=CHECK-MSVC %s // RUN: %clang_cc1 -fno-sycl-id-queries-fit-in-int %s -E -dM | FileCheck \ // RUN: --check-prefix=CHECK-NO-SYCL_FIT_IN_INT %s +// RUN: %clang_cc1 %s -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-CUDA %s // CHECK-NOT:#define __SYCL_DEVICE_ONLY__ 1 // CHECK-NOT:#define SYCL_EXTERNAL @@ -30,3 +31,5 @@ // CHECK-NO-SYCL_FIT_IN_INT-NOT:#define __SYCL_ID_QUERIES_FIT_IN_INT__ 1 // CHECK-SYCL-ID:#define __SYCL_ID_QUERIES_FIT_IN_INT__ 1 + +// CHECK-CUDA:#define __CUDA_ARCH__ 800 diff --git a/libdevice/bfloat16_wrapper.cpp b/libdevice/bfloat16_wrapper.cpp new file mode 100755 index 000000000000..b2b8709f9dfb --- /dev/null +++ b/libdevice/bfloat16_wrapper.cpp @@ -0,0 +1,26 @@ +//==--- bfloat16_wrapper.cpp - wrappers for bfloat16 library functions ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include "device.h" + +#ifdef __SPIR__ + +#include +#include + +DEVICE_EXTERN_C_INLINE +uint16_t __devicelib_ConvertFToBF16INTEL(const float &x) { + return __spirv_ConvertFToBF16INTEL(x); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_ConvertBF16ToFINTEL(const uint16_t &x) { + return __spirv_ConvertBF16ToFINTEL(x); +} + +#endif // __SPIR__ diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 7ae3543a789a..080e2e575e54 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -101,6 +101,7 @@ set(complex_obj_deps device_complex.h device.h sycl-compiler) set(cmath_obj_deps device_math.h device.h sycl-compiler) set(imf_obj_deps device_imf.hpp imf_half.hpp imf_bf16.hpp device.h sycl-compiler) set(itt_obj_deps device_itt.h spirv_vars.h device.h sycl-compiler) +set(bfloat16_obj_deps sycl-compiler) add_devicelib_obj(libsycl-itt-stubs SRC itt_stubs.cpp DEP ${itt_obj_deps}) add_devicelib_obj(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp DEP ${itt_obj_deps}) @@ -114,6 +115,7 @@ add_devicelib_obj(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp DEP ${cmath_obj_ add_devicelib_obj(libsycl-imf SRC imf_wrapper.cpp DEP ${imf_obj_deps}) add_devicelib_obj(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp DEP ${imf_obj_deps}) add_devicelib_obj(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp DEP ${imf_obj_deps}) +add_devicelib_obj(libsycl-bfloat16 SRC bfloat16_wrapper.cpp DEP ${cmath_obj_deps} ) if(WIN32) add_devicelib_obj(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps}) endif() @@ -124,6 +126,8 @@ add_fallback_devicelib(libsycl-fallback-complex SRC fallback-complex.cpp DEP ${c add_fallback_devicelib(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp DEP ${complex_obj_deps} ) add_fallback_devicelib(libsycl-fallback-cmath SRC fallback-cmath.cpp DEP ${cmath_obj_deps}) add_fallback_devicelib(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp DEP ${cmath_obj_deps}) +add_fallback_devicelib(libsycl-fallback-bfloat16 SRC fallback-bfloat16.cpp DEP ${bfloat16_obj_deps}) +add_fallback_devicelib(libsycl-native-bfloat16 SRC bfloat16_wrapper.cpp DEP ${bfloat16_obj_deps}) file(MAKE_DIRECTORY ${obj_binary_dir}/libdevice) set(imf_fallback_src_dir ${obj_binary_dir}/libdevice) diff --git a/libdevice/fallback-bfloat16.cpp b/libdevice/fallback-bfloat16.cpp new file mode 100755 index 000000000000..e5596ff4871d --- /dev/null +++ b/libdevice/fallback-bfloat16.cpp @@ -0,0 +1,46 @@ +//==------- fallback-bfloat16.cpp - bfloat16 conversions in software -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include "device.h" + +#ifdef __SPIR__ + +#include + +// To support fallback device libraries on-demand loading, please update the +// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add +// or remove any item in this file. +// TODO: generate the DeviceLibFuncMap in sycl-post-link.cpp automatically +// during the build based on libdevice to avoid manually sync. + +DEVICE_EXTERN_C_INLINE uint16_t +__devicelib_ConvertFToBF16INTEL(const float &a) { + // In case float value is nan - propagate bfloat16's qnan + if (__spirv_IsNan(a)) + return 0xffc1; + union { + uint32_t intStorage; + float floatValue; + }; + floatValue = a; + // Do RNE and truncate + uint32_t roundingBias = ((intStorage >> 16) & 0x1) + 0x00007FFF; + return static_cast((intStorage + roundingBias) >> 16); +} + +DEVICE_EXTERN_C_INLINE float +__devicelib_ConvertBF16ToFINTEL(const uint16_t &a) { + union { + uint32_t intStorage; + float floatValue; + }; + intStorage = a << 16; + return floatValue; +} + +#endif // __SPIR__ diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index 83312731f6d1..9b1c80cc8828 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -497,6 +497,10 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_imf_floorbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, {"__devicelib_imf_ceilbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, {"__devicelib_imf_truncbf16", DeviceLibExt::cl_intel_devicelib_imf_bf16}, + {"__devicelib_ConvertFToBF16INTEL", + DeviceLibExt::cl_intel_devicelib_bfloat16}, + {"__devicelib_ConvertBF16ToFINTEL", + DeviceLibExt::cl_intel_devicelib_bfloat16}, }; // Each fallback device library corresponds to one bit in "require mask" which @@ -512,6 +516,7 @@ SYCLDeviceLibFuncMap SDLMap = { // fallback-imf: 0x40 // fallback-imf-fp64: 0x80 // fallback-imf-bf16: 0x100 +// fallback-bfloat16: 0x200 uint32_t getDeviceLibBits(const std::string &FuncName) { auto DeviceLibFuncIter = SDLMap.find(FuncName); return ((DeviceLibFuncIter == SDLMap.end()) diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h index 4e340e9be6b1..c9b737e2d053 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h @@ -35,6 +35,7 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_imf, cl_intel_devicelib_imf_fp64, cl_intel_devicelib_imf_bf16, + cl_intel_devicelib_bfloat16, }; uint32_t getSYCLDeviceLibReqMask(const Module &M); diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16.asciidoc deleted file mode 100644 index bec08876ed08..000000000000 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16.asciidoc +++ /dev/null @@ -1,411 +0,0 @@ -= sycl_ext_oneapi_bfloat16 - -:source-highlighter: coderay -:coderay-linenums-mode: table - -// This section needs to be after the document title. -:doctype: book -:toc2: -:toc: left -:encoding: utf-8 -:lang: en - -:blank: pass:[ +] - -// Set the default source code type in this document to C++, -// for syntax highlighting purposes. This is needed because -// docbook uses c++ and html5 uses cpp. -:language: {basebackend@docbook:c++:cpp} - -// This is necessary for asciidoc, but not for asciidoctor -:cpp: C++ - -== Notice - -IMPORTANT: This specification is a draft. - -Copyright (c) 2021-2022 Intel Corporation. All rights reserved. - -NOTE: Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are -trademarks of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. -used by permission by Khronos. - -== Dependencies - -This extension is written against the SYCL 2020 specification, Revision 4. - -== Status - -Draft - -This is a preview extension specification, intended to provide early access to -a feature for review and community feedback. When the feature matures, this -specification may be released as a formal extension. - -Because the interfaces defined by this specification are not final and are -subject to change they are not intended to be used by shipping software -products. - -== Version - -Revision: 5 - -== Introduction - -This extension adds functionality to convert value of single-precision -floating-point type(`float`) to `bfloat16` type and vice versa. The extension -doesn't add support for `bfloat16` type as such, instead it uses 16-bit integer -type(`uint16_t`) as a storage for `bfloat16` values. - -The purpose of conversion from float to bfloat16 is to reduce the amount of memory -required to store floating-point numbers. Computations are expected to be done with -32-bit floating-point values. - -This extension is an optional kernel feature as described in -https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:optional-kernel-features[section 5.7] -of the SYCL 2020 spec. Therefore, attempting to submit a kernel using this -feature to a device that does not support it should cause a synchronous -`errc::kernel_not_supported` exception to be thrown from the kernel invocation -command (e.g. from `parallel_for`). - -== Feature test macro - -This extension provides a feature-test macro as described in the core SYCL -specification section 6.3.3 "Feature test macros". Therefore, an implementation -supporting this extension must predefine the macro -`SYCL_EXT_ONEAPI_BFLOAT16` to one of the values defined in the table -below. Applications can test for the existence of this macro to determine if -the implementation supports this feature, or applications can test the macro’s - value to determine which of the extension’s APIs the implementation supports. - -[%header,cols="1,5"] -|=== -|Value |Description -|1 |Initial extension version. Base features are supported. -|=== - -== Extension to `enum class aspect` - -[source] ----- -namespace sycl { -enum class aspect { - ... - ext_oneapi_bfloat16 -} -} ----- - -If a SYCL device has the `ext_oneapi_bfloat16` aspect, then it natively -supports conversion of values of `float` type to `bfloat16` and back. - -If the device doesn't have the aspect, objects of `bfloat16` class must not be -used in the device code. - -**NOTE**: The `ext_oneapi_bfloat16` aspect is not yet supported. The -`bfloat16` class is currently supported only on Xe HP GPU and Nvidia GPUs with Compute Capability >= SM80. - -== New `bfloat16` class - -The `bfloat16` class below provides the conversion functionality. Conversion -from `float` to `bfloat16` is done with round to nearest even(RTE) rounding -mode. - -[source] ----- -namespace sycl { -namespace ext { -namespace oneapi { -namespace experimental { - -class bfloat16 { - using storage_t = uint16_t; - storage_t value; - -public: - bfloat16() = default; - bfloat16(const bfloat16 &) = default; - ~bfloat16() = default; - - // Explicit conversion functions - static storage_t from_float(const float &a); - static float to_float(const storage_t &a); - - // Convert from float to bfloat16 - bfloat16(const float &a); - bfloat16 &operator=(const float &a); - - // Convert from bfloat16 to float - operator float() const; - - // Get bfloat16 as uint16. - operator storage_t() const; - - // Convert to bool type - explicit operator bool(); - - friend bfloat16 operator-(bfloat16 &bf) { /* ... */ } - - // OP is: prefix ++, -- - friend bfloat16 &operatorOP(bfloat16 &bf) { /* ... */ } - - // OP is: postfix ++, -- - friend bfloat16 operatorOP(bfloat16 &bf, int) { /* ... */ } - - // OP is: +=, -=, *=, /= - friend bfloat16 &operatorOP(bfloat16 &lhs, const bfloat16 &rhs) { /* ... */ } - - // OP is +, -, *, / - friend bfloat16 operatorOP(const bfloat16 &lhs, const bfloat16 &rhs) { /* ... */ } - template - friend bfloat16 operatorOP(const bfloat16 &lhs, const T &rhs) { /* ... */ } - template - friend bfloat16 operatorOP(const T &lhs, const bfloat16 &rhs) { /* ... */ } - - // OP is ==,!=, <, >, <=, >= - friend bool operatorOP(const bfloat16 &lhs, const bfloat16 &rhs) { /* ... */ } - template - friend bool operatorOP(const bfloat16 &lhs, const T &rhs) { /* ... */ } - template - friend bool operatorOP(const T &lhs, const bfloat16 &rhs) { /* ... */ } -}; - -} // namespace experimental -} // namespace oneapi -} // namespace ext -} // namespace sycl ----- - -Table 1. Member functions of `bfloat16` class. -|=== -| Member Function | Description - -| `static storage_t from_float(const float &a);` -| Explicitly convert from `float` to `bfloat16`. - -| `static float to_float(const storage_t &a);` -| Interpret `a` as `bfloat16` and explicitly convert it to `float`. - -| `bfloat16(const float& a);` -| Construct `bfloat16` from `float`. Converts `float` to `bfloat16`. - -| `bfloat16 &operator=(const float &a);` -| Replace the value with `a` converted to `bfloat16` - -| `operator float() const;` -| Return `bfloat16` value converted to `float`. - -| `operator storage_t() const;` -| Return `uint16_t` value, whose bits represent `bfloat16` value. - -| `explicit operator bool() { /* ... */ }` -| Convert `bfloat16` to `bool` type. Return `false` if the value equals to - zero, return `true` otherwise. - -| `friend bfloat16 operator-(bfloat16 &bf) { /* ... */ }` -| Construct new instance of `bfloat16` class with negated value of the `bf`. - -| `friend bfloat16 &operatorOP(bfloat16 &bf) { /* ... */ }` -| Perform an in-place `OP` prefix arithmetic operation on the `bf`, - assigning the result to the `bf` and return the `bf`. - - OP is: `++, --` - -| `friend bfloat16 operatorOP(bfloat16 &bf, int) { /* ... */ }` -| Perform an in-place `OP` postfix arithmetic operation on `bf`, assigning - the result to the `bf` and return a copy of `bf` before the operation is - performed. - - OP is: `++, --` - -| `friend bfloat16 operatorOP(const bfloat16 &lhs, const bfloat16 &rhs) { /* ... */ }` -| Perform an in-place `OP` arithmetic operation between the `lhs` and the `rhs` - and return the `lhs`. - - OP is: `+=, -=, *=, /=` - -| `friend type operatorOP(const bfloat16 &lhs, const bfloat16 &rhs) { /* ... */ }` -| Construct a new instance of the `bfloat16` class with the value of the new - `bfloat16` instance being the result of an OP arithmetic operation between - the `lhs` `bfloat16` and `rhs` `bfloat16` values. - - OP is `+, -, *, /` - -| `template - friend bfloat16 operatorOP(const bfloat16 &lhs, const T &rhs) { /* ... */ }` -| Construct a new instance of the `bfloat16` class with the value of the new - `bfloat16` instance being the result of an OP arithmetic operation between - the `lhs` `bfloat16` value and `rhs` of template type `T`. Type `T` must be - convertible to `float`. - - OP is `+, -, *, /` - -| `template - friend bfloat16 operatorOP(const T &lhs, const bfloat16 &rhs) { /* ... */ }` -| Construct a new instance of the `bfloat16` class with the value of the new - `bfloat16` instance being the result of an OP arithmetic operation between - the `lhs` of template type `T` and `rhs` `bfloat16` value. Type `T` must be - convertible to `float`. - - OP is `+, -, *, /` - -| `friend bool operatorOP(const bfloat16 &lhs, const bfloat16 &rhs) { /* ... */ }` -| Perform comparison operation OP between `lhs` `bfloat16` and `rhs` `bfloat16` - values and return the result as a boolean value. - -OP is `==, !=, <, >, <=, >=` - -| `template - friend bool operatorOP(const bfloat16 &lhs, const T &rhs) { /* ... */ }` -| Perform comparison operation OP between `lhs` `bfloat16` and `rhs` of - template type `T` and return the result as a boolean value. Type `T` must be - convertible to `float`. - -OP is `==, !=, <, >, <=, >=` - -| `template - friend bool operatorOP(const T &lhs, const bfloat16 &rhs) { /* ... */ }` -| Perform comparison operation OP between `lhs` of template type `T` and `rhs` - `bfloat16` value and return the result as a boolean value. Type `T` must be - convertible to `float`. - -OP is `==, !=, <, >, <=, >=` -|=== - -== Example - -[source] ----- -#include -#include - -using sycl::ext::oneapi::experimental::bfloat16; - -bfloat16 operator+(const bfloat16 &lhs, const bfloat16 &rhs) { - return static_cast(lhs) + static_cast(rhs); -} - -float foo(float a, float b) { - // Convert from float to bfloat16. - bfloat16 A {a}; - bfloat16 B {b}; - - // Convert A and B from bfloat16 to float, do addition on floating-pointer - // numbers, then convert the result to bfloat16 and store it in C. - bfloat16 C = A + B; - - // Return the result converted from bfloat16 to float. - return C; -} - -int main (int argc, char *argv[]) { - float data[3] = {7.0, 8.1, 0.0}; - sycl::device dev; - sycl::queue deviceQueue{dev}; - sycl::buffer buf {data, sycl::range<1> {3}}; - - if (dev.has(sycl::aspect::ext_oneapi_bfloat16)) { - deviceQueue.submit ([&] (sycl::handler& cgh) { - auto numbers = buf.get_access (cgh); - cgh.single_task ([=] () { - numbers[2] = foo(numbers[0], numbers[1]); - }); - }); - } - return 0; -} ----- - -== New bfloat16 math functions - -Many applications will require dedicated functions that take parameters of type `bfloat16`. This extension adds `bfloat16` support to the `fma`, `fmin`, `fmax` and `fabs` SYCL floating point math functions. These functions can be used as element wise operations on matrices, supplementing the `bfloat16` support in the sycl_ext_oneapi_matrix extension. - -The descriptions of the `fma`, `fmin`, `fmax` and `fabs` SYCL floating point math functions can be found in the SYCL specification: https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_math_functions. - -The following functions are only available when `T` is `bfloat16` or `sycl::marray`, where `{N}` means any positive value of `size_t` type. - -=== fma - -```c++ -namespace sycl::ext::oneapi::experimental { - -template -T fma(T a, T b, T c); -} // namespace sycl::ext::oneapi::experimental -``` - -==== Description - -Returns the correctly rounded floating-point representation of the sum of `c` with the infinitely precise product of `a` and `b`. -Rounding of intermediate products shall not occur. The mantissa LSB rounds to the nearest even. Subnormal numbers are supported. - -=== fmax - -```c++ -namespace sycl::ext::oneapi::experimental { -template -T fmax(T x, T y); -} // namespace sycl::ext::oneapi::experimental -``` - -==== Description - -Returns `y` if -`x < y`, otherwise it -returns `x`. If one argument is a -NaN, `fmax()` returns the other -argument. If both arguments are -NaNs, `fmax()` returns a NaN. - -=== fmin - -```c++ -namespace sycl::ext::oneapi::experimental { -template -T fmin(T x, T y); -} // namespace sycl::ext::oneapi::experimental -``` - -==== Description - -Returns `y` if -`y < x`, otherwise it -returns `x`. If one argument is a -NaN, `fmax()` returns the other -argument. If both arguments are -NaNs, `fmax()` returns a NaN. - -=== fabs - -```c++ -namespace sycl::ext::oneapi::experimental { -template -T fabs(T x); -} // namespace sycl::ext::oneapi::experimental -``` - -==== Description - -Compute absolute value of a `bfloat16`. - -== Issues - -1. The CUDA backend does not have a use case that would necessitate support of the `vec` class in bfloat16 math functions, and `marray` would always be preferred over `vec` if `vec` support were to be added in the CUDA backend. For portability reasons, support for the `vec` class can be easily added if other backends require it. - -2. We should decide on a roadmap to extend support of `bfloat16` to other SYCL 2020 math functions. - -== Revision History - -[cols="5,15,15,70"] -[grid="rows"] -[options="header"] -|======================================== -|Rev|Date|Author|Changes -|1|2021-08-02|Alexey Sotkin |Initial public working draft -|2|2021-08-17|Alexey Sotkin |Add explicit conversion functions + - Add operator overloadings + - Apply code review suggestions -|3|2021-08-18|Alexey Sotkin |Remove `uint16_t` constructor -|4|2022-03-07|Aidan Belton and Jack Kirk |Switch from Intel vendor specific to oneapi -|5|2022-04-05|Jack Kirk | Added section for bfloat16 math builtins -|======================================== diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16_math_functions.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16_math_functions.asciidoc new file mode 100755 index 000000000000..41d75660cd94 --- /dev/null +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bfloat16_math_functions.asciidoc @@ -0,0 +1,182 @@ += sycl_ext_oneapi_bfloat16_math_functions + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en + +:blank: pass:[ +] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + +// This is necessary for asciidoc, but not for asciidoctor +:cpp: C++ + +== Notice + +Copyright © 2022-2022 Intel Corporation. All rights reserved. + +Khronos® is a registered trademark and SYCL™ and SPIR™ are trademarks of +The Khronos Group Inc. OpenCL™ is a trademark of Apple Inc. used by permission +by Khronos. + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + +== Dependencies + +This extension is written against the SYCL 2020 revision 5 specification. +All references below to the "core SYCL specification" or to section +numbers in the SYCL specification refer to that revision. + +This extension depends on the following other SYCL extension: + +* sycl_ext_oneapi_bfloat16 + +== Status + +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in DPC\++ but they are not finalized and may +change incompatibly in future versions of DPC++ without prior notice. +Shipping software products should not rely on APIs defined in this +specification. + +== Overview + +This extension adds `bfloat16` support to the `fma`, `fmin`, `fmax` and +`fabs` SYCL floating point math functions. These functions can be used as +element wise operations on matrices, supplementing the `bfloat16` support +in the sycl_ext_oneapi_matrix extension. + +The descriptions of the `fma`, `fmin`, `fmax` and `fabs` SYCL floating point +math functions can be found in the SYCL specification: +https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_math_functions. + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification section 6.3.3 "Feature test macros". Therefore, an implementation +supporting this extension must predefine the macro +`SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS` to one of the values defined in the +table below. Applications can test for the existence of this macro to determine +if the implementation supports this feature, or applications can test the +macro's value to determine which of the extension's APIs the implementation +supports. + +[%header,cols="1,5"] +|=== +|Value |Description +|1 |Initial extension version. Base features are supported. +|=== + +=== Extension to `enum class aspect` + +[source] +---- +namespace sycl { +enum class aspect { + ... + sycl_ext_oneapi_bfloat16_math_functions +} +} +---- + +If a SYCL device has the `sycl_ext_oneapi_bfloat16_math_functions` aspect, +then it supports the `bfloat16` math functions described in the next section. + +=== Math Functions + +The following functions are only available when `T` is `bfloat16` or +`sycl::marray`, where `{N}` means any positive value of +`size_t` type. + +==== fma + +```c++ +namespace sycl::ext::oneapi::experimental { + +template +T fma(T a, T b, T c); +} // namespace sycl::ext::oneapi::experimental +``` + +===== Description + +Returns the correctly rounded floating-point representation of the +sum of `c` with the infinitely precise product of `a` and `b`. +Rounding of intermediate products shall not occur. The mantissa +LSB rounds to the nearest even. Subnormal numbers are supported. + +==== fmax + +```c++ +namespace sycl::ext::oneapi::experimental { +template +T fmax(T x, T y); +} // namespace sycl::ext::oneapi::experimental +``` + +===== Description + +Returns `y` if +`x < y`, otherwise it +returns `x`. If one argument is a +NaN, `fmax()` returns the other +argument. If both arguments are +NaNs, `fmax()` returns a NaN. + +==== fmin + +```c++ +namespace sycl::ext::oneapi::experimental { +template +T fmin(T x, T y); +} // namespace sycl::ext::oneapi::experimental +``` + +===== Description + +Returns `y` if +`y < x`, otherwise it +returns `x`. If one argument is a +NaN, `fmax()` returns the other +argument. If both arguments are +NaNs, `fmax()` returns a NaN. + +==== fabs + +```c++ +namespace sycl::ext::oneapi::experimental { +template +T fabs(T x); +} // namespace sycl::ext::oneapi::experimental +``` + +===== Description + +Compute absolute value of a `bfloat16`. + +== Issues + +1. The CUDA backend does not have a use case that would necessitate support +of the `vec` class in bfloat16 math functions, and `marray` would always be +preferred over `vec` if `vec` support were to be added in the CUDA backend. +For portability reasons, support for the `vec` class can be easily added if +other backends require it. + +2. We should decide on a roadmap to extend support of `bfloat16` to other +SYCL 2020 math functions. diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_bfloat16.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_bfloat16.asciidoc new file mode 100644 index 000000000000..ff72b3c959f8 --- /dev/null +++ b/sycl/doc/extensions/supported/sycl_ext_oneapi_bfloat16.asciidoc @@ -0,0 +1,323 @@ += sycl_ext_oneapi_bfloat16 + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en + +:blank: pass:[ +] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + +// This is necessary for asciidoc, but not for asciidoctor +:cpp: C++ + +== Notice + +[%hardbreaks] +Copyright (C) 2022-2022 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 specification, Revision 5. + +== Status + +This extension is implemented and fully supported by DPC++. +[NOTE] +==== +The DPC++ compiler has the following limitation when using this extension +in conjunction with ahead-of-time (AOT) compilation with the `-fsycl-targets` +compiler option. When doing AOT compilation for an Intel GPU device via +`-fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device ..."`, +the compiler chooses either fallback or native support for bfloat16 according +to the device(s) specified in `...`. Native support is used only if all of +these devices have native bfloat16 support. As a result, AOT compiling for +multiple Intel GPU devices could result in the lower performance fallback +support even when running on a GPU that has native support. Therefore, the +recommendation is to use AOT only when all Intel GPU devices have the same +type of bfloat16 support (all native support or all fallback support). + +There is a similar limitation when AOT compiling for one Intel GPU device and +running on a different Intel GPU device. In this case, the compiler chooses +either fallback or native bfloat16 support according to the device(s) specified +on the command line. If the fallback library was chosen at AOT compilation +time, then the binary will run on all Intel GPU devices but you will not +get the performance benefit of native support even when running on a new +Intel GPU that has native support. If however, the native +bfloat16 library had been chosen at AOT compilation time then the binary +will run only on Intel GPU devices that have native bfloat16 support. +==== + + +== Overview + +This extension adds support for a 16-bit floating point type `bfloat16`. +This type occupies 16 bits of storage space as does the `sycl::half` type. +However, `bfloat16` allots 8 bits to the exponent instead of the 5 bits used by +`sycl::half` and 7 bits to the significand versus 10 bits used by `sycl::half`. +Thus, `bfloat16` has the same dynamic range as a 32-bit `float` but with +reduced precision. This type is useful when memory required to store the values +must be reduced, and when the calculations require high dynamic range but can +tolerate lower precision. Some implementations may still perform operations + on this type using 32-bit math. For example, they may convert the `bfloat16` + value to `float`, and then perform the operation on the 32-bit `float`. + +[NOTE] +The bfloat16 type is supported on all devices. DPC++ currently supports this +type natively on Intel Xe HP GPUs and Nvidia GPUs with +Compute Capability >= SM80. On other devices, and in host code, it is emulated +in software. + +== Specification + + +=== New `bfloat16` class + +The `bfloat16` type represents a 16-bit floating point value. +Conversions from `float` to `bfloat16` are done with round to +nearest even (RTE) rounding mode. + +The bfloat16 type and its operations are available in both device code and +host code. + +[source] +---- +namespace sycl { +namespace ext { +namespace oneapi { + +class bfloat16 { + +public: + bfloat16() = default; + bfloat16(const bfloat16 &) = default; + ~bfloat16() = default; + + // Convert from float to bfloat16 + bfloat16(const float &a); + bfloat16 &operator=(const float &a); + + // Convert bfloat16 to float + operator float() const; + + // Convert from sycl::half to bfloat16 + bfloat16(const sycl::half &a); + bfloat16 &operator=(const sycl::half &a); + + // Convert bfloat16 to sycl::half + operator sycl::half() const; + + // Convert bfloat16 to bool type + explicit operator bool(); + + friend bfloat16 operator-(bfloat16 &bf) { /* ... */ } + + // OP is: prefix ++, -- + friend bfloat16 &operatorOP(bfloat16 &bf) { /* ... */ } + + // OP is: postfix ++, -- + friend bfloat16 operatorOP(bfloat16 &bf, int) { /* ... */ } + + // OP is: +=, -=, *=, /= + friend bfloat16 &operatorOP(bfloat16 &lhs, const bfloat16 &rhs) { /* ... */ } + + // OP is +, -, *, / + friend bfloat16 operatorOP(const bfloat16 &lhs, const bfloat16 &rhs) + { /* ... */ } + template + friend bfloat16 operatorOP(const bfloat16 &lhs, const T &rhs) { /* ... */ } + template + friend bfloat16 operatorOP(const T &lhs, const bfloat16 &rhs) { /* ... */ } + + // OP is ==,!=, <, >, <=, >= + friend bool operatorOP(const bfloat16 &lhs, const bfloat16 &rhs) + { /* ... */ } + template + friend bool operatorOP(const bfloat16 &lhs, const T &rhs) { /* ... */ } + template + friend bool operatorOP(const T &lhs, const bfloat16 &rhs) { /* ... */ } +}; + +} // namespace oneapi +} // namespace ext +} // namespace sycl +---- + +Table 1. Member functions of `bfloat16` class. +|=== +| Member Function | Description + +| `bfloat16(const float& a);` +| Construct `bfloat16` from `float`. Converts `float` to `bfloat16`. + +| `bfloat16 &operator=(const float &a);` +| Replace the value with `a` converted to `bfloat16` + +| `operator float() const;` +| Return `bfloat16` value converted to `float`. + +| `bfloat16(const sycl::half& a);` +| Construct `bfloat16` from `sycl::half`. Converts `sycl::half` to `bfloat16`. + +| `bfloat16 &operator=(const sycl::half &a);` +| Replace the value with `a` converted to `bfloat16` + +| `operator sycl::half() const;` +| Return `bfloat16` value converted to `sycl::half`. + +| `explicit operator bool() { /* ... */ }` +| Convert `bfloat16` to `bool` type. Return `false` if the `value` equals to + zero, return `true` otherwise. + +| `friend bfloat16 operator-(bfloat16 &bf) { /* ... */ }` +| Construct new instance of `bfloat16` class with negated value of the `bf`. + +| `friend bfloat16 &operatorOP(bfloat16 &bf) { /* ... */ }` +| Perform an in-place `OP` prefix arithmetic operation on the `bf`, + assigning the result to the `bf` and return the `bf`. + + OP is: `++, --` + +| `friend bfloat16 operatorOP(bfloat16 &bf, int) { /* ... */ }` +| Perform an in-place `OP` postfix arithmetic operation on `bf`, assigning + the result to the `bf` and return a copy of `bf` before the operation is + performed. + + OP is: `++, --` + +| `friend bfloat16 operatorOP(const bfloat16 &lhs, const bfloat16 &rhs) +{ /* ... */ }` +| Perform an in-place `OP` arithmetic operation between the `lhs` and the `rhs` + and return the `lhs`. + + OP is: `+=, -=, *=, /=` + +| `friend type operatorOP(const bfloat16 &lhs, const bfloat16 &rhs) +{ /* ... */ }` +| Construct a new instance of the `bfloat16` class with the value of the new + `bfloat16` instance being the result of an OP arithmetic operation between + the `lhs` `bfloat16` and `rhs` `bfloat16` values. + + OP is `+, -, *, /` + +| `template + friend bfloat16 operatorOP(const bfloat16 &lhs, const T &rhs) { /* ... */ }` +| Construct a new instance of the `bfloat16` class with the value of the new + `bfloat16` instance being the result of an OP arithmetic operation between + the `lhs` `bfloat16` value and `rhs` of template type `T`. Type `T` must be + convertible to `float`. + + OP is `+, -, *, /` + +| `template + friend bfloat16 operatorOP(const T &lhs, const bfloat16 &rhs) { /* ... */ }` +| Construct a new instance of the `bfloat16` class with the value of the new + `bfloat16` instance being the result of an OP arithmetic operation between + the `lhs` of template type `T` and `rhs` `bfloat16` value. Type `T` must be + convertible to `float`. + + OP is `+, -, *, /` + +| `friend bool operatorOP(const bfloat16 &lhs, const bfloat16 &rhs) +{ /* ... */ }` +| Perform comparison operation OP between `lhs` `bfloat16` and `rhs` `bfloat16` + values and return the result as a boolean value. + +OP is `+==, !=, <, >, <=, >=+` + +| `template + friend bool operatorOP(const bfloat16 &lhs, const T &rhs) { /* ... */ }` +| Perform comparison operation OP between `lhs` `bfloat16` and `rhs` of + template type `T` and return the result as a boolean value. Type `T` must be + convertible to `float`. + +OP is `+==, !=, <, >, <=, >=+` + +| `template + friend bool operatorOP(const T &lhs, const bfloat16 &rhs) { /* ... */ }` +| Perform comparison operation OP between `lhs` of template type `T` and `rhs` + `bfloat16` value and return the result as a boolean value. Type `T` must be + convertible to `float`. + +OP is `+==, !=, <, >, <=, >=+` +|=== + +=== Example + +[source] +---- +#include + +using namespace sycl; +using sycl::ext::oneapi::bfloat16; + +float foo(float a, float b) { + // Convert from float to bfloat16. + bfloat16 A{a}; + bfloat16 B{b}; + + // Convert A and B from bfloat16 to float, do addition on floating-point + // numbers, then convert the result to bfloat16 and store it in C. + bfloat16 C = A + B; + + // Return the result converted from bfloat16 to float. + return C; +} + +int main(int argc, char *argv[]) { + float data[3] = {7.0, 8.1, 0.0}; + device dev{gpu_selector()}; + queue deviceQueue{dev}; + buffer buf{data, 3}; + + deviceQueue.submit([&](handler &cgh) { + accessor numbers{buf, cgh, read_write}; + cgh.single_task([=]() { numbers[2] = foo(numbers[0], numbers[1]); }); + }); + + host_accessor hostOutAcc{buf, read_only}; + std::cout << "Result = " << hostOutAcc[2] << std::endl; + + return 0; +} +---- + +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1|2021-08-02|Alexey Sotkin |Initial public working draft +|2|2021-08-17|Alexey Sotkin |Add explicit conversion functions + + Add operator overloadings + + Apply code review suggestions +|3|2021-08-18|Alexey Sotkin |Remove `uint16_t` constructor +|4|2022-03-07|Aidan Belton and Jack Kirk |Switch from Intel vendor specific + to oneapi +|5|2022-04-05|Jack Kirk | Added section for bfloat16 math builtins +|6|2022-09-15|Rajiv Deodhar |Move bfloat16 from experimental to supported +and leave math functions as experimental +|======================================== diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 5c1aa007a315..4c840b74d919 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -291,8 +291,8 @@ typedef enum { PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112, PI_DEVICE_INFO_BACKEND_VERSION = 0x10113, - // Return true if bfloat16 data type is supported by device - PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16 = 0x1FFFF, + // Return whether bfloat16 math functions are supported by device + PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS = 0x1FFFF, PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS = 0x20000, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D = 0x20001, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002, diff --git a/sycl/include/sycl/ext/intel/esimd/detail/bfloat16_type_traits.hpp b/sycl/include/sycl/ext/intel/esimd/detail/bfloat16_type_traits.hpp index 95bdeb5e63c5..3883ee0382ba 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/bfloat16_type_traits.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/bfloat16_type_traits.hpp @@ -13,7 +13,7 @@ #include #include -#include +#include /// @cond ESIMD_DETAIL @@ -21,7 +21,7 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace ext::intel::esimd::detail { -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; template <> struct element_type_traits { // TODO map the raw type to __bf16 once SPIRV target supports it: diff --git a/sycl/include/sycl/ext/intel/esimd/xmx/dpas.hpp b/sycl/include/sycl/ext/intel/esimd/xmx/dpas.hpp index 69d27ef2b8f5..736c2095123e 100644 --- a/sycl/include/sycl/ext/intel/esimd/xmx/dpas.hpp +++ b/sycl/include/sycl/ext/intel/esimd/xmx/dpas.hpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { @@ -29,8 +29,7 @@ template constexpr dpas_argument_type dpas_precision_from_type() { return dpas_argument_type::tf32; else if constexpr (std::is_same_v) return dpas_argument_type::fp16; - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v) return dpas_argument_type::bf16; else if constexpr (std::is_same_v) return dpas_argument_type::u8; @@ -145,7 +144,7 @@ constexpr int verify_parameters_and_deduce_exec_size() { } } else if constexpr (APrecision == dpas_argument_type::bf16 || BPrecision == dpas_argument_type::bf16) { - using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; + using bfloat16 = sycl::ext::oneapi::bfloat16; if constexpr (ExecutionSize == 8) { static_assert(APrecision == BPrecision && __ESIMD_DNS::is_type() && diff --git a/sycl/include/sycl/ext/oneapi/experimental/bfloat16.hpp b/sycl/include/sycl/ext/oneapi/bfloat16.hpp similarity index 81% rename from sycl/include/sycl/ext/oneapi/experimental/bfloat16.hpp rename to sycl/include/sycl/ext/oneapi/bfloat16.hpp index 9a68606c9791..2643cf35daaf 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/bfloat16.hpp +++ b/sycl/include/sycl/ext/oneapi/bfloat16.hpp @@ -15,11 +15,15 @@ #include #endif +extern "C" SYCL_EXTERNAL uint16_t +__devicelib_ConvertFToBF16INTEL(const float &) noexcept; +extern "C" SYCL_EXTERNAL float +__devicelib_ConvertBF16ToFINTEL(const uint16_t &) noexcept; + namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace ext { namespace oneapi { -namespace experimental { class bfloat16 { using storage_t = uint16_t; @@ -30,16 +34,31 @@ class bfloat16 { bfloat16(const bfloat16 &) = default; ~bfloat16() = default; +private: // Explicit conversion functions static storage_t from_float(const float &a) { #if defined(__SYCL_DEVICE_ONLY__) #if defined(__NVPTX__) +#if (__CUDA_ARCH__ >= 800) return __nvvm_f2bf16_rn(a); #else - return __spirv_ConvertFToBF16INTEL(a); + // TODO find a better way to check for NaN + if (a != a) + return 0xffc1; + union { + uint32_t intStorage; + float floatValue; + }; + floatValue = a; + // Do RNE and truncate + uint32_t roundingBias = ((intStorage >> 16) & 0x1) + 0x00007FFF; + return static_cast((intStorage + roundingBias) >> 16); +#endif +#else + return __devicelib_ConvertFToBF16INTEL(a); #endif #else - // In case of float value is nan - propagate bfloat16's qnan + // In case float value is nan - propagate bfloat16's qnan if (std::isnan(a)) return 0xffc1; union { @@ -52,20 +71,17 @@ class bfloat16 { return static_cast((intStorage + roundingBias) >> 16); #endif } + static float to_float(const storage_t &a) { -#if defined(__SYCL_DEVICE_ONLY__) -#if defined(__NVPTX__) - uint32_t y = a; - y = y << 16; - float *res = reinterpret_cast(&y); - return *res; +#if defined(__SYCL_DEVICE_ONLY__) && defined(__SPIR__) + return __devicelib_ConvertBF16ToFINTEL(a); #else - return __spirv_ConvertBF16ToFINTEL(a); -#endif -#else - uint32_t bits = a; - bits <<= 16; - return sycl::bit_cast(bits); + union { + uint32_t intStorage; + float floatValue; + }; + intStorage = a << 16; + return floatValue; #endif } @@ -75,6 +91,7 @@ class bfloat16 { return res; } +public: // Implicit conversion from float to bfloat16 bfloat16(const float &a) { value = from_float(a); } @@ -83,12 +100,19 @@ class bfloat16 { return *this; } + // Implicit conversion from sycl::half to bfloat16 + bfloat16(const sycl::half &a) { value = from_float(a); } + + bfloat16 &operator=(const sycl::half &rhs) { + value = from_float(rhs); + return *this; + } + // Implicit conversion from bfloat16 to float operator float() const { return to_float(value); } - operator sycl::half() const { return to_float(value); } - // Get raw bits representation of bfloat16 - storage_t raw() const { return value; } + // Implicit conversion from bfloat16 to sycl::half + operator sycl::half() const { return to_float(value); } // Logical operators (!,||,&&) are covered if we can cast to bool explicit operator bool() { return to_float(value) != 0.0f; } @@ -97,14 +121,16 @@ class bfloat16 { friend bfloat16 operator-(bfloat16 &lhs) { #if defined(__SYCL_DEVICE_ONLY__) #if defined(__NVPTX__) +#if (__CUDA_ARCH__ >= 800) return from_bits(__nvvm_neg_bf16(lhs.value)); #else - return bfloat16{-__spirv_ConvertBF16ToFINTEL(lhs.value)}; + return -to_float(lhs.value); +#endif +#else + return bfloat16{-__devicelib_ConvertBF16ToFINTEL(lhs.value)}; #endif #else - (void)lhs; - throw exception{errc::feature_not_supported, - "Bfloat16 unary minus is not supported on host device"}; + return -to_float(lhs.value); #endif } @@ -177,7 +203,6 @@ class bfloat16 { // for floating-point types. }; -} // namespace experimental } // namespace oneapi } // namespace ext diff --git a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp old mode 100644 new mode 100755 index c3c6fb0dd0ac..e676cf101329 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -15,7 +15,7 @@ #include #include -#include +#include // TODO Decide whether to mark functions with this attribute. #define __NOEXC /*noexcept*/ @@ -31,6 +31,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace ext { namespace oneapi { namespace experimental { + namespace detail { template uint32_t to_uint32_t(sycl::marray x, size_t start) { @@ -124,154 +125,6 @@ inline __SYCL_ALWAYS_INLINE } // namespace native -template -std::enable_if_t::value, T> fabs(T x) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return bfloat16::from_bits(__clc_fabs(x.raw())); -#else - std::ignore = x; - throw runtime_error("bfloat16 is not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -sycl::marray fabs(sycl::marray x) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; - - for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __clc_fabs(detail::to_uint32_t(x, i * 2)); - std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); - } - - if (N % 2) { - res[N - 1] = bfloat16::from_bits(__clc_fabs(x[N - 1].raw())); - } - return res; -#else - std::ignore = x; - throw runtime_error("bfloat16 is not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -std::enable_if_t::value, T> fmin(T x, T y) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return bfloat16::from_bits(__clc_fmin(x.raw(), y.raw())); -#else - std::ignore = x; - std::ignore = y; - throw runtime_error("bfloat16 is not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -sycl::marray fmin(sycl::marray x, - sycl::marray y) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; - - for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __clc_fmin(detail::to_uint32_t(x, i * 2), - detail::to_uint32_t(y, i * 2)); - std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); - } - - if (N % 2) { - res[N - 1] = - bfloat16::from_bits(__clc_fmin(x[N - 1].raw(), y[N - 1].raw())); - } - - return res; -#else - std::ignore = x; - std::ignore = y; - throw runtime_error("bfloat16 is not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -std::enable_if_t::value, T> fmax(T x, T y) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return bfloat16::from_bits(__clc_fmax(x.raw(), y.raw())); -#else - std::ignore = x; - std::ignore = y; - throw runtime_error("bfloat16 is not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -sycl::marray fmax(sycl::marray x, - sycl::marray y) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; - - for (size_t i = 0; i < N / 2; i++) { - auto partial_res = __clc_fmax(detail::to_uint32_t(x, i * 2), - detail::to_uint32_t(y, i * 2)); - std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); - } - - if (N % 2) { - res[N - 1] = - bfloat16::from_bits(__clc_fmax(x[N - 1].raw(), y[N - 1].raw())); - } - return res; -#else - std::ignore = x; - std::ignore = y; - throw runtime_error("bfloat16 is not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -std::enable_if_t::value, T> fma(T x, T y, T z) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - return bfloat16::from_bits(__clc_fma(x.raw(), y.raw(), z.raw())); -#else - std::ignore = x; - std::ignore = y; - std::ignore = z; - throw runtime_error("bfloat16 is not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - -template -sycl::marray fma(sycl::marray x, - sycl::marray y, - sycl::marray z) { -#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) - sycl::marray res; - - for (size_t i = 0; i < N / 2; i++) { - auto partial_res = - __clc_fma(detail::to_uint32_t(x, i * 2), detail::to_uint32_t(y, i * 2), - detail::to_uint32_t(z, i * 2)); - std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t)); - } - - if (N % 2) { - res[N - 1] = bfloat16::from_bits( - __clc_fma(x[N - 1].raw(), y[N - 1].raw(), z[N - 1].raw())); - } - return res; -#else - std::ignore = x; - std::ignore = y; - std::ignore = z; - throw runtime_error("bfloat16 is not currently supported on the host device.", - PI_ERROR_INVALID_DEVICE); -#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) -} - } // namespace experimental } // namespace oneapi } // namespace ext diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit-use.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit-use.hpp index 6cb38ee74fea..2a7e1ef3ad06 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit-use.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit-use.hpp @@ -324,8 +324,7 @@ class wi_element { // represent bf16 type. Since the AMX and DPAS implementations don't support // uint16_t, this interpretation is possible. This design choice was made before // the introduction of SYCL experimental bfloat16 type. Our plan is to move -// towards using the SYCL bfloat16. But since it is still experimental, we will -// probably keep both uint16 interpretation and SYCL bfloat16. +// towards using the SYCL bfloat16. template class wi_element { @@ -475,18 +474,18 @@ class wi_element { template -class wi_element { - joint_matrix &M; +class wi_element { + joint_matrix &M; std::size_t idx; public: - wi_element(joint_matrix &Mat, + wi_element(joint_matrix &Mat, std::size_t i) : M(Mat), idx(i) {} - operator sycl::ext::oneapi::experimental::bfloat16() { + operator sycl::ext::oneapi::bfloat16() { #ifdef __SYCL_DEVICE_ONLY__ return __spirv_VectorExtractDynamic(M.spvm, idx); #else @@ -505,7 +504,7 @@ class wi_element &rhs) { + wi_element &operator=(const wi_element &rhs) { #ifdef __SYCL_DEVICE_ONLY__ M.spvm = __spirv_VectorInsertDynamic( M.spvm, __spirv_VectorExtractDynamic(rhs.M.spvm, rhs.idx), idx); @@ -532,16 +530,14 @@ class wi_element &lhs, \ - const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \ + const wi_element &lhs, \ + const sycl::ext::oneapi::bfloat16 &rhs) { \ return __spirv_VectorExtractDynamic(lhs.M.spvm, lhs.idx) op rhs; \ } \ friend type operator op( \ - const sycl::ext::oneapi::experimental::bfloat16 &lhs, \ - const wi_element &rhs) { \ + const sycl::ext::oneapi::bfloat16 &lhs, \ + const wi_element &rhs) { \ return __spirv_VectorExtractDynamic(rhs.M.spvm, rhs.idx) op lhs; \ } - OP(sycl::ext::oneapi::experimental::bfloat16, +) - OP(sycl::ext::oneapi::experimental::bfloat16, -) - OP(sycl::ext::oneapi::experimental::bfloat16, *) - OP(sycl::ext::oneapi::experimental::bfloat16, /) + OP(sycl::ext::oneapi::bfloat16, +) + OP(sycl::ext::oneapi::bfloat16, -) + OP(sycl::ext::oneapi::bfloat16, *) + OP(sycl::ext::oneapi::bfloat16, /) #undef OP #define OP(type, op) \ friend type operator op( \ - const wi_element &lhs, \ - const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \ + const wi_element &lhs, \ + const sycl::ext::oneapi::bfloat16 &rhs) { \ return type{static_cast(__spirv_VectorExtractDynamic( \ lhs.M.spvm, lhs.idx)) op static_cast(rhs)}; \ } \ friend type operator op( \ - const sycl::ext::oneapi::experimental::bfloat16 &lhs, \ - const wi_element &rhs) { \ + const sycl::ext::oneapi::bfloat16 &lhs, \ + const wi_element &rhs) { \ return type{static_cast(__spirv_VectorExtractDynamic( \ rhs.M.spvm, rhs.idx)) op static_cast(lhs)}; \ } @@ -597,23 +593,23 @@ class wi_element &, \ - const sycl::ext::oneapi::experimental::bfloat16 &) { \ + const wi_element &, \ + const sycl::ext::oneapi::bfloat16 &) { \ throw runtime_error("joint matrix is not supported on host device.", \ PI_ERROR_INVALID_DEVICE); \ } \ friend type operator op( \ - const sycl::ext::oneapi::experimental::bfloat16 &, \ - const wi_element &) { \ + const sycl::ext::oneapi::bfloat16 &, \ + const wi_element &) { \ throw runtime_error("joint matrix is not supported on host device.", \ PI_ERROR_INVALID_DEVICE); \ } - OP(sycl::ext::oneapi::experimental::bfloat16, +) - OP(sycl::ext::oneapi::experimental::bfloat16, -) - OP(sycl::ext::oneapi::experimental::bfloat16, *) - OP(sycl::ext::oneapi::experimental::bfloat16, /) + OP(sycl::ext::oneapi::bfloat16, +) + OP(sycl::ext::oneapi::bfloat16, -) + OP(sycl::ext::oneapi::bfloat16, *) + OP(sycl::ext::oneapi::bfloat16, /) OP(bool, ==) OP(bool, !=) OP(bool, <) diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp index 30a3a54a7d3f..fc3dc4684f8c 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include namespace sycl { @@ -457,18 +457,16 @@ class wi_element { }; template -class wi_element { - joint_matrix &M; +class wi_element { + joint_matrix &M; std::size_t idx; public: - wi_element(joint_matrix &Mat, + wi_element(joint_matrix &Mat, std::size_t i) : M(Mat), idx(i) {} - operator sycl::ext::oneapi::experimental::bfloat16() { + operator sycl::ext::oneapi::bfloat16() { #ifdef __SYCL_DEVICE_ONLY__ return __spirv_VectorExtractDynamic(M.spvm, idx); #else @@ -487,7 +485,7 @@ class wi_element &rhs) { + wi_element &operator=(const wi_element &rhs) { #ifdef __SYCL_DEVICE_ONLY__ M.spvm = __spirv_VectorInsertDynamic( M.spvm, __spirv_VectorExtractDynamic(rhs.M.spvm, rhs.idx), idx); @@ -514,16 +511,14 @@ class wi_element &lhs, \ - const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \ + const wi_element &lhs, \ + const sycl::ext::oneapi::bfloat16 &rhs) { \ return __spirv_VectorExtractDynamic(lhs.M.spvm, lhs.idx) op rhs; \ } \ friend type operator op( \ - const sycl::ext::oneapi::experimental::bfloat16 &lhs, \ - const wi_element &rhs) { \ + const sycl::ext::oneapi::bfloat16 &lhs, \ + const wi_element &rhs) { \ return __spirv_VectorExtractDynamic(rhs.M.spvm, rhs.idx) op lhs; \ } - OP(sycl::ext::oneapi::experimental::bfloat16, +) - OP(sycl::ext::oneapi::experimental::bfloat16, -) - OP(sycl::ext::oneapi::experimental::bfloat16, *) - OP(sycl::ext::oneapi::experimental::bfloat16, /) + OP(sycl::ext::oneapi::bfloat16, +) + OP(sycl::ext::oneapi::bfloat16, -) + OP(sycl::ext::oneapi::bfloat16, *) + OP(sycl::ext::oneapi::bfloat16, /) #undef OP #define OP(type, op) \ friend type operator op( \ - const wi_element &lhs, \ - const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \ + const wi_element &lhs, \ + const sycl::ext::oneapi::bfloat16 &rhs) { \ return type{static_cast(__spirv_VectorExtractDynamic( \ lhs.M.spvm, lhs.idx)) op static_cast(rhs)}; \ } \ friend type operator op( \ - const sycl::ext::oneapi::experimental::bfloat16 &lhs, \ - const wi_element &rhs) { \ + const sycl::ext::oneapi::bfloat16 &lhs, \ + const wi_element &rhs) { \ return type{static_cast(__spirv_VectorExtractDynamic( \ rhs.M.spvm, rhs.idx)) op static_cast(lhs)}; \ } @@ -578,24 +573,23 @@ class wi_element &, \ - const sycl::ext::oneapi::experimental::bfloat16 &) { \ + friend type operator op(const wi_element &, \ + const sycl::ext::oneapi::bfloat16 &) { \ throw runtime_error("joint matrix is not supported on host device.", \ PI_ERROR_INVALID_DEVICE); \ } \ friend type operator op( \ - const sycl::ext::oneapi::experimental::bfloat16 &, \ - const wi_element &) { \ + const sycl::ext::oneapi::bfloat16 &, \ + const wi_element &) { \ throw runtime_error("joint matrix is not supported on host device.", \ PI_ERROR_INVALID_DEVICE); \ } - OP(sycl::ext::oneapi::experimental::bfloat16, +) - OP(sycl::ext::oneapi::experimental::bfloat16, -) - OP(sycl::ext::oneapi::experimental::bfloat16, *) - OP(sycl::ext::oneapi::experimental::bfloat16, /) + OP(sycl::ext::oneapi::bfloat16, +) + OP(sycl::ext::oneapi::bfloat16, -) + OP(sycl::ext::oneapi::bfloat16, *) + OP(sycl::ext::oneapi::bfloat16, /) OP(bool, ==) OP(bool, !=) OP(bool, <) diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix-tensorcore.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix-tensorcore.hpp index b49021c76b1c..7127e42e8a84 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix-tensorcore.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix-tensorcore.hpp @@ -8,7 +8,7 @@ #pragma once #include "sycl/detail/defines_elementary.hpp" -#include +#include namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { @@ -221,9 +221,8 @@ struct joint_matrix_load_impl< S, Use, NumRows, NumCols, Layout, sycl::sub_group> &res, multi_ptr src, size_t stride) { if constexpr (std::is_same, uint16_t>::value || - std::is_same< - std::remove_const_t, - sycl::ext::oneapi::experimental::bfloat16>::value) { + std::is_same, + sycl::ext::oneapi::bfloat16>::value) { auto tileptr = reinterpret_cast(src.get()); auto destptr = reinterpret_cast(&res.wi_marray); if constexpr (NumRows == 16 && NumCols == 16) { @@ -589,8 +588,8 @@ struct joint_matrix_mad_impl< get_layout_pair_id(), 0); } } else if constexpr (std::is_same::value || - std::is_same::value) { + std::is_same::value) { __mma_bf16_m16n16k16_mma_f32( reinterpret_cast(&D.wi_marray), reinterpret_cast(&A.wi_marray), @@ -626,8 +625,8 @@ struct joint_matrix_mad_impl< get_layout_pair_id(), 0); } } else if constexpr (std::is_same::value || - std::is_same::value) { + std::is_same::value) { __mma_bf16_m8n32k16_mma_f32( reinterpret_cast(&D.wi_marray), reinterpret_cast(&A.wi_marray), @@ -649,8 +648,8 @@ struct joint_matrix_mad_impl< get_layout_pair_id(), 0); } } else if constexpr (std::is_same::value || - std::is_same::value) { + std::is_same::value) { __mma_bf16_m32n8k16_mma_f32( reinterpret_cast(&D.wi_marray), reinterpret_cast(&A.wi_marray), diff --git a/sycl/include/sycl/feature_test.hpp.in b/sycl/include/sycl/feature_test.hpp.in index a4e86b9a82b4..4ffc81e3eb73 100644 --- a/sycl/include/sycl/feature_test.hpp.in +++ b/sycl/include/sycl/feature_test.hpp.in @@ -49,7 +49,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { #define SYCL_EXT_ONEAPI_SUB_GROUP 1 #define SYCL_EXT_ONEAPI_PROPERTIES 1 #define SYCL_EXT_ONEAPI_NATIVE_MATH 1 -#define SYCL_EXT_ONEAPI_BFLOAT16 1 +#define SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS 1 #define SYCL_EXT_INTEL_DATAFLOW_PIPES 1 #ifdef __clang__ #if __has_extension(sycl_extended_atomics) diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index d7c6947c6f66..4eb93d73b6dc 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -31,7 +31,7 @@ __SYCL_ASPECT(ext_oneapi_native_assert, 31) __SYCL_ASPECT(host_debuggable, 32) __SYCL_ASPECT(ext_intel_gpu_hw_threads_per_eu, 33) __SYCL_ASPECT(ext_oneapi_cuda_async_barrier, 34) -__SYCL_ASPECT(ext_oneapi_bfloat16, 35) +__SYCL_ASPECT(ext_oneapi_bfloat16_math_functions, 35) __SYCL_ASPECT(ext_intel_free_memory, 36) __SYCL_ASPECT(ext_intel_device_id, 37) __SYCL_ASPECT(ext_intel_memory_clock_rate, 38) diff --git a/sycl/include/sycl/info/device_traits.def b/sycl/include/sycl/info/device_traits.def index 0f0e93cd00ea..d0bd71092b81 100644 --- a/sycl/include/sycl/info/device_traits.def +++ b/sycl/include/sycl/info/device_traits.def @@ -196,8 +196,8 @@ __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_srgb, bool, PI_DEVICE_INFO_IMAGE_SRGB) __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_mem_channel, bool, PI_MEM_PROPERTIES_CHANNEL) -__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_bfloat16, bool, - PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16) +__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_bfloat16_math_functions, bool, + PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS) //Deprecated oneapi/intel extension //TODO:Remove when possible diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index bfbed1351494..5a3e82cfacbf 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 65883399041e..e2e761f904b8 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1327,7 +1327,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } - case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: { + case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { int major = 0; sycl::detail::pi::assertion( cuDeviceGetAttribute(&major, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index ac40e32707ed..dc46b138b175 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1855,7 +1855,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: - case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: + case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: return PI_ERROR_INVALID_VALUE; default: diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 9516a29ffa58..3aa1238eb963 100755 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2731,6 +2731,13 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, // Supports reading and writing of images. SupportedExtensions += ("cl_khr_3d_image_writes "); + // L0 does not tell us if bfloat16 is supported. + // For now, assume ATS and PVC support it. + // TODO: change the way we detect bfloat16 support. + if ((Device->ZeDeviceProperties->deviceId & 0xfff) == 0x201 || + (Device->ZeDeviceProperties->deviceId & 0xff0) == 0xbd0) + SupportedExtensions += ("cl_intel_bfloat16_conversions "); + return ReturnValue(SupportedExtensions.c_str()); } case PI_DEVICE_INFO_NAME: @@ -3205,8 +3212,10 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: // currently not supported in level zero runtime return PI_ERROR_INVALID_VALUE; - case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: - return PI_ERROR_INVALID_VALUE; + case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { + // bfloat16 math functions are not yet supported on Intel GPUs. + return ReturnValue(bool{false}); + } // TODO: Implement. case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index f26bc5516c8c..979c39b48588 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -295,8 +295,12 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, std::memcpy(paramValue, &result, sizeof(cl_bool)); return PI_SUCCESS; } - case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: - return PI_ERROR_INVALID_VALUE; + case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { + // bfloat16 math functions are not yet supported on Intel GPUs. + cl_bool result = false; + std::memcpy(paramValue, &result, sizeof(cl_bool)); + return PI_SUCCESS; + } case PI_DEVICE_INFO_IMAGE_SRGB: { cl_bool result = true; std::memcpy(paramValue, &result, sizeof(cl_bool)); diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index f4f92d037aee..b4c33e718153 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -277,8 +277,8 @@ bool device_impl::has(aspect Aspect) const { return has_extension("cl_khr_fp16"); case aspect::fp64: return has_extension("cl_khr_fp64"); - case aspect::ext_oneapi_bfloat16: - return get_info(); + case aspect::ext_oneapi_bfloat16_math_functions: + return get_info(); case aspect::int64_base_atomics: return has_extension("cl_khr_int64_base_atomics"); case aspect::int64_extended_atomics: diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index fcf505dd9e04..9e5306fd2710 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -279,15 +279,16 @@ struct get_device_info_impl, } }; -// Specialization for bf16 +// Specialization for bf16 math functions template <> -struct get_device_info_impl { +struct get_device_info_impl { static bool get(RT::PiDevice dev, const plugin &Plugin) { - bool result = false; RT::PiResult Err = Plugin.call_nocheck( - dev, PiInfoCode::value, + dev, + PiInfoCode::value, sizeof(result), &result, nullptr); if (Err != PI_SUCCESS) { return false; @@ -1002,7 +1003,8 @@ get_device_info_host() { } template <> -inline bool get_device_info_host() { +inline bool +get_device_info_host() { return false; } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 45162fe108b0..77f72ff7a8fc 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -800,61 +800,74 @@ static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, return Prog != nullptr; } -static const char *getDeviceLibFilename(DeviceLibExt Extension) { - switch (Extension) { - case DeviceLibExt::cl_intel_devicelib_assert: - return "libsycl-fallback-cassert.spv"; - case DeviceLibExt::cl_intel_devicelib_math: - return "libsycl-fallback-cmath.spv"; - case DeviceLibExt::cl_intel_devicelib_math_fp64: - return "libsycl-fallback-cmath-fp64.spv"; - case DeviceLibExt::cl_intel_devicelib_complex: - return "libsycl-fallback-complex.spv"; - case DeviceLibExt::cl_intel_devicelib_complex_fp64: - return "libsycl-fallback-complex-fp64.spv"; - case DeviceLibExt::cl_intel_devicelib_cstring: - return "libsycl-fallback-cstring.spv"; - case DeviceLibExt::cl_intel_devicelib_imf: - return "libsycl-fallback-imf.spv"; - case DeviceLibExt::cl_intel_devicelib_imf_fp64: - return "libsycl-fallback-imf-fp64.spv"; - case DeviceLibExt::cl_intel_devicelib_imf_bf16: - return "libsycl-fallback-imf-bf16.spv"; - } - throw compile_program_error("Unhandled (new?) device library extension", - PI_ERROR_INVALID_OPERATION); +// For each extension, a pair of library names. The first uses native support, +// the second emulates functionality in software. +static const std::map> + DeviceLibNames = { + {DeviceLibExt::cl_intel_devicelib_assert, + {nullptr, "libsycl-fallback-cassert.spv"}}, + {DeviceLibExt::cl_intel_devicelib_math, + {nullptr, "libsycl-fallback-cmath.spv"}}, + {DeviceLibExt::cl_intel_devicelib_math_fp64, + {nullptr, "libsycl-fallback-cmath-fp64.spv"}}, + {DeviceLibExt::cl_intel_devicelib_complex, + {nullptr, "libsycl-fallback-complex.spv"}}, + {DeviceLibExt::cl_intel_devicelib_complex_fp64, + {nullptr, "libsycl-fallback-complex-fp64.spv"}}, + {DeviceLibExt::cl_intel_devicelib_cstring, + {nullptr, "libsycl-fallback-cstring.spv"}}, + {DeviceLibExt::cl_intel_devicelib_imf, + {nullptr, "libsycl-fallback-imf.spv"}}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, + {nullptr, "libsycl-fallback-imf-fp64.spv"}}, + {DeviceLibExt::cl_intel_devicelib_imf_bf16, + {nullptr, "libsycl-fallback-imf-bf16.spv"}}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, + {"libsycl-native-bfloat16.spv", "libsycl-fallback-bfloat16.spv"}}}; + +static const char *getDeviceLibFilename(DeviceLibExt Extension, bool Native) { + auto LibPair = DeviceLibNames.find(Extension); + const char *Lib = nullptr; + if (LibPair != DeviceLibNames.end()) + Lib = Native ? LibPair->second.first : LibPair->second.second; + if (Lib == nullptr) + throw compile_program_error("Unhandled (new?) device library extension", + PI_ERROR_INVALID_OPERATION); + return Lib; } +// For each extension understood by the SYCL runtime, the string representation +// of its name. Names with devicelib in them are internal to the runtime. Others +// are actual OpenCL extensions. +static const std::map DeviceLibExtensionStrs = { + {DeviceLibExt::cl_intel_devicelib_assert, "cl_intel_devicelib_assert"}, + {DeviceLibExt::cl_intel_devicelib_math, "cl_intel_devicelib_math"}, + {DeviceLibExt::cl_intel_devicelib_math_fp64, + "cl_intel_devicelib_math_fp64"}, + {DeviceLibExt::cl_intel_devicelib_complex, "cl_intel_devicelib_complex"}, + {DeviceLibExt::cl_intel_devicelib_complex_fp64, + "cl_intel_devicelib_complex_fp64"}, + {DeviceLibExt::cl_intel_devicelib_cstring, "cl_intel_devicelib_cstring"}, + {DeviceLibExt::cl_intel_devicelib_imf, "cl_intel_devicelib_imf"}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, "cl_intel_devicelib_imf_fp64"}, + {DeviceLibExt::cl_intel_devicelib_imf_bf16, "cl_intel_devicelib_imf_bf16"}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, + "cl_intel_bfloat16_conversions"}}; + static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { - switch (Extension) { - case DeviceLibExt::cl_intel_devicelib_assert: - return "cl_intel_devicelib_assert"; - case DeviceLibExt::cl_intel_devicelib_math: - return "cl_intel_devicelib_math"; - case DeviceLibExt::cl_intel_devicelib_math_fp64: - return "cl_intel_devicelib_math_fp64"; - case DeviceLibExt::cl_intel_devicelib_complex: - return "cl_intel_devicelib_complex"; - case DeviceLibExt::cl_intel_devicelib_complex_fp64: - return "cl_intel_devicelib_complex_fp64"; - case DeviceLibExt::cl_intel_devicelib_cstring: - return "cl_intel_devicelib_cstring"; - case DeviceLibExt::cl_intel_devicelib_imf: - return "cl_intel_devicelib_imf"; - case DeviceLibExt::cl_intel_devicelib_imf_fp64: - return "cl_intel_devicelib_imf_fp64"; - case DeviceLibExt::cl_intel_devicelib_imf_bf16: - return "cl_intel_devicelib_imf_bf16"; - } - throw compile_program_error("Unhandled (new?) device library extension", - PI_ERROR_INVALID_OPERATION); + auto Ext = DeviceLibExtensionStrs.find(Extension); + if (Ext == DeviceLibExtensionStrs.end()) + throw compile_program_error("Unhandled (new?) device library extension", + PI_ERROR_INVALID_OPERATION); + return Ext->second; } static RT::PiProgram loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, - const RT::PiDevice &Device) { + const RT::PiDevice &Device, + bool UseNativeLib) { - const char *LibFileName = getDeviceLibFilename(Extension); + auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib); auto LockedCache = Context->acquireCachedLibPrograms(); auto CachedLibPrograms = LockedCache.get(); @@ -1010,7 +1023,8 @@ getDeviceLibPrograms(const ContextImplPtr Context, const RT::PiDevice &Device, {DeviceLibExt::cl_intel_devicelib_cstring, false}, {DeviceLibExt::cl_intel_devicelib_imf, false}, {DeviceLibExt::cl_intel_devicelib_imf_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_imf_bf16, false}}; + {DeviceLibExt::cl_intel_devicelib_imf_bf16, false}, + {DeviceLibExt::cl_intel_devicelib_bfloat16, false}}; // Disable all devicelib extensions requiring fp64 support if at least // one underlying device doesn't support cl_khr_fp64. @@ -1038,18 +1052,25 @@ getDeviceLibPrograms(const ContextImplPtr Context, const RT::PiDevice &Device, continue; } - const char *ExtStr = getDeviceLibExtensionStr(Ext); + auto ExtName = getDeviceLibExtensionStr(Ext); bool InhibitNativeImpl = false; if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { - InhibitNativeImpl = strstr(Env, ExtStr) != nullptr; + InhibitNativeImpl = strstr(Env, ExtName) != nullptr; } - bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtStr); - + bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtName); if (!DeviceSupports || InhibitNativeImpl) { - Programs.push_back(loadDeviceLibFallback(Context, Ext, Device)); + Programs.push_back( + loadDeviceLibFallback(Context, Ext, Device, /*UseNativeLib=*/false)); FallbackIsLoaded = true; + } else { + // bfloat16 needs native library if device supports it + if (Ext == DeviceLibExt::cl_intel_devicelib_bfloat16) { + Programs.push_back( + loadDeviceLibFallback(Context, Ext, Device, /*UseNativeLib=*/true)); + FallbackIsLoaded = true; + } } } return Programs; @@ -1249,10 +1270,6 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) { StrToKSIdMap &KSIdMap = m_KernelSets[M]; auto KSIdIt = KSIdMap.find(EntriesB->name); if (KSIdIt != KSIdMap.end()) { - for (_pi_offload_entry EntriesIt = EntriesB + 1; EntriesIt != EntriesE; - ++EntriesIt) - assert(KSIdMap[EntriesIt->name] == KSIdIt->second && - "Kernel sets are not disjoint"); auto &Imgs = m_DeviceImages[KSIdIt->second]; assert(Imgs && "Device image vector should have been already created"); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 522ee861f482..bfb39d0cec7e 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -67,6 +67,7 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_imf, cl_intel_devicelib_imf_fp64, cl_intel_devicelib_imf_bf16, + cl_intel_devicelib_bfloat16, }; // Provides single loading and building OpenCL programs with unique contexts diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 1a3f21f63425..210de34595fb 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4215,7 +4215,6 @@ _ZNK4sycl3_V16device8get_infoINS0_4info6device18max_num_sub_groupsEEENS0_6detail _ZNK4sycl3_V16device8get_infoINS0_4info6device18max_parameter_sizeEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device18printf_buffer_sizeEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device19built_in_kernel_idsEEENS0_6detail19is_device_info_descIT_E11return_typeEv -_ZNK4sycl3_V16device8get_infoINS0_4info6device19ext_oneapi_bfloat16EEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device19host_unified_memoryEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device19is_linker_availableEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device19max_clock_frequencyEEENS0_6detail19is_device_info_descIT_E11return_typeEv @@ -4278,6 +4277,7 @@ _ZNK4sycl3_V16device8get_infoINS0_4info6device32atomic_memory_scope_capabilities _ZNK4sycl3_V16device8get_infoINS0_4info6device33ext_intel_gpu_subslices_per_sliceEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device33ext_oneapi_max_global_work_groupsEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device33usm_restricted_shared_allocationsEEENS0_6detail19is_device_info_descIT_E11return_typeEv +_ZNK4sycl3_V16device8get_infoINS0_4info6device34ext_oneapi_bfloat16_math_functionsEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device35ext_intel_gpu_eu_count_per_subsliceEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device38sub_group_independent_forward_progressEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_4info6device4nameEEENS0_6detail19is_device_info_descIT_E11return_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index b1b892c6e24a..86f4cf14c1df 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -58,7 +58,7 @@ ??$get_info@Uext_intel_max_mem_bandwidth@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_KXZ ??$get_info@Uext_intel_mem_channel@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_NXZ ??$get_info@Uext_intel_pci_address@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ -??$get_info@Uext_oneapi_bfloat16@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_NXZ +??$get_info@Uext_oneapi_bfloat16_math_functions@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_NXZ ??$get_info@Uext_oneapi_max_global_work_groups@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_KXZ ??$get_info@Uext_oneapi_max_work_groups_1d@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$id@$00@12@XZ ??$get_info@Uext_oneapi_max_work_groups_2d@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$id@$01@12@XZ diff --git a/sycl/test/check_device_code/matrix/matrix-nvptx-bfloat16-test.cpp b/sycl/test/check_device_code/matrix/matrix-nvptx-bfloat16-test.cpp index 448a0fa0a956..dc28943d9b10 100644 --- a/sycl/test/check_device_code/matrix/matrix-nvptx-bfloat16-test.cpp +++ b/sycl/test/check_device_code/matrix/matrix-nvptx-bfloat16-test.cpp @@ -7,7 +7,7 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; -using sycl::ext::oneapi::experimental::bfloat16; +using sycl::ext::oneapi::bfloat16; constexpr int stride = 16; diff --git a/sycl/test/esimd/dpas.cpp b/sycl/test/esimd/dpas.cpp index 207886aaa6ee..27307b0ddfce 100644 --- a/sycl/test/esimd/dpas.cpp +++ b/sycl/test/esimd/dpas.cpp @@ -11,7 +11,7 @@ using namespace sycl::ext::intel::esimd; namespace old = sycl::ext::intel::experimental::esimd; namespace xmx = sycl::ext::intel::esimd::xmx; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; using half = sycl::half; constexpr auto bf16 = xmx::dpas_argument_type::bf16; diff --git a/sycl/test/extensions/bfloat16.cpp b/sycl/test/extensions/bfloat16.cpp index 3666b3217711..31c5780e20fc 100644 --- a/sycl/test/extensions/bfloat16.cpp +++ b/sycl/test/extensions/bfloat16.cpp @@ -2,10 +2,10 @@ // UNSUPPORTED: cuda || hip_amd -#include +#include #include -using sycl::ext::oneapi::experimental::bfloat16; +using sycl::ext::oneapi::bfloat16; SYCL_EXTERNAL uint16_t some_bf16_intrinsic(uint16_t x, uint16_t y); SYCL_EXTERNAL void foo(long x, sycl::half y); @@ -13,39 +13,41 @@ SYCL_EXTERNAL void foo(long x, sycl::half y); __attribute__((noinline)) float op(float a, float b) { // CHECK: define {{.*}} spir_func float @_Z2opff(float [[a:%.*]], float [[b:%.*]]) bfloat16 A{a}; - // CHECK: [[A:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float [[a]]) + // CHECK: [[A:%.*]] = call spir_func zeroext i16 @__devicelib_ConvertFToBF16INTEL(float {{.*}} [[a]].addr.ascast) // CHECK-NOT: fptoui bfloat16 B{b}; - // CHECK: [[B:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float [[b]]) + // CHECK: [[B:%.*]] = call spir_func zeroext i16 @__devicelib_ConvertFToBF16INTEL(float {{.*}} [[b]].addr.ascast) // CHECK-NOT: fptoui bfloat16 C = A + B; - // CHECK: [[A_float:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[A]]) - // CHECK: [[B_float:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[B]]) + // CHECK: [[RTCASTI:%ref.tmp.ascast.i]] = addrspacecast float* [[RT:%ref.tmp.i]] to float addrspace(4)* + // CHECK: [[A_float:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(i16 {{.*}} %1) + // CHECK: [[B_float:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(i16 {{.*}} %4) // CHECK: [[Add:%.*]] = fadd float [[A_float]], [[B_float]] - // CHECK: [[C:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float [[Add]]) - // CHECK-NOT: uitofp - // CHECK-NOT: fptoui + // CHECK: store float [[Add]], float* [[RT]], align 4 + // CHECK: [[C:%.*]] = call spir_func zeroext i16 @__devicelib_ConvertFToBF16INTEL(float {{.*}}) [[RTCASTI]]) - bfloat16 D = bfloat16::from_bits(some_bf16_intrinsic(A.raw(), C.raw())); - // CHECK: [[D:%.*]] = tail call spir_func zeroext i16 @_Z19some_bf16_intrinsictt(i16 zeroext [[A]], i16 zeroext [[C]]) // CHECK-NOT: uitofp // CHECK-NOT: fptoui long L = bfloat16(3.14f); - // CHECK: [[L_bfloat16:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float 0x40091EB860000000) - // CHECK: [[L_float:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[L_bfloat16]]) + // CHECK: [[L:%.*]] = call spir_func zeroext i16 @__devicelib_ConvertFToBF16INTEL(float {{.*}} %ref.tmp1.ascast) + // CHECK: [[P8:%.*]] = addrspacecast i16* [[VI9:%.*]] to i16 addrspace(4)* + // CHECK: store i16 [[L]], i16* [[VI9]] + // CHECK: [[L_float:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(i16 {{.*}} [[P8]]) // CHECK: [[L:%.*]] = fptosi float [[L_float]] to i{{32|64}} sycl::half H = bfloat16(2.71f); - // CHECK: [[H_bfloat16:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float 0x4005AE1480000000) - // CHECK: [[H_float:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[H_bfloat16]]) + // CHECK: [[H:%.*]] = call spir_func zeroext i16 @__devicelib_ConvertFToBF16INTEL(float {{.*}} %ref.tmp3.ascast) + // CHECK: [[P11:%.*]] = addrspacecast i16* [[VI13:%.*]] to i16 addrspace(4)* + // CHECK: store i16 [[H]], i16* [[VI13]], align 2 + // CHECK: [[H_float:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(i16 {{.*}} [[P11]]) // CHECK: [[H:%.*]] = fptrunc float [[H_float]] to half foo(L, H); - return D; - // CHECK: [[RetVal:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[D]]) + return A; + // CHECK: [[RetVal:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(i16 {{.*}} %2) // CHECK: ret float [[RetVal]] // CHECK-NOT: uitofp // CHECK-NOT: fptoui diff --git a/sycl/test/extensions/bfloat16_host.cpp b/sycl/test/extensions/bfloat16_host.cpp index acd02d2829b4..c18e1b2e2495 100644 --- a/sycl/test/extensions/bfloat16_host.cpp +++ b/sycl/test/extensions/bfloat16_host.cpp @@ -8,7 +8,7 @@ // RUN: %clangxx -fsycl %s -o %t.out // RUN: %t.out -#include +#include #include #include @@ -42,7 +42,8 @@ float bitsToFloatConv(std::string Bits) { } bool check_bf16_from_float(float Val, uint16_t Expected) { - uint16_t Result = sycl::ext::oneapi::experimental::bfloat16::from_float(Val); + sycl::ext::oneapi::bfloat16 B = Val; + uint16_t Result = *reinterpret_cast(&B); if (Result != Expected) { std::cout << "from_float check for Val = " << Val << " failed!\n" << "Expected " << Expected << " Got " << Result << "\n"; @@ -52,7 +53,7 @@ bool check_bf16_from_float(float Val, uint16_t Expected) { } bool check_bf16_to_float(uint16_t Val, float Expected) { - float Result = sycl::ext::oneapi::experimental::bfloat16::to_float(Val); + float Result = *reinterpret_cast(&Val); if (Result != Expected) { std::cout << "to_float check for Val = " << Val << " failed!\n" << "Expected " << Expected << " Got " << Result << "\n"; diff --git a/sycl/test/extensions/properties/properties_kernel.cpp b/sycl/test/extensions/properties/properties_kernel.cpp index b06b1829ac6c..a492048d5b4e 100644 --- a/sycl/test/extensions/properties/properties_kernel.cpp +++ b/sycl/test/extensions/properties/properties_kernel.cpp @@ -25,7 +25,8 @@ using device_has_all = aspect::ext_intel_device_info_uuid, aspect::ext_oneapi_srgb, aspect::ext_oneapi_native_assert, aspect::host_debuggable, aspect::ext_intel_gpu_hw_threads_per_eu, - aspect::ext_oneapi_cuda_async_barrier, aspect::ext_oneapi_bfloat16, + aspect::ext_oneapi_cuda_async_barrier, + aspect::ext_oneapi_bfloat16_math_functions, aspect::ext_intel_free_memory, aspect::ext_intel_device_id, aspect::ext_intel_memory_clock_rate, aspect::ext_intel_memory_bus_width>); @@ -119,7 +120,7 @@ int main() { singleAspectDeviceHasChecks(); singleAspectDeviceHasChecks(); singleAspectDeviceHasChecks(); - singleAspectDeviceHasChecks(); + singleAspectDeviceHasChecks(); singleAspectDeviceHasChecks(); singleAspectDeviceHasChecks(); singleAspectDeviceHasChecks(); @@ -175,7 +176,8 @@ int main() { aspect::ext_intel_gpu_hw_threads_per_eu); static_assert(device_has_all::value[32] == aspect::ext_oneapi_cuda_async_barrier); - static_assert(device_has_all::value[33] == aspect::ext_oneapi_bfloat16); + static_assert(device_has_all::value[33] == + aspect::ext_oneapi_bfloat16_math_functions); static_assert(device_has_all::value[34] == aspect::ext_intel_free_memory); static_assert(device_has_all::value[35] == aspect::ext_intel_device_id); static_assert(device_has_all::value[36] == diff --git a/sycl/test/extensions/properties/properties_kernel_device_has.cpp b/sycl/test/extensions/properties/properties_kernel_device_has.cpp index fa3c82ab68cd..08791aadd237 100644 --- a/sycl/test/extensions/properties/properties_kernel_device_has.cpp +++ b/sycl/test/extensions/properties/properties_kernel_device_has.cpp @@ -8,13 +8,14 @@ using namespace sycl; using namespace ext::oneapi::experimental; static constexpr auto device_has_all = device_has< - aspect::ext_oneapi_cuda_async_barrier, aspect::ext_oneapi_bfloat16, - aspect::custom, aspect::fp16, aspect::fp64, aspect::image, - aspect::online_compiler, aspect::online_linker, aspect::queue_profiling, - aspect::usm_device_allocations, aspect::usm_restricted_shared_allocations, - aspect::usm_system_allocations, aspect::ext_intel_pci_address, aspect::host, - aspect::cpu, aspect::gpu, aspect::accelerator, - aspect::ext_intel_gpu_eu_count, aspect::ext_intel_gpu_subslices_per_slice, + aspect::ext_oneapi_cuda_async_barrier, + aspect::ext_oneapi_bfloat16_math_functions, aspect::custom, aspect::fp16, + aspect::fp64, aspect::image, aspect::online_compiler, aspect::online_linker, + aspect::queue_profiling, aspect::usm_device_allocations, + aspect::usm_restricted_shared_allocations, aspect::usm_system_allocations, + aspect::ext_intel_pci_address, aspect::host, aspect::cpu, aspect::gpu, + aspect::accelerator, aspect::ext_intel_gpu_eu_count, + aspect::ext_intel_gpu_subslices_per_slice, aspect::ext_intel_gpu_eu_count_per_subslice, aspect::ext_intel_max_mem_bandwidth, aspect::ext_intel_mem_channel, aspect::usm_atomic_host_allocations, aspect::usm_atomic_shared_allocations, @@ -131,7 +132,7 @@ int main() { } // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_oneapi_cuda_async_barrier", i32 [[ext_oneapi_cuda_async_barrier_ASPECT_MD:[0-9]+]]} -// CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_oneapi_bfloat16", i32 [[ext_oneapi_bfloat16_ASPECT_MD:[0-9]+]]} +// CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_oneapi_bfloat16_math_functions", i32 [[ext_oneapi_bfloat16_math_functions_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"custom", i32 [[custom_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"fp16", i32 [[fp16_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"fp64", i32 [[fp64_ASPECT_MD:[0-9]+]]} @@ -167,6 +168,6 @@ int main() { // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_intel_free_memory", i32 [[ext_intel_free_memory_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_intel_device_id", i32 [[ext_intel_device_id_ASPECT_MD:[0-9]+]]} -// CHECK-IR-DAG: attributes #[[DHAttr1]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_restricted_shared_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[host_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]]" -// CHECK-IR-DAG: attributes #[[DHAttr2]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_restricted_shared_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[host_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]]" -// CHECK-IR-DAG: attributes #[[DHAttr3]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_restricted_shared_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[host_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]]" +// CHECK-IR-DAG: attributes #[[DHAttr1]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_math_functions_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_restricted_shared_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[host_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]]" +// CHECK-IR-DAG: attributes #[[DHAttr2]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_math_functions_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_restricted_shared_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[host_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]]" +// CHECK-IR-DAG: attributes #[[DHAttr3]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_math_functions_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_restricted_shared_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[host_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]]" diff --git a/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp b/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp index e5dd265f558b..cd311a7b3d42 100644 --- a/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp +++ b/sycl/test/extensions/properties/properties_kernel_device_has_macro.cpp @@ -8,13 +8,14 @@ using namespace sycl; using namespace ext::oneapi::experimental; static constexpr auto device_has_all = device_has< - aspect::ext_oneapi_cuda_async_barrier, aspect::ext_oneapi_bfloat16, - aspect::custom, aspect::fp16, aspect::fp64, aspect::image, - aspect::online_compiler, aspect::online_linker, aspect::queue_profiling, - aspect::usm_device_allocations, aspect::usm_restricted_shared_allocations, - aspect::usm_system_allocations, aspect::ext_intel_pci_address, aspect::host, - aspect::cpu, aspect::gpu, aspect::accelerator, - aspect::ext_intel_gpu_eu_count, aspect::ext_intel_gpu_subslices_per_slice, + aspect::ext_oneapi_cuda_async_barrier, + aspect::ext_oneapi_bfloat16_math_functions, aspect::custom, aspect::fp16, + aspect::fp64, aspect::image, aspect::online_compiler, aspect::online_linker, + aspect::queue_profiling, aspect::usm_device_allocations, + aspect::usm_restricted_shared_allocations, aspect::usm_system_allocations, + aspect::ext_intel_pci_address, aspect::host, aspect::cpu, aspect::gpu, + aspect::accelerator, aspect::ext_intel_gpu_eu_count, + aspect::ext_intel_gpu_subslices_per_slice, aspect::ext_intel_gpu_eu_count_per_subslice, aspect::ext_intel_max_mem_bandwidth, aspect::ext_intel_mem_channel, aspect::usm_atomic_host_allocations, aspect::usm_atomic_shared_allocations, @@ -56,7 +57,7 @@ int main() { } // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_oneapi_cuda_async_barrier", i32 [[ext_oneapi_cuda_async_barrier_ASPECT_MD:[0-9]+]]} -// CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_oneapi_bfloat16", i32 [[ext_oneapi_bfloat16_ASPECT_MD:[0-9]+]]} +// CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_oneapi_bfloat16_math_functions", i32 [[ext_oneapi_bfloat16_math_functions_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"custom", i32 [[custom_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"fp16", i32 [[fp16_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"fp64", i32 [[fp64_ASPECT_MD:[0-9]+]]} @@ -94,6 +95,6 @@ int main() { // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_intel_memory_clock_rate", i32 [[ext_intel_memory_clock_rate_ASPECT_MD:[0-9]+]]} // CHECK-IR-DAG: !{{[0-9]+}} = !{!"ext_intel_memory_bus_width", i32 [[ext_intel_memory_bus_width_ASPECT_MD:[0-9]+]]} -// CHECK-IR-DAG: attributes #[[DHAttr1]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_restricted_shared_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[host_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]],[[ext_intel_memory_clock_rate_ASPECT_MD]],[[ext_intel_memory_bus_width_ASPECT_MD]]" +// CHECK-IR-DAG: attributes #[[DHAttr1]] = { {{.*}}"sycl-device-has"="[[ext_oneapi_cuda_async_barrier_ASPECT_MD]],[[ext_oneapi_bfloat16_math_functions_ASPECT_MD]],[[custom_ASPECT_MD]],[[fp16_ASPECT_MD]],[[fp64_ASPECT_MD]],[[image_ASPECT_MD]],[[online_compiler_ASPECT_MD]],[[online_linker_ASPECT_MD]],[[queue_profiling_ASPECT_MD]],[[usm_device_allocations_ASPECT_MD]],[[usm_restricted_shared_allocations_ASPECT_MD]],[[usm_system_allocations_ASPECT_MD]],[[ext_intel_pci_address_ASPECT_MD]],[[host_ASPECT_MD]],[[cpu_ASPECT_MD]],[[gpu_ASPECT_MD]],[[accelerator_ASPECT_MD]],[[ext_intel_gpu_eu_count_ASPECT_MD]],[[ext_intel_gpu_subslices_per_slice_ASPECT_MD]],[[ext_intel_gpu_eu_count_per_subslice_ASPECT_MD]],[[ext_intel_max_mem_bandwidth_ASPECT_MD]],[[ext_intel_mem_channel_ASPECT_MD]],[[usm_atomic_host_allocations_ASPECT_MD]],[[usm_atomic_shared_allocations_ASPECT_MD]],[[atomic64_ASPECT_MD]],[[ext_intel_device_info_uuid_ASPECT_MD]],[[ext_oneapi_srgb_ASPECT_MD]],[[ext_intel_gpu_eu_simd_width_ASPECT_MD]],[[ext_intel_gpu_slices_ASPECT_MD]],[[ext_oneapi_native_assert_ASPECT_MD]],[[host_debuggable_ASPECT_MD]],[[ext_intel_gpu_hw_threads_per_eu_ASPECT_MD]],[[usm_host_allocations_ASPECT_MD]],[[usm_shared_allocations_ASPECT_MD]],[[ext_intel_free_memory_ASPECT_MD]],[[ext_intel_device_id_ASPECT_MD]],[[ext_intel_memory_clock_rate_ASPECT_MD]],[[ext_intel_memory_bus_width_ASPECT_MD]]" // CHECK-IR-DAG: attributes #[[DHAttr2]] = { {{.*}}"sycl-device-has" {{.*}} // CHECK-IR-DAG: attributes #[[DHAttr3]] = { {{.*}}"sycl-device-has"="[[fp16_ASPECT_MD]],[[atomic64_ASPECT_MD]]" diff --git a/sycl/test/matrix/matrix-bfloat16-test-use.cpp b/sycl/test/matrix/matrix-bfloat16-test-use.cpp index 89a295cb23b7..f133b5d5bd9c 100644 --- a/sycl/test/matrix/matrix-bfloat16-test-use.cpp +++ b/sycl/test/matrix/matrix-bfloat16-test-use.cpp @@ -3,7 +3,7 @@ #include using namespace sycl::ext::oneapi::experimental::matrix; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; static constexpr auto TILE_SZ = 16; static constexpr auto TM = TILE_SZ - 1; @@ -137,13 +137,13 @@ int main() { for (int j = 0; j < MATRIX_K; j++) { // Ee create bfloat16 from unsigned short since float-to-bfloat's // conversion is not allowed. - A[i][j] = bfloat16::from_bits(make_bf16(1.0f * (i + j))); + A[i][j] = make_bf16(1.0f * (i + j)); Aref[i][j] = make_bf16(1.0f * (i + j)); } } for (int i = 0; i < MATRIX_K / 2; i++) { for (int j = 0; j < MATRIX_N * 2; j++) { - B[i][j] = bfloat16::from_bits((make_bf16(2.0f * i + 3.0f * j))); + B[i][j] = make_bf16(2.0f * i + 3.0f * j); Bref[i][j] = make_bf16(2.0f * i + 3.0f * j); } } diff --git a/sycl/test/matrix/matrix-bfloat16-test.cpp b/sycl/test/matrix/matrix-bfloat16-test.cpp index 8840a7e5ccc6..f4a7262b9fd8 100644 --- a/sycl/test/matrix/matrix-bfloat16-test.cpp +++ b/sycl/test/matrix/matrix-bfloat16-test.cpp @@ -3,7 +3,7 @@ #include using namespace sycl::ext::oneapi::experimental::matrix; -using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; +using bfloat16 = sycl::ext::oneapi::bfloat16; static constexpr auto TILE_SZ = 16; static constexpr auto TM = TILE_SZ - 1; @@ -139,13 +139,13 @@ int main() { for (int j = 0; j < MATRIX_K; j++) { // Ee create bfloat16 from unsigned short since float-to-bfloat's // conversion is not allowed. - A[i][j] = bfloat16::from_bits(make_bf16(1.0f * (i + j))); + A[i][j] = make_bf16(1.0f * (i + j)); Aref[i][j] = make_bf16(1.0f * (i + j)); } } for (int i = 0; i < MATRIX_K / 2; i++) { for (int j = 0; j < MATRIX_N * 2; j++) { - B[i][j] = bfloat16::from_bits((make_bf16(2.0f * i + 3.0f * j))); + B[i][j] = make_bf16(2.0f * i + 3.0f * j); Bref[i][j] = make_bf16(2.0f * i + 3.0f * j); } }