Skip to content

Commit 74faa3f

Browse files
[SYCL][FPGA] Add a wrapper header for __builtin_intel_fpga_mem (#2033)
This patch adds a new wrapper header for the Clang builtin `__builtin_intel_fpga_mem` The SPIRV spec for this featuer is here: https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/INTEL/SPV_INTEL_fpga_memory_accesses.asciidoc The Intel FPGA `lsu` class is implemented in `CL/sycl/intel/fpga_lsu.hpp` which is included in `CL/sycl/intel/fpga_extensions.hpp`. The class `cl::sycl::intel::lsu` allows users to explicitly request that the implementation of a global memory access is configured in a certain way. The class has two member functions, `load()` and `store()` which allow loading from and storing to a `global_ptr`, respectively, and is templated on 4 optional paremeters.
1 parent b2da2c8 commit 74faa3f

File tree

4 files changed

+227
-0
lines changed

4 files changed

+227
-0
lines changed

sycl/include/CL/sycl/intel/fpga_extensions.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@
88

99
#pragma once
1010
#include <CL/sycl/intel/fpga_device_selector.hpp>
11+
#include <CL/sycl/intel/fpga_lsu.hpp>
1112
#include <CL/sycl/intel/fpga_reg.hpp>
1213
#include <CL/sycl/intel/pipes.hpp>
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
//==-------------- fpga_lsu.hpp --- SYCL FPGA Reg Extensions ---------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#pragma once
9+
10+
#include "fpga_utils.hpp"
11+
#include <CL/sycl/detail/defines.hpp>
12+
#include <CL/sycl/pointers.hpp>
13+
14+
__SYCL_INLINE_NAMESPACE(cl) {
15+
namespace sycl {
16+
namespace intel {
17+
constexpr uint8_t BURST_COALESCE = 0x1;
18+
constexpr uint8_t CACHE = 0x2;
19+
constexpr uint8_t STATICALLY_COALESCE = 0x4;
20+
constexpr uint8_t PREFETCH = 0x8;
21+
22+
template <int32_t N> struct burst_coalesce_impl {
23+
static constexpr int32_t value = N;
24+
static constexpr int32_t default_value = 0;
25+
};
26+
27+
template <int32_t N> struct cache {
28+
static constexpr int32_t value = N;
29+
static constexpr int32_t default_value = 0;
30+
};
31+
32+
template <int32_t N> struct prefetch_impl {
33+
static constexpr int32_t value = N;
34+
static constexpr int32_t default_value = 0;
35+
};
36+
37+
template <int32_t N> struct statically_coalesce_impl {
38+
static constexpr int32_t value = N;
39+
static constexpr int32_t default_value = 1;
40+
};
41+
42+
template <bool B> using burst_coalesce = burst_coalesce_impl<B>;
43+
template <bool B> using prefetch = prefetch_impl<B>;
44+
template <bool B> using statically_coalesce = statically_coalesce_impl<B>;
45+
46+
template <class... mem_access_params> class lsu final {
47+
public:
48+
lsu() = delete;
49+
50+
template <typename T> static T &load(sycl::global_ptr<T> Ptr) {
51+
check_load();
52+
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
53+
return *__builtin_intel_fpga_mem((T *)Ptr,
54+
_burst_coalesce | _cache |
55+
_dont_statically_coalesce | _prefetch,
56+
_cache_val);
57+
#else
58+
return *Ptr;
59+
#endif
60+
}
61+
62+
template <typename T> static void store(sycl::global_ptr<T> Ptr, T Val) {
63+
check_store();
64+
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
65+
*__builtin_intel_fpga_mem((T *)Ptr,
66+
_burst_coalesce | _cache |
67+
_dont_statically_coalesce | _prefetch,
68+
_cache_val) = Val;
69+
#else
70+
*Ptr = Val;
71+
#endif
72+
}
73+
74+
private:
75+
static constexpr int32_t _burst_coalesce_val =
76+
GetValue<burst_coalesce_impl, mem_access_params...>::value;
77+
static constexpr uint8_t _burst_coalesce =
78+
_burst_coalesce_val == 1 ? BURST_COALESCE : 0;
79+
80+
static constexpr int32_t _cache_val =
81+
GetValue<cache, mem_access_params...>::value;
82+
static constexpr uint8_t _cache = (_cache_val > 0) ? CACHE : 0;
83+
84+
static constexpr int32_t _statically_coalesce_val =
85+
GetValue<statically_coalesce_impl, mem_access_params...>::value;
86+
static constexpr uint8_t _dont_statically_coalesce =
87+
_statically_coalesce_val == 0 ? STATICALLY_COALESCE : 0;
88+
89+
static constexpr int32_t _prefetch_val =
90+
GetValue<prefetch_impl, mem_access_params...>::value;
91+
static constexpr uint8_t _prefetch = _prefetch_val ? PREFETCH : 0;
92+
93+
static_assert(_cache_val >= 0, "cache size parameter must be non-negative");
94+
95+
static void check_load() {
96+
static_assert(_cache == 0 || _burst_coalesce == BURST_COALESCE,
97+
"unable to implement a cache without a burst coalescer");
98+
static_assert(_prefetch == 0 || _burst_coalesce == 0,
99+
"unable to implement a prefetcher and a burst coalescer "
100+
"simulataneously");
101+
static_assert(
102+
_prefetch == 0 || _cache == 0,
103+
"unable to implement a prefetcher and a cache simulataneously");
104+
}
105+
static void check_store() {
106+
static_assert(_cache == 0, "unable to implement a store LSU with a cache.");
107+
static_assert(_prefetch == 0,
108+
"unable to implement a store LSU with a prefetcher.");
109+
}
110+
};
111+
} // namespace intel
112+
} // namespace sycl
113+
} // __SYCL_INLINE_NAMESPACE(cl)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//==------------- fpga_utils.hpp --- SYCL FPGA Reg Extensions --------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <CL/sycl/detail/defines.hpp>
12+
#include <CL/sycl/stl.hpp>
13+
14+
__SYCL_INLINE_NAMESPACE(cl) {
15+
namespace sycl {
16+
namespace intel {
17+
18+
template <template <int32_t> class Type, class T>
19+
struct MatchType : std::is_same<Type<T::value>, T> {};
20+
21+
template <template <int32_t> class Type, class... T> struct GetValue {
22+
static constexpr auto value = Type<0>::default_value;
23+
};
24+
25+
template <template <int32_t> class Type, class T1, class... T>
26+
struct GetValue<Type, T1, T...> {
27+
static constexpr auto value =
28+
std::conditional<MatchType<Type, T1>::value, T1,
29+
GetValue<Type, T...>>::type::value;
30+
};
31+
} // namespace intel
32+
} // namespace sycl
33+
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/test/fpga_tests/fpga_lsu.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// RUN: %clangxx -fsycl %s -o %t.out
2+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
3+
//==----------------- fpga_lsu.cpp - SYCL FPGA LSU test --------------------==//
4+
//
5+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6+
// See https://llvm.org/LICENSE.txt for license information.
7+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
//
9+
//===----------------------------------------------------------------------===//
10+
#include <CL/sycl.hpp>
11+
#include <CL/sycl/intel/fpga_extensions.hpp>
12+
13+
// TODO: run is disabled, since no support added in FPGA backend yet. Check
14+
// implementation correctness from CXX and SYCL languages perspective.
15+
16+
int test_lsu(cl::sycl::queue Queue) {
17+
int output_data[2];
18+
for (size_t i = 0; i < 2; i++) {
19+
output_data[i] = -1;
20+
}
21+
22+
int input_data[2];
23+
for (size_t i = 0; i < 2; i++) {
24+
input_data[i] = i + 1;
25+
}
26+
27+
{
28+
cl::sycl::buffer<int, 1> output_buffer(output_data, 1);
29+
cl::sycl::buffer<int, 1> input_buffer(input_data, 1);
30+
31+
Queue.submit([&](cl::sycl::handler &cgh) {
32+
auto output_accessor =
33+
output_buffer.get_access<cl::sycl::access::mode::write>(cgh);
34+
auto input_accessor =
35+
input_buffer.get_access<cl::sycl::access::mode::read>(cgh);
36+
37+
cgh.single_task<class kernel>([=] {
38+
auto input_ptr = input_accessor.get_pointer();
39+
auto output_ptr = output_accessor.get_pointer();
40+
41+
using PrefetchingLSU =
42+
cl::sycl::intel::lsu<cl::sycl::intel::prefetch<true>,
43+
cl::sycl::intel::statically_coalesce<false>>;
44+
45+
using BurstCoalescedLSU =
46+
cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
47+
cl::sycl::intel::statically_coalesce<false>>;
48+
49+
using CachingLSU =
50+
cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
51+
cl::sycl::intel::cache<1024>,
52+
cl::sycl::intel::statically_coalesce<false>>;
53+
54+
using PipelinedLSU = cl::sycl::intel::lsu<>;
55+
56+
int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
57+
int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]
58+
59+
BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X
60+
PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y
61+
});
62+
});
63+
}
64+
65+
for (int i = 0; i < 2; i++) {
66+
if (output_data[i] != input_data[i]) {
67+
std::cout << "Unexpected read from output_data: " << output_data[i]
68+
<< ", v.s. expected " << input_data[i] << std::endl;
69+
70+
return -1;
71+
}
72+
}
73+
return 0;
74+
}
75+
76+
int main() {
77+
cl::sycl::queue Queue{cl::sycl::intel::fpga_emulator_selector{}};
78+
79+
return test_lsu(Queue);
80+
}

0 commit comments

Comments
 (0)