nngn
Loading...
Searching...
No Matches
compute.h
Go to the documentation of this file.
1
84#ifndef NNGN_COMPUTE_COMPUTE_H
85#define NNGN_COMPUTE_COMPUTE_H
86
87#include <array>
88#include <cassert>
89#include <cstring>
90#include <memory>
91#include <numeric>
92#include <ranges>
93#include <span>
94#include <string>
95#include <string_view>
96#include <vector>
97
98#include "utils/concepts.h"
99#include "utils/def.h"
100#include "utils/ranges.h"
101#include "utils/utils.h"
102
103namespace nngn {
104
/// Base class for computation back ends, which may be hardware-accelerated.
// NOTE(review): this listing omits several original lines (see the gaps in
// the embedded line numbers); declarations whose first line is missing are
// flagged individually below.
108struct Compute {
    /// Indicates which type of back end to create.
    // NOTE(review): the enumerators are not present in this listing.
110 enum class Backend : u8 {
119 };
    /// Device categories, each encoded as a distinct bit.
120 enum class DeviceType : u8 {
121 CPU = 1u << 0, GPU = 1u << 1
122 };
    /// Major/minor version pair; both fields default to zero.
123 struct Version { u32 major = {}, minor = {}; };
    /// Supported parameter types for kernel execution.
    // NOTE(review): only some enumerators are present in this listing; other
    // parts of the file reference BYTE, INT, UINT, FLOAT, BUFFER, IMAGE,
    // SAMPLER and the SCALAR_/VECTOR_/HANDLE_ BEGIN/END markers.
133 enum class Type : u8 {
    /// Invalid value.
135 NONE,
    /// Device-local memory.
137 LOCAL,
    /// Pointer to raw memory.
144 DATA,
148 N,
149 };
    /// Properties of memory blocks (bit flags).
151 enum MemFlag : u8 {
152 READ_WRITE = 1u << 0, WRITE_ONLY = 1u << 1, READ_ONLY = 1u << 2,
153 };
    /// Kernel execution flags.
155 enum ExecFlag : u8 {
    /// Block the execution until the operation finishes.
162 BLOCKING = 1u << 0,
163 };
    /// Bit mask indicating which profiling data to collect/return.
169 enum ProfInfo : u8 {
170 QUEUED = 1u << 0, SUBMIT = 1u << 1, START = 1u << 2, END = 1u << 3,
172 };
    /// Base class for handles to opaque compute objects.
174 struct Handle {
175 u32 id = {};
    /// A handle converts to `true` when its `id` is non-zero.
176 constexpr explicit operator bool() const { return this->id; }
177 };
    // Typed handles; those with a `type` member expose the `Type` value used
    // by the `arg_type` mapping for `Handle`-derived types.
178 struct Buffer : Handle { static constexpr auto type = Type::BUFFER; };
179 struct Image : Handle { static constexpr auto type = Type::IMAGE; };
180 struct Sampler : Handle { static constexpr auto type = Type::SAMPLER; };
181 struct Program : Handle {};
182 struct Kernel : Handle {};
    // Declared here only; no definition is visible in this listing.
188 struct Event;
    /// Controls dependencies between operations.
190 struct Events {
    /// Number of elements in `wait_list`.
192 std::size_t n_wait = {};
    /// Events that must precede the execution of the operation.
194 const Event *const *wait_list = {};
    /// Buffer where the operation's resulting events will be placed.
201 Event *const *events = {};
202 };
    /// Argument type for raw memory passed to the execution kernel "by value".
210 struct DataArg {
    // Size in bytes and start address of the referenced memory.
211 std::size_t s = {};
212 const std::byte *p = {};
213 template<typename T>
    /// Convenience constructor to pass a single object as an argument.
    /// Stores `sizeof(T)` and the address of `*t`; the object is not copied.
215 explicit constexpr DataArg(const T *t) : s(sizeof(T)), p(as_bytes(t)) {}
    // `begin`/`end` delimit the byte sequence `[p, p + s)`.
216 constexpr auto begin() const { return this->p; }
217 constexpr auto end() const { return this->p + this->s; }
218 };
    /// Maps supported types to the equivalent `Type` value
    /// (`Type::NONE` unless specialized).
220 template<typename T> static constexpr Type arg_type = Type::NONE;
    /// Determines if `t` is one of the vector values in `Type`.
222 static constexpr bool is_vector_type(Type t);
    /// Determines if `t` is in the handle range of `Type`.
223 static constexpr bool is_handle_type(Type t);
    /// Transforms a scalar type into the equivalent vector type.
229 template<Type t> static constexpr Type to_vector_type();
    /// Creates a back end of the specified type.
235 static std::unique_ptr<Compute> create(
236 Backend b, const void *params = nullptr);
    /// Must be called before the back end can be used.
239 virtual bool init() = 0;
240 virtual size_t n_platforms() const = 0;
241 virtual size_t n_devices() const = 0;
    // Pointer overload of `get_limits`; the vector-returning convenience
    // overload passes it a buffer of `Limit::N` elements.
243 virtual void get_limits(u64 *p) const = 0;
246 virtual std::string platform_name() const = 0;
247 virtual std::string device_name() const = 0;
    // NOTE(review): the first line of this declaration is missing from the
    // listing (presumably `create_buffer` — confirm against the real header).
253 MemFlag flags, std::size_t n, const std::byte *p) = 0;
254 virtual bool read_buffer(
255 Buffer b, std::size_t off, std::size_t n, std::byte *p,
256 Events events) const = 0;
    /// Fills the buffer with `n` copies of `v`.
258 virtual bool fill_buffer(
259 Buffer b, std::size_t off, std::size_t n, std::byte v,
260 Events events) const = 0;
    // Overload taking a pattern of `pattern_size` bytes at `p` instead of a
    // single byte value.
265 virtual bool fill_buffer(
266 Buffer b, std::size_t off, std::size_t n,
267 std::size_t pattern_size, const std::byte *p,
268 Events events) const = 0;
269 virtual bool write_buffer(
270 Buffer b, std::size_t off, std::size_t n, const std::byte *p,
271 Events events) const = 0;
    /// Writes a rectangular region of a buffer.
276 virtual bool write_buffer_rect(
277 Buffer b,
278 std::array<std::size_t, 3> buffer_origin,
279 std::array<std::size_t, 3> host_origin,
280 std::array<std::size_t, 3> region,
281 std::size_t buffer_row_pitch, std::size_t buffer_slice_pitch,
282 std::size_t host_row_pitch, std::size_t host_slice_pitch,
283 const std::byte *p, Events events) const = 0;
284 virtual void *map_buffer(
285 Buffer b, MemFlag flags, std::size_t off, std::size_t n,
286 Events events) const = 0;
287 virtual bool unmap_buffer(Buffer b, void *p, Events events) const = 0;
    /// Writes the variadic arguments sequentially to the buffer.
293 template<typename ...Ts>
294 bool write_struct(Buffer b, Events events, Ts &&...ts) const;
295 virtual bool release_buffer(Buffer b) = 0;
    // NOTE(review): the first line of this declaration is missing from the
    // listing (presumably `create_image` — confirm against the real header).
302 Type type, std::size_t w, std::size_t h, MemFlag flags,
303 const std::byte *p) = 0;
304 virtual bool read_image(
305 Image i, std::size_t w, std::size_t h, std::byte *p, Events events)
306 const = 0;
    /// Fills the image with copies of `v`.
308 virtual bool fill_image(
309 Image i, std::size_t w, std::size_t h, const void *v,
310 Events events) const = 0;
311 virtual bool release_image(Image i) = 0;
312 virtual Sampler create_sampler() = 0;
313 virtual bool release_sampler(Sampler s) = 0;
    /// Compiles source into a program using compilation options `opts`.
315 virtual Program create_program(std::string_view src, const char *opts) = 0;
316 virtual bool release_program(Program p) = 0;
    // NOTE(review): the first line of this declaration
    // (`virtual Kernel create_kernel(`) is missing from the listing.
    // The variadic `create_kernel` overload passes `len` entries in each of
    // `types`, `sizes` and `data`.
318 Program program, const char *func,
319 std::size_t len, const Type *types,
320 const std::size_t *sizes, const std::byte *const *data,
321 Events events) = 0;
322 virtual bool release_kernel(Kernel k) = 0;
    /// Number of events generated by `execute` for arguments `types`.
327 virtual std::size_t n_events(std::size_t n, const Type *types) const = 0;
    /// Collect profiling information from `n` events.
332 virtual bool prof_info(
333 ProfInfo info, std::size_t n, const Event *const *events, u64 *out)
334 const = 0;
    /// Block until the given events have completed.
336 virtual bool wait(std::size_t n, const Event *const *v) const = 0;
337 virtual bool release_events(std::size_t n, const Event *const *v) const = 0;
    // Overload taking an existing `Kernel` handle.
338 virtual bool execute(
339 Kernel kernel, ExecFlag flags,
340 u32 n_dim, const std::size_t *global_size,
341 const std::size_t *local_size, Events events) const = 0;
    // Overload taking a program and function name plus the argument arrays
    // (`types`, `sizes`, `data`); the variadic `execute` overload passes
    // `len` entries in each.
354 virtual bool execute(
355 Program program, const std::string &func, ExecFlag flags,
356 u32 n_dim, const std::size_t *global_size,
357 const std::size_t *local_size, std::size_t len, const Type *types,
358 const std::size_t *sizes, const std::byte *const *data,
359 Events events) const = 0;
    // NOTE(review): the declarator line of this template is missing from the
    // listing (presumably the variadic `create_kernel` overload — confirm).
361 template<typename ...Ts>
363 Program program, const char *func, Events events, Ts &&...ts);
    // Variadic convenience overload: gathers the type, size and address of
    // each argument and forwards to the array-based `execute`.
376 template<typename ...Ts>
377 bool execute(
378 Program program, const std::string &func, ExecFlag flags,
379 u32 n_dim, const std::size_t *global_size,
380 const std::size_t *local_size, Events events, Ts &&...ts);
381};
382
/// Creates the back end selected by the `Compute::Backend` template argument.
// Declaration only: no definition is visible in this listing.
// NOTE(review): presumably each back end supplies its own specialization and
// interprets `params` itself — confirm against the back-end sources.
383template<Compute::Backend>
384std::unique_ptr<Compute> compute_create_backend(const void *params);
385
// NOTE(review): this namespace-scope `arg_type` is a separate entity from the
// member `Compute::arg_type`; none of the code visible in this listing uses
// it (the variadic helpers use `Compute::arg_type`). Confirm whether it is
// still needed.
386template<typename T>
387static constexpr Compute::Type arg_type = Compute::Type::NONE;
// Scalar mappings for `Compute::arg_type`.
388template<>
389constexpr auto Compute::arg_type<std::byte> = Compute::Type::BYTE;
390template<>
391constexpr auto Compute::arg_type<i32> = Compute::Type::INT;
392template<>
393constexpr auto Compute::arg_type<u32> = Compute::Type::UINT;
394template<>
395constexpr auto Compute::arg_type<float> = Compute::Type::FLOAT;
396
// Types derived from `Compute::Handle` map to their own static `type` member.
397template<std::derived_from<Compute::Handle> T>
398constexpr auto Compute::arg_type<T> = T::type;
399
// Ranges map to the vector counterpart of whatever their value type maps to.
400template<std::ranges::range T>
401constexpr auto Compute::arg_type<T> =
402 Compute::to_vector_type<Compute::arg_type<std::ranges::range_value_t<T>>>();
403
404inline constexpr bool Compute::is_vector_type(Type t)
405 { return Type::VECTOR_BEGIN <= t && t <= Type::VECTOR_END; }
406inline constexpr bool Compute::is_handle_type(Type t)
407 { return Type::HANDLE_BEGIN <= t && t <= Type::HANDLE_END; }
408
409namespace detail {
410
411inline auto arg_size(const std::byte&) { return sizeof(std::byte); }
412inline auto arg_ptr(const std::byte &b) { return as_bytes(&b); }
413
414inline auto arg_size(const Compute::Handle &t) { return sizeof(t.id); }
415inline auto arg_ptr(const Compute::Handle &t) { return as_bytes(&t.id); }
416
417template<arithmetic T> auto arg_size(const T&) { return sizeof(T); }
418auto arg_ptr(const arithmetic auto &t) { return as_bytes(&t); }
419
420auto arg_size(const std::ranges::sized_range auto &r) {
421 // TODO https://bugs.llvm.org/show_bug.cgi?id=39663
422 // return std::span{r}.size_bytes();
423 auto s = std::span{r};
424 return s.size_bytes();
425}
426auto arg_ptr(const std::ranges::range auto &r)
427 { return std::as_bytes(std::span{r}).data(); }
428
429}
430
/// Transforms a scalar type into the equivalent vector type.
// For `t` in [`SCALAR_BEGIN`, `VECTOR_BEGIN`), the result is `VECTOR_BEGIN`
// offset by the distance of `t` from `BYTE`, computed on the underlying
// integer type.
// NOTE(review): this assumes the vector enumerators follow the same order as
// the scalar ones starting at `BYTE` — the enumerator list is not visible
// here, confirm.
// NOTE(review): the statement of the `else` branch is missing from this
// listing; its value for non-scalar `t` cannot be determined from here.
431template<Compute::Type t>
432inline constexpr auto Compute::to_vector_type() -> Type {
433 using T = std::underlying_type_t<Compute::Type>;
434 if constexpr(Type::SCALAR_BEGIN <= t && t < Type::VECTOR_BEGIN)
435 return static_cast<Compute::Type>(
436 static_cast<T>(Compute::Type::VECTOR_BEGIN)
437 + static_cast<T>(t)
438 - static_cast<T>(Compute::Type::BYTE));
439 else
441}
442
443inline std::vector<u64> Compute::get_limits() const {
444 std::vector<u64> ret(Limit::N);
445 this->get_limits(ret.data());
446 return ret;
447}
448
449template<typename ...Ts>
450bool Compute::write_struct(Buffer b, Events events, Ts &&...ts) const {
451 const std::array sizes = {detail::arg_size(ts)...};
452 std::vector<std::byte> data(std::reduce(cbegin(sizes), cend(sizes)));
453 assert(!data.empty());
454 auto copy = [p = data.data(), s = sizes.data()](const auto &x) mutable
455 { std::memcpy(p, detail::arg_ptr(x), *s); p += *s++; };
456 (..., copy(ts));
457 return this->write_buffer(b, 0, data.size(), data.data(), events);
458}
459
// Variadic convenience overload: describes each argument in `ts` by its
// `Type`, size in bytes and address, then forwards to the array-based
// `create_kernel`.
// NOTE(review): the declarator line of this definition is missing from this
// listing (presumably `auto Compute::create_kernel(` — confirm).
460template<typename ...Ts>
462 Program program, const char *func, Events events, Ts &&...ts
463) -> Kernel {
464 constexpr auto n = sizeof...(Ts);
    // Parallel arrays with one entry per argument.
465 std::array<Type, n> types = {};
466 std::array<std::size_t, n> sizes = {};
467 std::array<const std::byte*, n> data = {};
    // Each call fills the next slot of the three arrays and advances the
    // captured cursors.
468 auto f =
469 [tp = types.data(), sp = sizes.data(), dp = data.data()]
470 <typename T>(const T &t) mutable
471 {
472 *tp++ = Compute::arg_type<T>;
473 *sp++ = detail::arg_size(t);
474 *dp++ = as_bytes(detail::arg_ptr(t));
475 };
    // The comma fold visits the arguments in order, so slot `i` describes the
    // `i`-th argument.
476 (..., f(ts));
    // Only addresses are stored in `data`: the arguments are not copied.
477 return this->create_kernel(
478 program, func, n, types.data(), sizes.data(), data.data(), events);
479}
480
// Variadic convenience overload: describes each argument in `ts` by its
// `Type`, size in bytes and address, then forwards to the array-based
// `execute` overload that takes a program and function name.
// NOTE(review): the declarator line of this definition is missing from this
// listing (presumably `bool Compute::execute(` — confirm).
481template<typename ...Ts>
483 Program program, const std::string &func, ExecFlag flags,
484 u32 n_dim, const std::size_t *global_size,
485 const std::size_t *local_size, Events events, Ts &&...ts) {
486 constexpr auto n = sizeof...(Ts);
    // Parallel arrays with one entry per argument.
487 std::array<Type, n> types = {};
488 std::array<std::size_t, n> sizes = {};
489 std::array<const std::byte*, n> data = {};
    // Each call fills the next slot of the three arrays and advances the
    // captured cursors.
    // NOTE(review): this lambda is identical to the one in the variadic
    // `create_kernel`; a shared helper could remove the duplication.
490 auto f =
491 [tp = types.data(), sp = sizes.data(), dp = data.data()]
492 <typename T>(const T &t) mutable
493 {
494 *tp++ = Compute::arg_type<T>;
495 *sp++ = detail::arg_size(t);
496 *dp++ = as_bytes(detail::arg_ptr(t));
497 };
    // The comma fold visits the arguments in order, so slot `i` describes the
    // `i`-th argument.
498 (..., f(ts));
    // Only addresses are stored in `data`: the arguments are not copied.
499 return this->execute(
500 program, func, flags, n_dim, global_size, local_size,
501 n, types.data(), sizes.data(), data.data(), events);
502}
503
504}
505
506#endif
local class const
Definition animation.lua:7
Definition fundamental.h:28
Definition concepts.h:17
Definition vec.h:60
assert
Definition debug.lua:3
local_size
Definition common.lua:28
for i
Definition font.lua:5
local n
Definition dump_lights.lua:5
#define T(f0, f1, f2)
local r
Definition gamma.lua:7
local data
Definition house0.lua:10
read_image
Definition img_common.lua:36
read_buffer
Definition img_common.lua:35
create_buffer
Definition img_common.lua:37
create_image
Definition img_common.lua:38
local function bool(title, init, text)
auto arg_ptr(const std::byte &b)
Definition compute.h:412
auto arg_size(const std::byte &)
Definition compute.h:411
Definition audio.cpp:7
std::uint32_t u32
Definition def.h:14
std::uint8_t u8
Definition def.h:12
std::uint64_t u64
Definition def.h:15
std::int32_t i32
Definition def.h:10
std::unique_ptr< Compute > compute_create_backend(const void *params)
auto as_bytes(const void *p)
Definition utils.h:158
Definition debug.h:13
func
Definition plot.lua:8
v[1]
Definition math.lua:22
local function f()) end
#define NNGN_VIRTUAL(x)
Definition utils.h:27
local w
Definition strict.lua:12
Definition compute.h:178
static constexpr auto type
Definition compute.h:178
Argument type for raw memory passed to the execution kernel "by value".
Definition compute.h:210
constexpr DataArg(const T *t)
Convenience constructor to pass a single object as an argument.
Definition compute.h:215
constexpr auto begin() const
Definition compute.h:216
constexpr auto end() const
Definition compute.h:217
const std::byte * p
Definition compute.h:212
std::size_t s
Definition compute.h:211
Controls dependencies between operations.
Definition compute.h:190
std::size_t n_wait
Number of elements in wait_list.
Definition compute.h:192
Event *const * events
Buffer where the operation's resulting events will be placed.
Definition compute.h:201
const Event *const * wait_list
Events that must precede the execution of the operation.
Definition compute.h:194
Base class for handles to opaque compute objects.
Definition compute.h:174
u32 id
Definition compute.h:175
Definition compute.h:179
static constexpr auto type
Definition compute.h:179
Definition compute.h:182
Definition compute.h:124
Version version
Desired OpenCL version.
Definition compute.h:126
DeviceType preferred_device
Prefer this device type on initialization.
Definition compute.h:130
bool debug
Enables debugging, also required for profiling information.
Definition compute.h:128
Definition compute.h:181
Definition compute.h:180
static constexpr auto type
Definition compute.h:180
Definition compute.h:123
u32 major
Definition compute.h:123
u32 minor
Definition compute.h:123
Base class for computation back ends, which may be hardware-accelerated.
Definition compute.h:108
virtual std::size_t n_events(std::size_t n, const Type *types) const =0
Number of events generated by execute for arguments types.
virtual bool prof_info(ProfInfo info, std::size_t n, const Event *const *events, u64 *out) const =0
Collect profiling information from n events.
virtual size_t n_devices() const =0
virtual Kernel create_kernel(Program program, const char *func, std::size_t len, const Type *types, const std::size_t *sizes, const std::byte *const *data, Events events)=0
virtual bool release_kernel(Kernel k)=0
virtual bool release_buffer(Buffer b)=0
static constexpr bool is_vector_type(Type t)
Determines if t is one of the *V vector values in Type.
Definition compute.h:404
DeviceType
Definition compute.h:120
static constexpr bool is_handle_type(Type t)
Definition compute.h:406
virtual bool execute(Kernel kernel, ExecFlag flags, u32 n_dim, const std::size_t *global_size, const std::size_t *local_size, Events events) const =0
static std::unique_ptr< Compute > create(Backend b, const void *params=nullptr)
Creates a back end of the specified type.
Definition compute.cpp:7
bool write_struct(Buffer b, Events events, Ts &&...ts) const
Writes the variadic arguments sequentially to the buffer.
Definition compute.h:450
virtual bool write_buffer_rect(Buffer b, std::array< std::size_t, 3 > buffer_origin, std::array< std::size_t, 3 > host_origin, std::array< std::size_t, 3 > region, std::size_t buffer_row_pitch, std::size_t buffer_slice_pitch, std::size_t host_row_pitch, std::size_t host_slice_pitch, const std::byte *p, Events events) const =0
Writes a rectangular region of a buffer.
virtual bool release_image(Image i)=0
virtual std::string platform_name() const =0
virtual bool write_buffer(Buffer b, std::size_t off, std::size_t n, const std::byte *p, Events events) const =0
virtual Sampler create_sampler()=0
ProfInfo
Bit mask indicating which profiling data to collect/return.
Definition compute.h:169
@ QUEUED
Definition compute.h:170
@ PROF_INFO_MAX
Definition compute.h:171
@ SUBMIT
Definition compute.h:170
@ PROF_INFO_ALL
Definition compute.h:171
@ END
Definition compute.h:170
@ START
Definition compute.h:170
Limit
Indices for the values accessible via get_limits.
Definition compute.h:165
@ COMPUTE_UNITS
Definition compute.h:166
@ N
Definition compute.h:166
@ WORK_GROUP_SIZE
Definition compute.h:166
@ LOCAL_MEMORY
Definition compute.h:166
virtual bool fill_buffer(Buffer b, std::size_t off, std::size_t n, std::byte v, Events events) const =0
Fills the buffer with n copies of v.
virtual void * map_buffer(Buffer b, MemFlag flags, std::size_t off, std::size_t n, Events events) const =0
virtual bool release_events(std::size_t n, const Event *const *v) const =0
virtual bool fill_image(Image i, std::size_t w, std::size_t h, const void *v, Events events) const =0
Fills the image with copies of v.
virtual bool release_program(Program p)=0
static constexpr Type to_vector_type()
Transforms a scalar type into the equivalent vector type.
virtual Program create_program(std::string_view src, const char *opts)=0
Compiles source into a program using compilation options opts.
ExecFlag
Kernel execution flags.
Definition compute.h:155
@ BLOCKING
Block the execution until the operation finishes.
Definition compute.h:162
MemFlag
Properties of memory blocks.
Definition compute.h:151
@ READ_WRITE
Definition compute.h:152
@ READ_ONLY
Definition compute.h:152
@ WRITE_ONLY
Definition compute.h:152
virtual bool wait(std::size_t n, const Event *const *v) const =0
Block until the given events have completed.
static constexpr Type arg_type
Maps supported types to the equivalent Type value.
Definition compute.h:220
Type
Supported parameter types for kernel execution.
Definition compute.h:133
@ LOCAL
Device-local memory.
@ NONE
Invalid value.
@ DATA
Pointer to raw memory.
Backend
Indicates which type of back end to create.
Definition compute.h:110
@ PSEUDOCOMP
No-op back end which can be used in lieu of a real one.
@ OPENCL_BACKEND
OpenCL 1.2 back end.
virtual size_t n_platforms() const =0
virtual std::string device_name() const =0
virtual bool init()=0
Must be called before the back end can be used.
virtual bool unmap_buffer(Buffer b, void *p, Events events) const =0
std::vector< u64 > get_limits() const
Convenience overload that allocates the required memory.
Definition compute.h:443
virtual bool release_sampler(Sampler s)=0
Definition utils.h:91
Definition types.h:32
std::chrono::seconds s
Definition timing.cpp:6