numerics 0.1.0
Loading...
Searching...
No Matches
cuda_stubs.cpp
Go to the documentation of this file.
2#include <stdexcept>
3
4namespace num::cuda {
5
/// Shared failure path for every stub in this file.
///
/// @throws std::runtime_error always, with the message "CUDA not available".
/// Marked [[noreturn]] so non-void stubs may call it without a return
/// statement.
[[noreturn]] static void no_cuda() { throw std::runtime_error{"CUDA not available"}; }
9
11 no_cuda();
12}
13void free(real*) {
14 no_cuda();
15}
16void to_device(real*, const real*, idx) {
17 no_cuda();
18}
19void to_host(real*, const real*, idx) {
20 no_cuda();
21}
22void scale(real*, idx, real) {
23 no_cuda();
24}
25void add(const real*, const real*, real*, idx) {
26 no_cuda();
27}
28void axpy(real, const real*, real*, idx) {
29 no_cuda();
30}
31real dot(const real*, const real*, idx) {
32 no_cuda();
33}
34void matvec(const real*, const real*, real*, idx, idx) {
35 no_cuda();
36}
37void matmul(const real*, const real*, real*, idx, idx, idx) {
38 no_cuda();
39}
40void thomas_batched(const real*,
41 const real*,
42 const real*,
43 const real*,
44 real*,
45 idx,
46 idx) {
47 no_cuda();
48}
49
50} // namespace num::cuda
CUDA kernel wrappers.
void to_device(real *dst, const real *src, idx n)
Copy host to device.
void scale(real *v, idx n, real alpha)
v = alpha * v
void thomas_batched(const real *a, const real *b, const real *c, const real *d, real *x, idx n, idx batch_size)
Batched Thomas algorithm for tridiagonal systems.
void matmul(const real *A, const real *B, real *C, idx m, idx k, idx n)
C = A * B.
void free(real *ptr)
Free device memory.
real * alloc(idx n)
Allocate device memory.
void to_host(real *dst, const real *src, idx n)
Copy device to host.
void add(const real *x, const real *y, real *z, idx n)
z = x + y
void axpy(real alpha, const real *x, real *y, idx n)
y = alpha*x + y
real dot(const real *x, const real *y, idx n)
Dot product of x and y.
void matvec(const real *A, const real *x, real *y, idx rows, idx cols)
y = A * x (row-major A)
double real
Definition types.hpp:10
std::size_t idx
Definition types.hpp:11