|
numerics
|
CUDA kernel wrappers. More...
#include "core/types.hpp"
Go to the source code of this file.
Namespaces | |
| namespace | num |
| namespace | num::cuda |
Functions | |
| real * | num::cuda::alloc (idx n) |
| Allocate device memory. | |
| void | num::cuda::free (real *ptr) |
| Free device memory. | |
| void | num::cuda::to_device (real *dst, const real *src, idx n) |
| Copy host to device. | |
| void | num::cuda::to_host (real *dst, const real *src, idx n) |
| Copy device to host. | |
| void | num::cuda::scale (real *v, idx n, real alpha) |
| v = alpha * v | |
| void | num::cuda::add (const real *x, const real *y, real *z, idx n) |
| z = x + y | |
| void | num::cuda::axpy (real alpha, const real *x, real *y, idx n) |
| y = alpha*x + y | |
| real | num::cuda::dot (const real *x, const real *y, idx n) |
| Dot product of x and y. | |
| void | num::cuda::matvec (const real *A, const real *x, real *y, idx rows, idx cols) |
| y = A * x (row-major A) | |
| void | num::cuda::matmul (const real *A, const real *B, real *C, idx m, idx k, idx n) |
| C = A * B. | |
| void | num::cuda::thomas_batched (const real *a, const real *b, const real *c, const real *d, real *x, idx n, idx batch_size) |
| Batched Thomas algorithm for tridiagonal systems. | |
CUDA kernel wrappers.
Definition in file cuda_ops.hpp.