numerics
Loading...
Searching...
No Matches
cuda_stubs.cpp File Reference
#include "core/parallel/cuda_ops.hpp"
#include <stdexcept>

Go to the source code of this file.

Namespaces

namespace  num
 
namespace  num::cuda
 

Functions

real * num::cuda::alloc (idx n)
 Allocate device memory.
 
void num::cuda::free (real *ptr)
 Free device memory.
 
void num::cuda::to_device (real *dst, const real *src, idx n)
 Copy host to device.
 
void num::cuda::to_host (real *dst, const real *src, idx n)
 Copy device to host.
 
void num::cuda::scale (real *v, idx n, real alpha)
 v = alpha * v
 
void num::cuda::add (const real *x, const real *y, real *z, idx n)
 z = x + y
 
void num::cuda::axpy (real alpha, const real *x, real *y, idx n)
 y = alpha*x + y
 
real num::cuda::dot (const real *x, const real *y, idx n)
 Dot product of x and y.
 
void num::cuda::matvec (const real *A, const real *x, real *y, idx rows, idx cols)
 y = A * x (row-major A)
 
void num::cuda::matmul (const real *A, const real *B, real *C, idx m, idx k, idx n)
 C = A * B.
 
void num::cuda::thomas_batched (const real *a, const real *b, const real *c, const real *d, real *x, idx n, idx batch_size)
 Batched Thomas algorithm for tridiagonal systems.