Functions
real *	alloc (idx n)
	Allocate device memory.

void	free (real *ptr)
	Free device memory.

void	to_device (real *dst, const real *src, idx n)
	Copy host to device.

void	to_host (real *dst, const real *src, idx n)
	Copy device to host.

void	scale (real *v, idx n, real alpha)
	v = alpha * v

void	add (const real *x, const real *y, real *z, idx n)
	z = x + y

void	axpy (real alpha, const real *x, real *y, idx n)
	y = alpha*x + y

real	dot (const real *x, const real *y, idx n)
	Dot product of x and y.

void	matvec (const real *A, const real *x, real *y, idx rows, idx cols)
	y = A * x (row-major A)

void	matmul (const real *A, const real *B, real *C, idx m, idx k, idx n)
	C = A * B.

void	thomas_batched (const real *a, const real *b, const real *c, const real *d, real *x, idx n, idx batch_size)
	Batched Thomas algorithm for tridiagonal systems.

Function Documentation

◆ add()

void num::cuda::add	(	const real *	x,
		const real *	y,
		real *	z,
		idx	n
	)

z = x + y

Definition at line 25 of file cuda_stubs.cpp.

Referenced by num::add().

◆ alloc()

real * num::cuda::alloc ( idx n )

Allocate device memory.

Definition at line 10 of file cuda_stubs.cpp.

Referenced by num::BasicMatrix< T >::to_gpu(), num::BasicVector< T >::to_gpu(), and num::BandedMatrix::to_gpu().

◆ axpy()

void num::cuda::axpy	(	real	alpha,
		const real *	x,
		real *	y,
		idx	n
	)

y = alpha*x + y

Definition at line 28 of file cuda_stubs.cpp.

Referenced by num::backends::gpu::axpy().

◆ dot()

real num::cuda::dot	(	const real *	x,
		const real *	y,
		idx	n
	)

Dot product of x and y.

Definition at line 31 of file cuda_stubs.cpp.

Referenced by num::backends::gpu::dot(), and num::backends::gpu::norm().

◆ free()

void num::cuda::free ( real * ptr )

Free device memory.

Definition at line 13 of file cuda_stubs.cpp.

Referenced by num::BandedMatrix::operator=(), num::BasicMatrix< T >::operator=(), num::BasicVector< T >::operator=(), num::BandedMatrix::operator=(), num::BasicMatrix< T >::to_cpu(), num::BasicVector< T >::to_cpu(), num::BandedMatrix::~BandedMatrix(), num::BasicMatrix< T >::~BasicMatrix(), and num::BasicVector< T >::~BasicVector().

◆ matmul()

void num::cuda::matmul	(	const real *	A,
		const real *	B,
		real *	C,
		idx	m,
		idx	k,
		idx	n
	)

C = A * B.

Definition at line 37 of file cuda_stubs.cpp.

Referenced by num::backends::gpu::matmul().

◆ matvec()

void num::cuda::matvec	(	const real *	A,
		const real *	x,
		real *	y,
		idx	rows,
		idx	cols
	)

y = A * x (row-major A)

Definition at line 34 of file cuda_stubs.cpp.

Referenced by num::backends::gpu::matvec().

◆ scale()

void num::cuda::scale	(	real *	v,
		idx	n,
		real	alpha
	)

v = alpha * v

Definition at line 22 of file cuda_stubs.cpp.

Referenced by num::backends::gpu::scale().

◆ thomas_batched()

void num::cuda::thomas_batched	(	const real *	a,
		const real *	b,
		const real *	c,
		const real *	d,
		real *	x,
		idx	n,
		idx	batch_size
	)

Batched Thomas algorithm for tridiagonal systems.

Parameters

a	Lower diagonals (batch_size arrays of size n-1, packed consecutively)
b	Main diagonals (batch_size arrays of size n)
c	Upper diagonals (batch_size arrays of size n-1, packed consecutively)
d	Right-hand sides (batch_size arrays of size n)
x	Solution vectors (batch_size arrays of size n)
n	Size of each system
batch_size	Number of independent systems to solve

Definition at line 40 of file cuda_stubs.cpp.

Referenced by num::thomas().

◆ to_device()

void num::cuda::to_device	(	real *	dst,
		const real *	src,
		idx	n
	)

Copy host to device.

Definition at line 16 of file cuda_stubs.cpp.

Referenced by num::cg(), num::BasicMatrix< T >::to_gpu(), num::BasicVector< T >::to_gpu(), and num::BandedMatrix::to_gpu().

◆ to_host()

void num::cuda::to_host	(	real *	dst,
		const real *	src,
		idx	n
	)

Copy device to host.

Definition at line 19 of file cuda_stubs.cpp.

Referenced by num::BasicMatrix< T >::to_cpu(), num::BasicVector< T >::to_cpu(), and num::BandedMatrix::to_cpu().

Functions