numerics/api/cg_8cpp_source.html

#include "linalg/solvers/cg.hpp"

#include "core/parallel/cuda_ops.hpp"

#include <cmath>

#include <stdexcept>


namespace num {


/// @brief Conjugate gradient solver for Ax = b (dense matrix).
///
/// Starts from the caller-supplied guess in @p x and overwrites it with the
/// computed iterate. CG is derived for symmetric positive-definite matrices;
/// that property is not verified here.
///
/// @param A        Square system matrix (n x n).
/// @param b        Right-hand side of length n.
/// @param x        In: initial guess of length n. Out: final iterate.
/// @param tol      Convergence threshold on the residual 2-norm ||b - A x||.
/// @param max_iter Maximum number of CG iterations.
/// @param backend  Backend forwarded to matvec/dot/axpy/scale.
/// @return SolverResult with the number of iterations performed, the last
///         computed residual norm, and whether the tolerance was met.
/// @throws std::invalid_argument if A is not n x n or x is not of length n.
SolverResult cg(const Matrix& A,
                const Vector& b,
                Vector&       x,
                real          tol,
                idx           max_iter,
                Backend       backend) {
    const idx n = b.size();
    if (A.rows() != n || A.cols() != n || x.size() != n)
        throw std::invalid_argument("Dimension mismatch in CG solver");

    const bool on_gpu = (backend == Backend::gpu);

    // GPU path: transfer all data to device first.
    // NOTE(review): constness of A and b is cast away to call to_gpu(). If a
    // caller passes an object that was itself declared const, and to_gpu()
    // modifies it, this is undefined behaviour -- confirm against the
    // Matrix/Vector implementation.
    if (on_gpu) {
        const_cast<Matrix&>(A).to_gpu();
        const_cast<Vector&>(b).to_gpu();
        x.to_gpu();
    }

    Vector r(n), p(n), Ap(n);
    if (on_gpu) {
        r.to_gpu();
        p.to_gpu();
        Ap.to_gpu();
    }

    // Initial residual r = b - A*x and first search direction p = r.
    matvec(A, x, r, backend);
    if (on_gpu) {
        scale(r, -1.0, backend);
        axpy(1.0, b, r, backend);
        // NOTE(review): cuda::to_device is documented as a host-to-device
        // copy, but the source here is r.gpu_data() -- confirm that this
        // helper is valid for a device-resident source.
        cuda::to_device(p.gpu_data(), r.gpu_data(), n);
    } else {
        for (idx i = 0; i < n; ++i)
            r[i] = b[i] - r[i];
        for (idx i = 0; i < n; ++i)
            p[i] = r[i];
    }

    real         rsold = dot(r, r, backend);
    SolverResult result{0, std::sqrt(rsold), false};

    // The initial guess may already satisfy the tolerance (e.g. b == 0 with
    // x == 0, or an exact warm start). Without this check the first iteration
    // would see p == 0, hit the breakdown guard, and report non-convergence.
    if (result.residual < tol) {
        result.converged = true;
        if (on_gpu)
            x.to_cpu();
        return result;
    }

    // Guard against dividing by a (numerically) zero curvature p^T A p.
    constexpr real breakdown_eps = 1e-15;

    for (idx iter = 0; iter < max_iter; ++iter) {
        result.iterations = iter + 1;
        matvec(A, p, Ap, backend);

        const real pAp = dot(p, Ap, backend);
        if (std::abs(pAp) < breakdown_eps)
            break;
        const real alpha = rsold / pAp;

        axpy(alpha, p, x, backend);    // x += alpha * p
        axpy(-alpha, Ap, r, backend);  // r -= alpha * A*p

        const real rsnew = dot(r, r, backend);
        result.residual  = std::sqrt(rsnew);

        if (result.residual < tol) {
            result.converged = true;
            break;
        }

        // p = r + beta * p
        const real beta = rsnew / rsold;
        scale(p, beta, backend);
        axpy(1.0, r, p, backend);
        rsold = rsnew;
    }

    // Bring the solution back to host memory for the caller.
    if (on_gpu)
        x.to_cpu();
    return result;
}


/// @brief Matrix-free conjugate gradient for Ax = b where A is SPD.
///
/// The operator is supplied as a callable that computes y = A*x. All vector
/// operations run on the sequential backend.
///
/// @param matvec_fn Callable invoked as matvec_fn(in, out), writing A*in to out.
/// @param b         Right-hand side of length n.
/// @param x         In: initial guess of length n. Out: final iterate.
/// @param tol       Convergence threshold on the residual 2-norm ||b - A x||.
/// @param max_iter  Maximum number of CG iterations.
/// @return SolverResult with the number of iterations performed, the last
///         computed residual norm, and whether the tolerance was met.
/// @throws std::invalid_argument if x and b differ in length.
SolverResult cg_matfree(MatVecFn      matvec_fn,
                        const Vector& b,
                        Vector&       x,
                        real          tol,
                        idx           max_iter) {
    const idx n = b.size();
    // Mirror the check done by cg(): a mismatched x would otherwise be
    // indexed out of range by the element-wise updates below.
    if (x.size() != n)
        throw std::invalid_argument("Dimension mismatch in CG solver");

    Vector r(n), p(n), Ap(n);

    // Initial residual r = b - A*x and first search direction p = r.
    matvec_fn(x, r);
    for (idx i = 0; i < n; ++i)
        r[i] = b[i] - r[i];
    for (idx i = 0; i < n; ++i)
        p[i] = r[i];

    real         rsold = dot(r, r, Backend::seq);
    SolverResult result{0, std::sqrt(rsold), false};

    // The initial guess may already satisfy the tolerance; report that
    // instead of falling into the p == 0 breakdown path on the first
    // iteration and returning converged == false.
    if (result.residual < tol) {
        result.converged = true;
        return result;
    }

    // Guard against dividing by a (numerically) zero curvature p^T A p.
    constexpr real breakdown_eps = 1e-15;

    for (idx iter = 0; iter < max_iter; ++iter) {
        result.iterations = iter + 1;
        matvec_fn(p, Ap);

        const real pAp = dot(p, Ap, Backend::seq);
        if (std::abs(pAp) < breakdown_eps)
            break;
        const real alpha = rsold / pAp;

        axpy(alpha, p, x, Backend::seq);    // x += alpha * p
        axpy(-alpha, Ap, r, Backend::seq);  // r -= alpha * A*p

        const real rsnew = dot(r, r, Backend::seq);
        result.residual  = std::sqrt(rsnew);
        if (result.residual < tol) {
            result.converged = true;
            break;
        }

        // p = r + beta * p
        const real beta = rsnew / rsold;
        scale(p, beta, Backend::seq);
        axpy(1.0, r, p, Backend::seq);
        rsold = rsnew;
    }
    return result;
}


} // namespace num

cg.hpp
Conjugate gradient solvers (dense and matrix-free)

num::BasicVector< real >

num::BasicVector::to_gpu
void to_gpu()
Definition vector.hpp:99

num::BasicVector::size
constexpr idx size() const noexcept
Definition vector.hpp:80

num::BasicVector::to_cpu
void to_cpu()
Definition vector.hpp:108

num::Matrix
Dense row-major matrix with optional GPU storage.
Definition matrix.hpp:12

num::Matrix::rows
constexpr idx rows() const noexcept
Definition matrix.hpp:24

num::Matrix::cols
constexpr idx cols() const noexcept
Definition matrix.hpp:25

cuda_ops.hpp
CUDA kernel wrappers.

num::cuda::to_device
void to_device(real *dst, const real *src, idx n)
Copy host to device.
Definition cuda_stubs.cpp:16

num
Definition quadrature.hpp:8

num::real
double real
Definition types.hpp:10

num::Backend
Backend
Selects which backend handles a linalg operation.
Definition policy.hpp:19

num::Backend::gpu
@ gpu
CUDA – custom kernels or cuBLAS.

num::Backend::seq
@ seq
Naive textbook loops – always available.

num::beta
real beta(real a, real b)
B(a, b) – beta function.
Definition math.hpp:248

num::MatVecFn
std::function< void(const Vector &, Vector &)> MatVecFn
Callable type for matrix-free matvec: computes y = A*x.
Definition cg.hpp:13

num::idx
std::size_t idx
Definition types.hpp:11

num::matvec
void matvec(const Matrix &A, const Vector &x, Vector &y, Backend b=default_backend)
y = A * x
Definition matrix.cpp:120

num::scale
void scale(Vector &v, real alpha, Backend b=default_backend)
v *= alpha
Definition vector.cpp:29

num::dot
real dot(const Vector &x, const Vector &y, Backend b=default_backend)
dot product
Definition vector.cpp:79

num::e
constexpr real e
Definition math.hpp:43

num::axpy
void axpy(real alpha, const Vector &x, Vector &y, Backend b=default_backend)
y += alpha * x
Definition vector.cpp:58

num::cg_matfree
SolverResult cg_matfree(MatVecFn matvec, const Vector &b, Vector &x, real tol=1e-6, idx max_iter=1000)
Matrix-free conjugate gradient for Ax = b where A is SPD.
Definition cg.cpp:78

num::cg
SolverResult cg(const Matrix &A, const Vector &b, Vector &x, real tol=1e-10, idx max_iter=1000, Backend backend=default_backend)
Conjugate gradient solver for Ax = b.
Definition cg.cpp:8

num::SolverResult
Definition solver_result.hpp:8

num::SolverResult::iterations
idx iterations
Number of iterations performed.
Definition solver_result.hpp:9