numerics 0.1.0
Loading...
Searching...
No Matches
subspace.hpp
Go to the documentation of this file.
1/// @file kernel/subspace.hpp
2/// @brief Subspace construction and orthogonalization kernels.
3/// (namespace num::kernel::subspace)
4///
5/// These are the inner loops that any algorithm building or maintaining an
6/// orthonormal basis reduces to -- Krylov solvers (GMRES, MINRES), Lanczos,
7/// rational Krylov, exponential integrators (expv), randomized SVD, POD,
8/// and model-order reduction all share the same core operations.
9///
10/// Linear operator interface:
11/// LinearOp -- abstract base: apply(x, y), rows(), cols()
12/// DenseOp -- wraps Matrix (Backend-dispatched matvec)
13/// SparseOp -- wraps SparseMatrix
14/// CallableOp<F> -- wraps any callable void(const Vector&, Vector&)
15/// make_op(f, n) -- factory returning CallableOp<F> (stack, typed)
16/// make_op_ptr(f, n) -- factory returning unique_ptr<LinearOp> (heap, erased)
17///
18/// Orthogonalization kernels:
19/// mgs_orthogonalize -- Modified Gram-Schmidt: orthogonalize one vector
20/// against an existing basis, sequential and stable.
21/// arnoldi_step -- One Arnoldi step: apply operator + MGS + normalize.
22/// The inner loop that GMRES, Lanczos, and expv share.
23///
24/// Key design choice -- scratch buffer:
25/// arnoldi_step takes a pre-allocated scratch Vector for the operator apply.
26/// Callers allocate it once before the loop; the kernel reuses it at every
27/// step. This avoids one heap allocation per Arnoldi step.
28///
29/// Key design choice -- no OMP inside subspace kernels:
30/// mgs_orthogonalize and arnoldi_step call num::dot / num::axpy / num::norm
31/// with default_backend (which is BLAS > SIMD > blocked). The outer basis-
32/// expansion loop is inherently serial (each step depends on the previous).
33/// OMP at this level would only cause nested-parallelism issues.
34#pragma once
35
36#include "core/matrix.hpp"
37#include "core/policy.hpp"
38#include "core/types.hpp"
39#include "core/vector.hpp"
40#include "core/sparse.hpp"
41#include <memory>
42#include <vector>
43
44namespace num::kernel::subspace {
45
46// ===========================================================================
47// Linear operator interface
48// ===========================================================================
49
50/// @brief Abstract matrix-free linear operator: y = A*x.
51///
52/// Subclass this or use DenseOp / SparseOp / CallableOp / make_op.
53/// Virtual dispatch overhead is O(1) per apply(); work inside is O(n) or
54/// O(nnz), so the overhead is negligible.
55struct LinearOp {
56 virtual ~LinearOp() = default;
57
58 /// @brief y = A*x (y must be pre-allocated to the correct size)
59 virtual void apply(const Vector& x, Vector& y) const = 0;
60
61 [[nodiscard]] virtual idx rows() const noexcept = 0;
62 [[nodiscard]] virtual idx cols() const noexcept = 0;
63};
64
65/// @brief Wrap a dense Matrix as a LinearOp.
66///
67/// The matvec is dispatched to the chosen Backend (default: default_backend).
68struct DenseOp final : LinearOp {
69 explicit DenseOp(const Matrix& A, Backend b = default_backend)
70 : A_(A), b_(b) {}
71
72 void apply(const Vector& x, Vector& y) const override;
73 [[nodiscard]] idx rows() const noexcept override { return A_.rows(); }
74 [[nodiscard]] idx cols() const noexcept override { return A_.cols(); }
75
76private:
77 const Matrix& A_;
78 Backend b_;
79};
80
81/// @brief Wrap a SparseMatrix as a LinearOp.
82struct SparseOp final : LinearOp {
83 explicit SparseOp(const SparseMatrix& A) : A_(A) {}
84
85 void apply(const Vector& x, Vector& y) const override;
86 [[nodiscard]] idx rows() const noexcept override { return A_.n_rows(); }
87 [[nodiscard]] idx cols() const noexcept override { return A_.n_cols(); }
88
89private:
90 const SparseMatrix& A_;
91};
92
93/// @brief Wrap any callable void(const Vector&, Vector&) as a LinearOp.
94///
95/// Useful for structured operators -- stencils, FFT-based, implicit matrices --
96/// without forming an explicit matrix. The functor is stored by value.
97///
98/// apply() pre-allocates y to the correct size (n_) before calling f_ so that
99/// callers (e.g. arnoldi_step passing a pre-allocated scratch buffer) never
100/// trigger a heap allocation inside the hot Arnoldi loop. Lambdas passed to
101/// make_op / make_op_ptr can safely assume y.size() == n on entry.
102///
103/// @code
104/// auto op = num::kernel::subspace::make_op(
105/// [&](const Vector& x, Vector& y) { laplacian(x, y, N); }, N * N);
106/// @endcode
107template<typename F>
108struct CallableOp final : LinearOp {
109 CallableOp(F f, idx n) : f_(std::move(f)), n_(n) {}
110
111 void apply(const Vector& x, Vector& y) const override {
112 if (y.size() != n_) { y = Vector(n_); }
113 f_(x, y);
114 }
115 [[nodiscard]] idx rows() const noexcept override { return n_; }
116 [[nodiscard]] idx cols() const noexcept override { return n_; }
117
118private:
119 F f_;
120 idx n_;
121};
122
123/// @brief Factory: wrap a callable as a stack-allocated CallableOp<F>.
124///
125/// Use when the operator type is known at the call site (zero overhead;
126/// compiler can inline apply()).
127template<typename F>
128[[nodiscard]] CallableOp<F> make_op(F f, idx n) {
129 return CallableOp<F>(std::move(f), n);
130}
131
132/// @brief Factory: wrap a callable as a heap-allocated LinearOp.
133///
134/// Use when you need type-erased storage (unique_ptr<LinearOp>).
135template<typename F>
136[[nodiscard]] std::unique_ptr<LinearOp> make_op_ptr(F f, idx n) {
137 return std::make_unique<CallableOp<F>>(std::move(f), n);
138}
139
140// ===========================================================================
141// Orthogonalization kernels
142// ===========================================================================
143
/// @brief Modified Gram-Schmidt: orthogonalize v against basis[0..k-1].
///
/// For each i in [0, k), computes h[i] = <v, basis[i]> then subtracts
/// h[i] * basis[i] from v. Updates v in-place.
///
/// MGS performs the projections sequentially; each one modifies v before the
/// next dot product, giving better numerical stability than classical GS at
/// the cost of k sequential passes over v.
///
/// Uses num::dot and num::axpy with default_backend (BLAS > SIMD > seq).
///
/// @param basis Orthonormal basis; only basis[0..k-1] are read.
/// @param v     Vector to orthogonalize in-place.
/// @param h     Output projection coefficients h[0..k-1]; must be
///              pre-allocated with size >= k.
/// @param k     Number of basis vectors to project out.
/// @return ||v||₂ after all projections are removed (= the next subdiagonal
///         element in the Hessenberg matrix when used inside arnoldi_step).
[[nodiscard]] real mgs_orthogonalize(const std::vector<Vector>& basis,
                                     Vector& v,
                                     std::vector<real>& h,
                                     idx k);
163
/// @brief Modified Gram-Schmidt: orthogonalize v against columns 0..k-1 of a
/// column-stored basis matrix.
///
/// Overload for algorithms (e.g. Lanczos) that store their Krylov basis
/// column-by-column in a dense Matrix rather than a std::vector<Vector>.
/// The Matrix is stored row-major, so column l is accessed with stride
/// basis.cols() between successive elements.
///
/// When NUMERICS_HAS_BLAS is defined, uses cblas_ddot / cblas_daxpy with
/// the correct stride so the BLAS handles the strided access natively.
/// Otherwise falls back to a plain loop via basis(i, l).
///
/// @param basis Row-major Matrix of shape (n, max_steps); columns are the
///              orthonormal basis vectors. Only columns 0..k-1 are read.
/// @param k     Number of columns to project out (columns 0..k-1).
/// @param v     Vector to orthogonalize in-place (length n).
/// @return ||v||₂ after all projections are removed.
[[nodiscard]] real mgs_orthogonalize(const Matrix& basis, idx k, Vector& v);
182
/// @brief One Arnoldi step: expand the orthonormal basis by one vector.
///
/// Algorithm:
///   1. w = A * basis[k]            (operator apply via scratch buffer)
///   2. h[0..k] = mgs_orthogonalize(basis[0..k], w)
///   3. h[k+1] = ||w||              (subdiagonal Hessenberg element)
///   4. if h[k+1] > breakdown_tol:
///        w /= h[k+1]; basis.push_back(w)  (new orthonormal basis vector)
///
/// @param A             Linear operator.
/// @param basis         Current orthonormal basis (basis[0..k] present on
///                      entry; basis[k+1] appended on exit if no breakdown).
/// @param h             Hessenberg column k; must be pre-allocated with
///                      size >= k+2.
/// @param k             Current step index (0-based).
/// @param scratch       Pre-allocated Vector of size n; reused across steps
///                      to avoid one heap allocation per Arnoldi step.
/// @param breakdown_tol Lucky breakdown threshold (default 1e-14).
///
/// @return h[k+1]: the subdiagonal element. Near-zero signals lucky breakdown
///         (an A-invariant subspace was found; the projected solution is
///         exact).
[[nodiscard]] real arnoldi_step(const LinearOp& A,
                                std::vector<Vector>& basis,
                                std::vector<real>& h,
                                idx k,
                                Vector& scratch,
                                real breakdown_tol = real(1e-14));
209
210} // namespace num::kernel::subspace
constexpr idx size() const noexcept
Definition vector.hpp:80
Dense row-major matrix with optional GPU storage.
Definition matrix.hpp:12
Sparse matrix in Compressed Sparse Row (CSR) format.
Definition sparse.hpp:15
Backend enum for linear algebra operations.
Core type definitions.
Matrix operations.
CallableOp< F > make_op(F f, idx n)
Factory: wrap a callable as a stack-allocated CallableOp<F>.
Definition subspace.hpp:128
real arnoldi_step(const LinearOp &A, std::vector< Vector > &basis, std::vector< real > &h, idx k, Vector &scratch, real breakdown_tol=real(1e-14))
One Arnoldi step: expand the orthonormal basis by one vector.
Definition subspace.cpp:73
std::unique_ptr< LinearOp > make_op_ptr(F f, idx n)
Factory: wrap a callable as a heap-allocated LinearOp.
Definition subspace.hpp:136
real mgs_orthogonalize(const std::vector< Vector > &basis, Vector &v, std::vector< real > &h, idx k)
Modified Gram-Schmidt: orthogonalize v against basis[0..k-1].
Definition subspace.cpp:32
double real
Definition types.hpp:10
Backend
Selects which backend handles a linalg operation.
Definition policy.hpp:19
std::size_t idx
Definition types.hpp:11
constexpr real e
Definition math.hpp:43
constexpr Backend default_backend
Definition policy.hpp:92
Compressed Sparse Row (CSR) matrix and operations.
Wrap any callable void(const Vector&, Vector&) as a LinearOp.
Definition subspace.hpp:108
void apply(const Vector &x, Vector &y) const override
y = A*x (y must be pre-allocated to the correct size)
Definition subspace.hpp:111
idx rows() const noexcept override
Definition subspace.hpp:115
idx cols() const noexcept override
Definition subspace.hpp:116
Wrap a dense Matrix as a LinearOp.
Definition subspace.hpp:68
DenseOp(const Matrix &A, Backend b=default_backend)
Definition subspace.hpp:69
idx rows() const noexcept override
Definition subspace.hpp:73
idx cols() const noexcept override
Definition subspace.hpp:74
Abstract matrix-free linear operator: y = A*x.
Definition subspace.hpp:55
virtual void apply(const Vector &x, Vector &y) const =0
y = A*x (y must be pre-allocated to the correct size)
virtual idx cols() const noexcept=0
virtual idx rows() const noexcept=0
Wrap a SparseMatrix as a LinearOp.
Definition subspace.hpp:82
idx rows() const noexcept override
Definition subspace.hpp:86
SparseOp(const SparseMatrix &A)
Definition subspace.hpp:83
idx cols() const noexcept override
Definition subspace.hpp:87
Vector operations.