numerics 0.1.0
Loading...
Searching...
No Matches
banded.hpp
Go to the documentation of this file.
1/// @file banded.hpp
2/// @brief High-performance banded matrix solver for HPC applications
3///
4/// Implements LAPACK-style banded storage and LU factorization with partial
5/// pivoting. Optimized for vectorization and cache efficiency on modern
6/// supercomputers (e.g., NCAR Derecho).
7#pragma once
8
9#include "core/policy.hpp"
10#include "core/types.hpp"
11#include "core/vector.hpp"
12#include <memory>
13
14namespace num {
15
16/// @brief Banded matrix with efficient storage for direct solvers
17///
18/// Uses LAPACK-style band storage format optimized for LU factorization.
19/// For a matrix with kl lower diagonals and ku upper diagonals, storage
20/// layout is:
21///
22/// - Rows 0 to kl-1: Extra space for fill-in during LU factorization
23/// - Rows kl to kl+ku: Upper diagonals (ku at top, main diagonal at kl+ku)
24/// - Rows kl+ku+1 to 2*kl+ku: Lower diagonals
25///
26/// Element A(i,j) is stored at band(kl + ku + i - j, j) for max(0,j-ku) <= i <=
27/// min(n-1,j+kl)
28///
29/// This format enables:
30/// - Column-major access patterns for LU factorization
31/// - SIMD-friendly memory layout
32/// - Direct compatibility with LAPACK routines if needed
34public:
35 /// @brief Construct a banded matrix
36 /// @param n Matrix dimension (n x n)
37 /// @param kl Number of lower diagonals
38 /// @param ku Number of upper diagonals
40
41 /// @brief Construct with initial value
42 BandedMatrix(idx n, idx kl, idx ku, real val);
43
45
46 // Rule of Five
48 BandedMatrix(BandedMatrix &&) noexcept;
51
52 /// @brief Matrix dimension
53 idx size() const { return n_; }
54 idx rows() const { return n_; }
55 idx cols() const { return n_; }
56
57 /// @brief Number of lower diagonals
58 idx kl() const { return kl_; }
59
60 /// @brief Number of upper diagonals
61 idx ku() const { return ku_; }
62
63 /// @brief Total bandwidth (kl + ku + 1)
64 idx bandwidth() const { return kl_ + ku_ + 1; }
65
66 /// @brief Leading dimension of band storage (2*kl + ku + 1)
67 idx ldab() const { return ldab_; }
68
69 /// @brief Access element at (row, col) in original matrix coordinates
70 /// @param i Row index (0-based)
71 /// @param j Column index (0-based)
72 /// @return Reference to element (undefined if outside band)
73 real &operator()(idx i, idx j);
74 real operator()(idx i, idx j) const;
75
76 /// @brief Direct access to band storage
77 /// @param band_row Row in band storage (0 to ldab-1)
78 /// @param col Column index (0 to n-1)
79 real &band(idx band_row, idx col);
80 real band(idx band_row, idx col) const;
81
82 /// @brief Raw pointer to band storage (column-major)
83 real *data() { return data_.get(); }
84 const real *data() const { return data_.get(); }
85
86 /// @brief Check if (i,j) is within the band
87 bool in_band(idx i, idx j) const;
88
89 // GPU support
90 void to_gpu();
91 void to_cpu();
92 real *gpu_data() { return d_data_; }
93 const real *gpu_data() const { return d_data_; }
94 bool on_gpu() const { return d_data_ != nullptr; }
95
96private:
97 idx n_ = 0; ///< Matrix dimension
98 idx kl_ = 0; ///< Number of lower diagonals
99 idx ku_ = 0; ///< Number of upper diagonals
100 idx ldab_ = 0; ///< Leading dimension (2*kl + ku + 1)
101 std::unique_ptr<real[]> data_; ///< Band storage (ldab * n elements)
102 real *d_data_ = nullptr; ///< GPU data pointer
103};
104
105/// @brief Result from banded solver
107 bool success = false; ///< True if solve succeeded
108 idx pivot_row = 0; ///< Row of zero pivot if singular (0 if success)
110 0.0; ///< Reciprocal condition number estimate (0 if not computed)
111};
112
113/// @brief LU factorization of banded matrix with partial pivoting
114///
115/// Computes A = P * L * U where:
116/// - P is a permutation matrix
117/// - L is lower triangular with unit diagonal
118/// - U is upper triangular
119///
120/// The factorization is performed in-place. After factorization:
121/// - U is stored in rows 0 to kl+ku (including fill-in)
122/// - L multipliers are stored in rows kl+ku+1 to 2*kl+ku
123/// - ipiv contains the pivot indices
124///
125/// @param A Banded matrix (modified in place with LU factors)
126/// @param ipiv Pivot indices (size n, allocated by caller)
127/// @return Result indicating success or location of zero pivot
128///
129/// Complexity: O(n * kl * (kl + ku)) operations
131
132/// @brief Solve banded system using precomputed LU factorization
133///
134/// Solves A*x = b where A has been factored by banded_lu().
135/// The solution overwrites the right-hand side vector.
136///
137/// @param A LU-factored banded matrix from banded_lu()
138/// @param ipiv Pivot indices from banded_lu()
139/// @param b Right-hand side (overwritten with solution)
140///
141/// Complexity: O(n * (kl + ku)) operations
142void banded_lu_solve(const BandedMatrix &A, const idx *ipiv, Vector &b);
143
144/// @brief Solve multiple right-hand sides using LU factorization
145///
146/// Solves A*X = B where B has nrhs columns stored contiguously.
147///
148/// @param A LU-factored banded matrix
149/// @param ipiv Pivot indices
150/// @param B Right-hand sides (n x nrhs, column-major, overwritten with
151/// solutions)
152/// @param nrhs Number of right-hand sides
153void banded_lu_solve_multi(const BandedMatrix &A, const idx *ipiv, real *B,
154 idx nrhs);
155
156/// @brief Solve banded system Ax = b (convenience function)
157///
158/// Performs LU factorization and solve in one call. For solving multiple
159/// systems with the same matrix, use banded_lu() and banded_lu_solve()
160/// separately to avoid redundant factorization.
161///
162/// @param A Banded matrix (NOT modified - internal copy made)
163/// @param b Right-hand side
164/// @param x Solution vector (output)
165/// @return Result indicating success or failure
167 Vector &x);
168
169/// @brief Banded matrix-vector product y = A*x
170///
171/// Optimized for cache efficiency with loop ordering that accesses
172/// the band storage in column-major order.
173///
174/// @param A Banded matrix
175/// @param x Input vector
176/// @param y Output vector (overwritten)
177/// @param backend Execution backend (Backend::gpu uses CUDA path when
178/// available)
179void banded_matvec(const BandedMatrix &A, const Vector &x, Vector &y,
180 Backend backend = default_backend);
181
182/// @brief Banded matrix-vector product y = alpha*A*x + beta*y
183///
184/// Generalized matrix-vector multiply with scaling factors.
185///
186/// @param alpha Scalar multiplier for A*x
187/// @param A Banded matrix
188/// @param x Input vector
189/// @param beta Scalar multiplier for y
190/// @param y Input/output vector
191/// @param backend Execution backend (Backend::gpu uses CUDA path when
192/// available)
193void banded_gemv(real alpha, const BandedMatrix &A, const Vector &x, real beta,
194 Vector &y, Backend backend = default_backend);
195
196/// @brief Estimate reciprocal condition number of banded matrix
197///
198/// Uses 1-norm condition number estimation after LU factorization.
199///
200/// @param A LU-factored banded matrix
201/// @param ipiv Pivot indices
202/// @param anorm 1-norm of original matrix (before factorization)
203/// @return Reciprocal condition number (small = ill-conditioned)
204real banded_rcond(const BandedMatrix &A, const idx *ipiv, real anorm);
205
206/// @brief Compute 1-norm of banded matrix
207/// @param A Banded matrix
208/// @return Maximum absolute column sum
210
211} // namespace num
Banded matrix with efficient storage for direct solvers.
Definition banded.hpp:33
idx kl() const
Number of lower diagonals.
Definition banded.hpp:58
idx ldab() const
Leading dimension of band storage (2*kl + ku + 1)
Definition banded.hpp:67
idx cols() const
Definition banded.hpp:55
idx size() const
Matrix dimension.
Definition banded.hpp:53
idx bandwidth() const
Total bandwidth (kl + ku + 1)
Definition banded.hpp:64
BandedMatrix & operator=(const BandedMatrix &)
Definition banded.cpp:62
idx rows() const
Definition banded.hpp:54
bool on_gpu() const
Definition banded.hpp:94
const real * gpu_data() const
Definition banded.hpp:93
real & operator()(idx i, idx j)
Access element at (row, col) in original matrix coordinates.
Definition banded.cpp:98
bool in_band(idx i, idx j) const
Check if (i,j) is within the band.
Definition banded.cpp:114
real & band(idx band_row, idx col)
Direct access to band storage.
Definition banded.cpp:106
const real * data() const
Definition banded.hpp:84
idx ku() const
Number of upper diagonals.
Definition banded.hpp:61
real * data()
Raw pointer to band storage (column-major)
Definition banded.hpp:83
real * gpu_data()
Definition banded.hpp:92
Backend enum for linear algebra operations.
Core type definitions.
BandedSolverResult banded_solve(const BandedMatrix &A, const Vector &b, Vector &x)
Solve banded system Ax = b (convenience function)
Definition banded.cpp:286
double real
Definition types.hpp:10
Backend
Selects which backend handles a linalg operation.
Definition policy.hpp:19
void banded_lu_solve_multi(const BandedMatrix &A, const idx *ipiv, real *B, idx nrhs)
Solve multiple right-hand sides using LU factorization.
Definition banded.cpp:249
real banded_norm1(const BandedMatrix &A)
Compute 1-norm of banded matrix.
Definition banded.cpp:358
real beta(real a, real b)
B(a, b) – beta function.
Definition math.hpp:248
std::size_t idx
Definition types.hpp:11
BandedSolverResult banded_lu(BandedMatrix &A, idx *ipiv)
LU factorization of banded matrix with partial pivoting.
Definition banded.cpp:135
real banded_rcond(const BandedMatrix &A, const idx *ipiv, real anorm)
Estimate reciprocal condition number of banded matrix.
Definition banded.cpp:374
void banded_gemv(real alpha, const BandedMatrix &A, const Vector &x, real beta, Vector &y, Backend backend=default_backend)
Banded matrix-vector product y = alpha*A*x + beta*y.
Definition banded.cpp:314
void banded_lu_solve(const BandedMatrix &A, const idx *ipiv, Vector &b)
Solve banded system using precomputed LU factorization.
Definition banded.cpp:209
constexpr Backend default_backend
Definition policy.hpp:92
void banded_matvec(const BandedMatrix &A, const Vector &x, Vector &y, Backend backend=default_backend)
Banded matrix-vector product y = A*x.
Definition banded.cpp:307
Result from banded solver.
Definition banded.hpp:106
bool success
True if solve succeeded.
Definition banded.hpp:107
idx pivot_row
Row of zero pivot if singular (0 if success)
Definition banded.hpp:108
real rcond
Reciprocal condition number estimate (0 if not computed)
Definition banded.hpp:109
Vector operations.