numerics
Loading...
Searching...
No Matches
banded.hpp
Go to the documentation of this file.
1/// @file banded.hpp
2/// @brief High-performance banded matrix solver for HPC applications
3///
4/// Implements LAPACK-style banded storage and LU factorization with partial
5/// pivoting. Optimized for vectorization and cache efficiency on modern
6/// supercomputers (e.g., NCAR Derecho).
7#pragma once
8
9#include "core/types.hpp"
10#include "core/vector.hpp"
11#include "core/policy.hpp"
12#include <memory>
13
14namespace num {
15
16/// @brief Banded matrix with efficient storage for direct solvers
17///
18/// Uses LAPACK-style band storage format optimized for LU factorization.
19/// For a matrix with kl lower diagonals and ku upper diagonals, storage
20/// layout is:
21///
22/// - Rows 0 to kl-1: Extra space for fill-in during LU factorization
23/// - Rows kl to kl+ku: Upper diagonals (ku at top, main diagonal at kl+ku)
24/// - Rows kl+ku+1 to 2*kl+ku: Lower diagonals
25///
26/// Element A(i,j) is stored at band(kl + ku + i - j, j) for max(0,j-ku) <= i <= min(n-1,j+kl)
27///
28/// This format enables:
29/// - Column-major access patterns for LU factorization
30/// - SIMD-friendly memory layout
31/// - Direct compatibility with LAPACK routines if needed
33public:
34 /// @brief Construct a banded matrix
35 /// @param n Matrix dimension (n x n)
36 /// @param kl Number of lower diagonals
37 /// @param ku Number of upper diagonals
39
40 /// @brief Construct with initial value
42
44
45 // Rule of Five
47 BandedMatrix(BandedMatrix&&) noexcept;
50
51 /// @brief Matrix dimension
52 idx size() const { return n_; }
53 idx rows() const { return n_; }
54 idx cols() const { return n_; }
55
56 /// @brief Number of lower diagonals
57 idx kl() const { return kl_; }
58
59 /// @brief Number of upper diagonals
60 idx ku() const { return ku_; }
61
62 /// @brief Total bandwidth (kl + ku + 1)
63 idx bandwidth() const { return kl_ + ku_ + 1; }
64
65 /// @brief Leading dimension of band storage (2*kl + ku + 1)
66 idx ldab() const { return ldab_; }
67
68 /// @brief Access element at (row, col) in original matrix coordinates
69 /// @param i Row index (0-based)
70 /// @param j Column index (0-based)
71 /// @return Reference to element (undefined if outside band)
73 real operator()(idx i, idx j) const;
74
75 /// @brief Direct access to band storage
76 /// @param band_row Row in band storage (0 to ldab-1)
77 /// @param col Column index (0 to n-1)
78 real& band(idx band_row, idx col);
79 real band(idx band_row, idx col) const;
80
81 /// @brief Raw pointer to band storage (column-major)
82 real* data() { return data_.get(); }
83 const real* data() const { return data_.get(); }
84
85 /// @brief Check if (i,j) is within the band
86 bool in_band(idx i, idx j) const;
87
88 // GPU support
89 void to_gpu();
90 void to_cpu();
91 real* gpu_data() { return d_data_; }
92 const real* gpu_data() const { return d_data_; }
93 bool on_gpu() const { return d_data_ != nullptr; }
94
95private:
96 idx n_; ///< Matrix dimension
97 idx kl_; ///< Number of lower diagonals
98 idx ku_; ///< Number of upper diagonals
99 idx ldab_; ///< Leading dimension (2*kl + ku + 1)
100 std::unique_ptr<real[]> data_; ///< Band storage (ldab * n elements)
101 real* d_data_ = nullptr; ///< GPU data pointer
102};
103
104/// @brief Result from banded solver
106 bool success; ///< True if solve succeeded
107 idx pivot_row; ///< Row of zero pivot if singular (0 if success)
108 real rcond; ///< Reciprocal condition number estimate (0 if not computed)
109};
110
111/// @brief LU factorization of banded matrix with partial pivoting
112///
113/// Computes A = P * L * U where:
114/// - P is a permutation matrix
115/// - L is lower triangular with unit diagonal
116/// - U is upper triangular
117///
118/// The factorization is performed in-place. After factorization:
119/// - U is stored in rows 0 to kl+ku (including fill-in)
120/// - L multipliers are stored in rows kl+ku+1 to 2*kl+ku
121/// - ipiv contains the pivot indices
122///
123/// @param A Banded matrix (modified in place with LU factors)
124/// @param ipiv Pivot indices (size n, allocated by caller)
125/// @return Result indicating success or location of zero pivot
126///
127/// Complexity: O(n * kl * (kl + ku)) operations
129
130/// @brief Solve banded system using precomputed LU factorization
131///
132/// Solves A*x = b where A has been factored by banded_lu().
133/// The solution overwrites the right-hand side vector.
134///
135/// @param A LU-factored banded matrix from banded_lu()
136/// @param ipiv Pivot indices from banded_lu()
137/// @param b Right-hand side (overwritten with solution)
138///
139/// Complexity: O(n * (kl + ku)) operations
140void banded_lu_solve(const BandedMatrix& A, const idx* ipiv, Vector& b);
141
142/// @brief Solve multiple right-hand sides using LU factorization
143///
144/// Solves A*X = B where B has nrhs columns stored contiguously.
145///
146/// @param A LU-factored banded matrix
147/// @param ipiv Pivot indices
148/// @param B Right-hand sides (n x nrhs, column-major, overwritten with solutions)
149/// @param nrhs Number of right-hand sides
150void banded_lu_solve_multi(const BandedMatrix& A, const idx* ipiv,
151 real* B, idx nrhs);
152
153/// @brief Solve banded system Ax = b (convenience function)
154///
155/// Performs LU factorization and solve in one call. For solving multiple
156/// systems with the same matrix, use banded_lu() and banded_lu_solve()
157/// separately to avoid redundant factorization.
158///
159/// @param A Banded matrix (NOT modified - internal copy made)
160/// @param b Right-hand side
161/// @param x Solution vector (output)
162/// @return Result indicating success or failure
164
165/// @brief Banded matrix-vector product y = A*x
166///
167/// Optimized for cache efficiency with loop ordering that accesses
168/// the band storage in column-major order.
169///
170/// @param A Banded matrix
171/// @param x Input vector
172/// @param y Output vector (overwritten)
173/// @param backend Execution backend (Backend::gpu uses CUDA path when available)
174void banded_matvec(const BandedMatrix& A, const Vector& x, Vector& y,
175 Backend backend = default_backend);
176
177/// @brief Banded matrix-vector product y = alpha*A*x + beta*y
178///
179/// Generalized matrix-vector multiply with scaling factors.
180///
181/// @param alpha Scalar multiplier for A*x
182/// @param A Banded matrix
183/// @param x Input vector
184/// @param beta Scalar multiplier for y
185/// @param y Input/output vector
186/// @param backend Execution backend (Backend::gpu uses CUDA path when available)
187void banded_gemv(real alpha, const BandedMatrix& A, const Vector& x,
188 real beta, Vector& y, Backend backend = default_backend);
189
190/// @brief Estimate reciprocal condition number of banded matrix
191///
192/// Uses 1-norm condition number estimation after LU factorization.
193///
194/// @param A LU-factored banded matrix
195/// @param ipiv Pivot indices
196/// @param anorm 1-norm of original matrix (before factorization)
197/// @return Reciprocal condition number (small = ill-conditioned)
199
200/// @brief Compute 1-norm of banded matrix
201/// @param A Banded matrix
202/// @return Maximum absolute column sum
204
205} // namespace num
Banded matrix with efficient storage for direct solvers.
Definition banded.hpp:32
idx kl() const
Number of lower diagonals.
Definition banded.hpp:57
idx ldab() const
Leading dimension of band storage (2*kl + ku + 1)
Definition banded.hpp:66
idx cols() const
Definition banded.hpp:54
idx size() const
Matrix dimension.
Definition banded.hpp:52
idx bandwidth() const
Total bandwidth (kl + ku + 1)
Definition banded.hpp:63
BandedMatrix & operator=(const BandedMatrix &)
Definition banded.cpp:51
idx rows() const
Definition banded.hpp:53
bool on_gpu() const
Definition banded.hpp:93
const real * gpu_data() const
Definition banded.hpp:92
real & operator()(idx i, idx j)
Access element at (row, col) in original matrix coordinates.
Definition banded.cpp:76
bool in_band(idx i, idx j) const
Check if (i,j) is within the band.
Definition banded.cpp:92
real & band(idx band_row, idx col)
Direct access to band storage.
Definition banded.cpp:84
const real * data() const
Definition banded.hpp:83
idx ku() const
Number of upper diagonals.
Definition banded.hpp:60
real * data()
Raw pointer to band storage (column-major)
Definition banded.hpp:82
real * gpu_data()
Definition banded.hpp:91
Core type definitions.
BandedSolverResult banded_solve(const BandedMatrix &A, const Vector &b, Vector &x)
Solve banded system Ax = b (convenience function)
Definition banded.cpp:247
double real
Definition types.hpp:10
Backend
Selects which backend handles a linalg operation.
Definition policy.hpp:19
void banded_lu_solve_multi(const BandedMatrix &A, const idx *ipiv, real *B, idx nrhs)
Solve multiple right-hand sides using LU factorization.
Definition banded.cpp:213
real banded_norm1(const BandedMatrix &A)
Compute 1-norm of banded matrix.
Definition banded.cpp:305
real beta(real a, real b)
B(a, b) – beta function.
Definition math.hpp:242
constexpr T ipow(T x) noexcept
Compute x^N at compile time via repeated squaring.
std::size_t idx
Definition types.hpp:11
BandedSolverResult banded_lu(BandedMatrix &A, idx *ipiv)
LU factorization of banded matrix with partial pivoting.
Definition banded.cpp:111
real banded_rcond(const BandedMatrix &A, const idx *ipiv, real anorm)
Estimate reciprocal condition number of banded matrix.
Definition banded.cpp:321
void banded_gemv(real alpha, const BandedMatrix &A, const Vector &x, real beta, Vector &y, Backend backend=default_backend)
Banded matrix-vector product y = alpha*A*x + beta*y.
Definition banded.cpp:268
void banded_lu_solve(const BandedMatrix &A, const idx *ipiv, Vector &b)
Solve banded system using precomputed LU factorization.
Definition banded.cpp:175
constexpr Backend default_backend
Definition policy.hpp:57
void banded_matvec(const BandedMatrix &A, const Vector &x, Vector &y, Backend backend=default_backend)
Banded matrix-vector product y = A*x.
Definition banded.cpp:264
Backend enum for linear algebra operations.
Result from banded solver.
Definition banded.hpp:105
bool success
True if solve succeeded.
Definition banded.hpp:106
idx pivot_row
Row of zero pivot if singular (0 if success)
Definition banded.hpp:107
real rcond
Reciprocal condition number estimate (0 if not computed)
Definition banded.hpp:108
Vector operations.