numerics 0.1.0
Loading...
Searching...
No Matches
dense.cpp
Go to the documentation of this file.
1/// @file kernel/dense.cpp
2/// @brief Implementations for num::kernel::dense (seq_t, par_t, and no-policy ops).
3///
4/// ger seq_t: calls raw::ger (routes to cblas_dger when NUMERICS_HAS_BLAS).
5/// ger par_t: OMP parallel-for over rows (each row is independent).
6///
7/// trsv: no policy; calls raw::trsv which routes to cblas_dtrsv when
8/// NUMERICS_HAS_BLAS, otherwise sequential forward/back substitution.
9
10#include "kernel/dense.hpp"
11#include "kernel/raw.hpp"
12#include <stdexcept>
13
14namespace num::kernel::dense {
15
16// ---------------------------------------------------------------------------
17// ger
18// ---------------------------------------------------------------------------
19
20void ger(real alpha, const Vector& x, const Vector& y, Matrix& A,
21 seq_t) noexcept {
22 raw::ger(A.data(), x.data(), y.data(), alpha, x.size(), y.size());
23}
24
25void ger(real alpha, const Vector& x, const Vector& y, Matrix& A, par_t) {
26#ifdef NUMERICS_HAS_OMP
27 const idx m = x.size();
28 const idx n = y.size();
29 const real* xd = x.data();
30 const real* yd = y.data();
31 real* Ad = A.data();
32 #pragma omp parallel for schedule(static)
33 for (idx i = 0; i < m; ++i) {
34 const real axi = alpha * xd[i];
35 real* row = Ad + (i * n);
36 for (idx j = 0; j < n; ++j) {
37 row[j] += axi * yd[j];
38 }
39 }
40#else
41 ger(alpha, x, y, A, seq_t{});
42#endif
43}
44
45// ---------------------------------------------------------------------------
46// trsv_lower
47// ---------------------------------------------------------------------------
48
49void trsv_lower(const Matrix& L, const Vector& b, Vector& x) {
50 const idx n = L.rows();
51 if (L.cols() != n || b.size() != n) {
52 throw std::invalid_argument("kernel::dense::trsv_lower: dimension mismatch");
53 }
54 if (x.size() != n) {
55 x = Vector(n);
56 }
57 raw::trsv_lower(x.data(), L.data(), b.data(), n);
58}
59
60// ---------------------------------------------------------------------------
61// trsv_upper
62// ---------------------------------------------------------------------------
63
64void trsv_upper(const Matrix& U, const Vector& b, Vector& x) {
65 const idx n = U.rows();
66 if (U.cols() != n || b.size() != n) {
67 throw std::invalid_argument("kernel::dense::trsv_upper: dimension mismatch");
68 }
69 if (x.size() != n) {
70 x = Vector(n);
71 }
72 raw::trsv_upper(x.data(), U.data(), b.data(), n);
73}
74
75} // namespace num::kernel::dense
constexpr idx size() const noexcept
Definition vector.hpp:80
Dense row-major matrix with optional GPU storage.
Definition matrix.hpp:12
real * data()
Definition matrix.hpp:28
constexpr idx rows() const noexcept
Definition matrix.hpp:24
constexpr idx cols() const noexcept
Definition matrix.hpp:25
Dense matrix inner kernels (namespace num::kernel::dense)
void trsv_upper(const Matrix &U, const Vector &b, Vector &x)
Back substitution: solve Ux = b.
Definition dense.cpp:64
void trsv_lower(const Matrix &L, const Vector &b, Vector &x)
Forward substitution: solve Lx = b.
Definition dense.cpp:49
void ger(real alpha, const Vector &x, const Vector &y, Matrix &A, seq_t) noexcept
Sequential rank-1 update: calls raw::ger (routes to cblas_dger when BLAS available; otherwise vectori...
Definition dense.cpp:20
NUM_K_AINLINE void ger(real *NUM_K_RESTRICT A, const real *NUM_K_RESTRICT x, const real *NUM_K_RESTRICT y, real alpha, idx m, idx n) noexcept
Rank-1 update: A[i*n + j] += alpha * x[i] * y[j] (m x n row-major)
Definition raw.hpp:254
NUM_K_AINLINE void trsv_lower(real *NUM_K_RESTRICT x, const real *NUM_K_RESTRICT L, const real *NUM_K_RESTRICT b, idx n) noexcept
Forward substitution: solve Lx = b, L lower triangular (n x n, row-major).
Definition raw.hpp:280
NUM_K_AINLINE void trsv_upper(real *NUM_K_RESTRICT x, const real *NUM_K_RESTRICT U, const real *NUM_K_RESTRICT b, idx n) noexcept
Back substitution: solve Ux = b, U upper triangular (n x n, row-major).
Definition raw.hpp:305
double real
Definition types.hpp:10
std::size_t idx
Definition types.hpp:11
BasicVector< real > Vector
Real-valued dense vector with full backend dispatch (CPU + GPU)
Definition vector.hpp:130
Tier-1 kernel: raw-pointer, inline, zero-overhead inner loops.
Parallel execution policy tag. Activates OMP parallel-for / reduction constructs when NUMERICS_HAS_OM...
Definition policy.hpp:43
Sequential execution policy tag. Guarantees no OMP parallel regions; safe to call inside an existing ...
Definition policy.hpp:38