numerics 0.1.0
Loading...
Searching...
No Matches
jacobi_eig.cpp
Go to the documentation of this file.
1/// @file linalg/eigen/backends/omp/jacobi_eig.cpp
2/// @brief OpenMP-parallel cyclic Jacobi eigensolver.
3///
4/// Same algorithm as backends/seq/jacobi_eig.cpp; the off-diagonal update
5/// and eigenvector accumulation loops are parallelised via OpenMP.
6#include "impl.hpp"
7#include <cmath>
8#include <algorithm>
9#include <stdexcept>
10
11#ifdef NUMERICS_HAS_OMP
12 #include <omp.h>
13#endif
14
15namespace num::backends::omp {
16
17EigenResult eig_sym(const Matrix& A_in, real tol, idx max_sweeps) {
18 if (A_in.rows() != A_in.cols())
19 throw std::invalid_argument("eig_sym: matrix must be square");
20
21 constexpr real rotation_tol = 1e-15;
22 idx n = A_in.rows();
23 Matrix A = A_in;
24 Matrix V(n, n, 0.0);
25 for (idx i = 0; i < n; ++i)
26 V(i, i) = 1.0;
27
28 idx sweeps = 0;
29 bool converged = false;
30
31 for (idx sweep = 0; sweep < max_sweeps; ++sweep) {
32 real off = 0;
33 for (idx p = 0; p < n; ++p)
34 for (idx q = p + 1; q < n; ++q)
35 off += A(p, q) * A(p, q);
36
37 if (std::sqrt(2.0 * off) < tol) {
38 converged = true;
39 break;
40 }
41
42 for (idx p = 0; p < n - 1; ++p) {
43 for (idx q = p + 1; q < n; ++q) {
44 real apq = A(p, q);
45 if (std::abs(apq) < rotation_tol)
46 continue;
47
48 real app = A(p, p), aqq = A(q, q);
49 real tau = (aqq - app) / (2.0 * apq);
50 real t = std::copysign(1.0, tau)
51 / (std::abs(tau) + std::sqrt(1.0 + tau * tau));
52 real c = 1.0 / std::sqrt(1.0 + t * t);
53 real s = c * t;
54
55 A(p, p) = c * c * app - 2.0 * c * s * apq + s * s * aqq;
56 A(q, q) = s * s * app + 2.0 * c * s * apq + c * c * aqq;
57 A(p, q) = A(q, p) = 0.0;
58
59#ifdef NUMERICS_HAS_OMP
60 #pragma omp parallel for schedule(static) if (n >= 128)
61#endif
62 for (idx r = 0; r < n; ++r) {
63 if (r == p || r == q)
64 continue;
65 real arp = A(r, p), arq = A(r, q);
66 A(r, p) = A(p, r) = c * arp - s * arq;
67 A(r, q) = A(q, r) = s * arp + c * arq;
68 }
69
70#ifdef NUMERICS_HAS_OMP
71 // Thread overhead dominates for small n; only parallelise above threshold.
72 #pragma omp parallel for schedule(static) if (n >= 128)
73#endif
74 for (idx r = 0; r < n; ++r) {
75 real vrp = V(r, p), vrq = V(r, q);
76 V(r, p) = c * vrp - s * vrq;
77 V(r, q) = s * vrp + c * vrq;
78 }
79 }
80 }
81 ++sweeps;
82 }
83
84 Vector values(n);
85 for (idx i = 0; i < n; ++i)
86 values[i] = A(i, i);
87
88 for (idx i = 0; i < n - 1; ++i) {
89 idx min_j = i;
90 for (idx j = i + 1; j < n; ++j)
91 if (values[j] < values[min_j])
92 min_j = j;
93 if (min_j != i) {
94 std::swap(values[i], values[min_j]);
95 for (idx r = 0; r < n; ++r)
96 std::swap(V(r, i), V(r, min_j));
97 }
98 }
99
100 return {values, V, sweeps, converged};
101}
102
103} // namespace num::backends::omp
Dense row-major matrix with optional GPU storage.
Definition matrix.hpp:12
constexpr idx rows() const noexcept
Definition matrix.hpp:24
constexpr idx cols() const noexcept
Definition matrix.hpp:25
EigenResult eig_sym(const Matrix &A, real tol, idx max_sweeps)
double real
Definition types.hpp:10
std::size_t idx
Definition types.hpp:11
constexpr real e
Definition math.hpp:43
std::experimental::simd butterfly for FFT.
Full eigendecomposition result: A = V * diag(values) * V^T.