11#if defined(__GNUC__) || defined(__clang__)
12 #define NUM_K_AINLINE [[gnu::always_inline]] inline
13 #define NUM_K_RESTRICT __restrict__
14 #define NUM_K_IVDEP _Pragma("GCC ivdep")
16 #define NUM_K_AINLINE inline
17 #define NUM_K_RESTRICT
24template<std::
floating_po
int T>
27 for (
idx i = 0; i < n; ++i) {
33template<std::
floating_po
int T>
39 for (
idx i = 0; i < n; ++i) {
45template<std::
floating_po
int T>
52 for (
idx i = 0; i < n; ++i) {
53 y[i] = (a * x[i]) + (b * y[i]);
58template<std::
floating_po
int T>
66 for (
idx i = 0; i < n; ++i) {
67 z[i] = (a * x[i]) + (b * y[i]);
72template<std::
floating_po
int T>
78 for (
idx i = 0; i < n; ++i) {
85template<std::
floating_po
int T>
89 for (
idx i = 0; i < n; ++i) {
96template<std::
floating_po
int T>
98 return std::sqrt(
norm_sq(x, n));
102template<std::
floating_po
int T>
106 for (
idx i = 0; i < n; ++i) {
113template<std::
floating_po
int T>
116 for (
idx i = 0; i < n; ++i) {
117 const T v = std::abs(x[i]);
126template<std::
floating_po
int T>
130 for (
idx i = 0; i < n; ++i) {
139template<std::
floating_po
int T>
145 for (
idx i = 0; i < m; ++i) {
147 const T* row = A + (i * n);
149 for (
idx j = 0; j < n; ++j) {
159template<std::
floating_po
int T>
166 for (
idx i = 0; i < m; ++i) {
168 const T axi = alpha * x[i];
170 for (
idx j = 0; j < n; ++j) {
171 row[j] += axi * y[j];
180template<std::
floating_po
int T>
185 for (
idx i = 0; i < n; ++i) {
187 const T* row = L + (i * n);
188 for (
idx j = 0; j < i; ++j) {
199template<std::
floating_po
int T>
204 for (
idx i = n; i-- > 0;) {
206 const T* row = U + (i * n);
207 for (
idx j = i + 1; j < n; ++j) {
NUM_K_AINLINE T sum(const T *NUM_K_RESTRICT x, idx n) noexcept
Scalar sum: return sum x[i].
NUM_K_AINLINE T linf_norm(const T *NUM_K_RESTRICT x, idx n) noexcept
L-infinity norm: max |x[i]|.
NUM_K_AINLINE void trsv_upper(T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT U, const T *NUM_K_RESTRICT b, idx n) noexcept
Back substitution: solve Ux = b, U upper triangular (n x n, row-major).
NUM_K_AINLINE void ger(T *NUM_K_RESTRICT A, const T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT y, T alpha, idx m, idx n) noexcept
Rank-1 update: A[i*n + j] += alpha * x[i] * y[j] (m x n row-major)
NUM_K_AINLINE T dot(const T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT y, idx n) noexcept
dot product: return sum x[i] * y[i]
NUM_K_AINLINE void trsv_lower(T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT L, const T *NUM_K_RESTRICT b, idx n) noexcept
Forward substitution: solve Lx = b, L lower triangular (n x n, row-major).
NUM_K_AINLINE void axpy(T *NUM_K_RESTRICT y, const T *NUM_K_RESTRICT x, T alpha, idx n) noexcept
y[i] += alpha * x[i]
NUM_K_AINLINE T l1_norm(const T *NUM_K_RESTRICT x, idx n) noexcept
L1 norm: sum |x[i]|.
NUM_K_AINLINE T norm(const T *NUM_K_RESTRICT x, idx n) noexcept
Euclidean norm: sqrt(sum x[i]^2)
NUM_K_AINLINE void axpby(T *NUM_K_RESTRICT y, const T *NUM_K_RESTRICT x, T a, T b, idx n) noexcept
y[i] = a*x[i] + b*y[i].
NUM_K_AINLINE void matvec(T *NUM_K_RESTRICT y, const T *NUM_K_RESTRICT A, const T *NUM_K_RESTRICT x, idx m, idx n) noexcept
y[i] = sum_j A[i*n + j] * x[j] (m x n row-major matrix)
NUM_K_AINLINE void axpbyz(T *NUM_K_RESTRICT z, const T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT y, T a, T b, idx n) noexcept
z[i] = a*x[i] + b*y[i].
NUM_K_AINLINE T norm_sq(const T *NUM_K_RESTRICT x, idx n) noexcept
sum x[i]^2 (no sqrt; use for convergence checks to avoid sqrt cost)
NUM_K_AINLINE void scale(T *NUM_K_RESTRICT x, T alpha, idx n) noexcept
x[i] *= alpha