numerics 0.1.0
Loading...
Searching...
No Matches
raw.hpp
Go to the documentation of this file.
1/// @file kernel/raw.hpp
2/// @brief Tier-1 kernel: raw-pointer, inline, zero-overhead inner loops.
3///
4/// Raw kernels assume non-owning, caller-sized buffers and do not allocate.
5#pragma once
6
7#include "core/types.hpp"
8#include <cmath>
9#include <concepts>
10
11#if defined(__GNUC__) || defined(__clang__)
12 #define NUM_K_AINLINE [[gnu::always_inline]] inline
13 #define NUM_K_RESTRICT __restrict__
14 #define NUM_K_IVDEP _Pragma("GCC ivdep")
15#else
16 #define NUM_K_AINLINE inline
17 #define NUM_K_RESTRICT
18 #define NUM_K_IVDEP
19#endif
20
22
23/// @brief x[i] *= alpha
24template<std::floating_point T>
25NUM_K_AINLINE void scale(T* NUM_K_RESTRICT x, T alpha, idx n) noexcept {
27 for (idx i = 0; i < n; ++i) {
28 x[i] *= alpha;
29 }
30}
31
32/// @brief y[i] += alpha * x[i]
33template<std::floating_point T>
35 const T* NUM_K_RESTRICT x,
36 T alpha,
37 idx n) noexcept {
39 for (idx i = 0; i < n; ++i) {
40 y[i] += alpha * x[i];
41 }
42}
43
44/// @brief y[i] = a*x[i] + b*y[i].
45template<std::floating_point T>
47 const T* NUM_K_RESTRICT x,
48 T a,
49 T b,
50 idx n) noexcept {
52 for (idx i = 0; i < n; ++i) {
53 y[i] = (a * x[i]) + (b * y[i]);
54 }
55}
56
57/// @brief z[i] = a*x[i] + b*y[i].
58template<std::floating_point T>
60 const T* NUM_K_RESTRICT x,
61 const T* NUM_K_RESTRICT y,
62 T a,
63 T b,
64 idx n) noexcept {
66 for (idx i = 0; i < n; ++i) {
67 z[i] = (a * x[i]) + (b * y[i]);
68 }
69}
70
71/// @brief dot product: return sum x[i] * y[i]
72template<std::floating_point T>
73[[nodiscard]] NUM_K_AINLINE T dot(const T* NUM_K_RESTRICT x,
74 const T* NUM_K_RESTRICT y,
75 idx n) noexcept {
76 T s = T(0);
78 for (idx i = 0; i < n; ++i) {
79 s += x[i] * y[i];
80 }
81 return s;
82}
83
84/// @brief sum x[i]^2 (no sqrt; use for convergence checks to avoid sqrt cost)
85template<std::floating_point T>
86[[nodiscard]] NUM_K_AINLINE T norm_sq(const T* NUM_K_RESTRICT x, idx n) noexcept {
87 T s = T(0);
89 for (idx i = 0; i < n; ++i) {
90 s += x[i] * x[i];
91 }
92 return s;
93}
94
95/// @brief Euclidean norm: sqrt(sum x[i]^2)
96template<std::floating_point T>
97[[nodiscard]] NUM_K_AINLINE T norm(const T* NUM_K_RESTRICT x, idx n) noexcept {
98 return std::sqrt(norm_sq(x, n));
99}
100
101/// @brief L1 norm: sum |x[i]|
102template<std::floating_point T>
103[[nodiscard]] NUM_K_AINLINE T l1_norm(const T* NUM_K_RESTRICT x, idx n) noexcept {
104 T s = T(0);
106 for (idx i = 0; i < n; ++i) {
107 s += std::abs(x[i]);
108 }
109 return s;
110}
111
112/// @brief L-infinity norm: max |x[i]|
113template<std::floating_point T>
114[[nodiscard]] NUM_K_AINLINE T linf_norm(const T* NUM_K_RESTRICT x, idx n) noexcept {
115 T mx = T(0);
116 for (idx i = 0; i < n; ++i) {
117 const T v = std::abs(x[i]);
118 if (v > mx) {
119 mx = v;
120 }
121 }
122 return mx;
123}
124
125/// @brief Scalar sum: return sum x[i]
126template<std::floating_point T>
127[[nodiscard]] NUM_K_AINLINE T sum(const T* NUM_K_RESTRICT x, idx n) noexcept {
128 T s = T(0);
130 for (idx i = 0; i < n; ++i) {
131 s += x[i];
132 }
133 return s;
134}
135
136/// @brief y[i] = sum_j A[i*n + j] * x[j] (m x n row-major matrix)
137///
138/// y must be pre-allocated with size m. y and x must not alias A.
139template<std::floating_point T>
141 const T* NUM_K_RESTRICT A,
142 const T* NUM_K_RESTRICT x,
143 idx m,
144 idx n) noexcept {
145 for (idx i = 0; i < m; ++i) {
146 T s = T(0);
147 const T* row = A + (i * n);
149 for (idx j = 0; j < n; ++j) {
150 s += row[j] * x[j];
151 }
152 y[i] = s;
153 }
154}
155
156/// @brief Rank-1 update: A[i*n + j] += alpha * x[i] * y[j] (m x n row-major)
157///
158/// Inner loop over j is independent and vectorizable.
159template<std::floating_point T>
161 const T* NUM_K_RESTRICT x,
162 const T* NUM_K_RESTRICT y,
163 T alpha,
164 idx m,
165 idx n) noexcept {
166 for (idx i = 0; i < m; ++i) {
167 T* NUM_K_RESTRICT row = A + (i * n);
168 const T axi = alpha * x[i];
170 for (idx j = 0; j < n; ++j) {
171 row[j] += axi * y[j];
172 }
173 }
174}
175
176/// @brief Forward substitution: solve Lx = b, L lower triangular (n x n,
177/// row-major).
178///
179/// x must be pre-allocated with size n. b and x must not alias each other.
180template<std::floating_point T>
182 const T* NUM_K_RESTRICT L,
183 const T* NUM_K_RESTRICT b,
184 idx n) noexcept {
185 for (idx i = 0; i < n; ++i) {
186 T s = b[i];
187 const T* row = L + (i * n);
188 for (idx j = 0; j < i; ++j) {
189 s -= row[j] * x[j];
190 }
191 x[i] = s / row[i];
192 }
193}
194
195/// @brief Back substitution: solve Ux = b, U upper triangular (n x n,
196/// row-major).
197///
198/// x must be pre-allocated with size n. b and x must not alias each other.
199template<std::floating_point T>
201 const T* NUM_K_RESTRICT U,
202 const T* NUM_K_RESTRICT b,
203 idx n) noexcept {
204 for (idx i = n; i-- > 0;) {
205 T s = b[i];
206 const T* row = U + (i * n);
207 for (idx j = i + 1; j < n; ++j) {
208 s -= row[j] * x[j];
209 }
210 x[i] = s / row[i];
211 }
212}
213
214} // namespace num::kernel::raw
Core type definitions.
NUM_K_AINLINE T sum(const T *NUM_K_RESTRICT x, idx n) noexcept
Scalar sum: return sum x[i].
Definition raw.hpp:127
NUM_K_AINLINE T linf_norm(const T *NUM_K_RESTRICT x, idx n) noexcept
L-infinity norm: max |x[i]|.
Definition raw.hpp:114
NUM_K_AINLINE void trsv_upper(T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT U, const T *NUM_K_RESTRICT b, idx n) noexcept
Back substitution: solve Ux = b, U upper triangular (n x n, row-major).
Definition raw.hpp:200
NUM_K_AINLINE void ger(T *NUM_K_RESTRICT A, const T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT y, T alpha, idx m, idx n) noexcept
Rank-1 update: A[i*n + j] += alpha * x[i] * y[j] (m x n row-major)
Definition raw.hpp:160
NUM_K_AINLINE T dot(const T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT y, idx n) noexcept
dot product: return sum x[i] * y[i]
Definition raw.hpp:73
NUM_K_AINLINE void trsv_lower(T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT L, const T *NUM_K_RESTRICT b, idx n) noexcept
Forward substitution: solve Lx = b, L lower triangular (n x n, row-major).
Definition raw.hpp:181
NUM_K_AINLINE void axpy(T *NUM_K_RESTRICT y, const T *NUM_K_RESTRICT x, T alpha, idx n) noexcept
y[i] += alpha * x[i]
Definition raw.hpp:34
NUM_K_AINLINE T l1_norm(const T *NUM_K_RESTRICT x, idx n) noexcept
L1 norm: sum |x[i]|.
Definition raw.hpp:103
NUM_K_AINLINE T norm(const T *NUM_K_RESTRICT x, idx n) noexcept
Euclidean norm: sqrt(sum x[i]^2)
Definition raw.hpp:97
NUM_K_AINLINE void axpby(T *NUM_K_RESTRICT y, const T *NUM_K_RESTRICT x, T a, T b, idx n) noexcept
y[i] = a*x[i] + b*y[i].
Definition raw.hpp:46
NUM_K_AINLINE void matvec(T *NUM_K_RESTRICT y, const T *NUM_K_RESTRICT A, const T *NUM_K_RESTRICT x, idx m, idx n) noexcept
y[i] = sum_j A[i*n + j] * x[j] (m x n row-major matrix)
Definition raw.hpp:140
NUM_K_AINLINE void axpbyz(T *NUM_K_RESTRICT z, const T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT y, T a, T b, idx n) noexcept
z[i] = a*x[i] + b*y[i].
Definition raw.hpp:59
NUM_K_AINLINE T norm_sq(const T *NUM_K_RESTRICT x, idx n) noexcept
sum x[i]^2 (no sqrt; use for convergence checks to avoid sqrt cost)
Definition raw.hpp:86
NUM_K_AINLINE void scale(T *NUM_K_RESTRICT x, T alpha, idx n) noexcept
x[i] *= alpha
Definition raw.hpp:25
std::size_t idx
Definition types.hpp:11
#define NUM_K_RESTRICT
Definition raw.hpp:17
#define NUM_K_IVDEP
Definition raw.hpp:18
#define NUM_K_AINLINE
Definition raw.hpp:16