50#ifdef NUMERICS_HAS_BLAS
65#if defined(__GNUC__) || defined(__clang__)
66# define NUM_K_AINLINE [[gnu::always_inline]] inline
67# define NUM_K_RESTRICT __restrict__
68# define NUM_K_IVDEP _Pragma("GCC ivdep")
70# define NUM_K_AINLINE inline
71# define NUM_K_RESTRICT
84#ifdef NUMERICS_HAS_BLAS
85 cblas_dscal(
static_cast<int>(n), alpha, x, 1);
88 for (
idx i = 0; i < n; ++i) {
98#ifdef NUMERICS_HAS_BLAS
99 cblas_daxpy(
static_cast<int>(n), alpha, x, 1, y, 1);
102 for (
idx i = 0; i < n; ++i) {
103 y[i] += alpha * x[i];
116 for (
idx i = 0; i < n; ++i) {
117 y[i] = (a * x[i]) + (b * y[i]);
130 for (
idx i = 0; i < n; ++i) {
131 z[i] = (a * x[i]) + (b * y[i]);
139#ifdef NUMERICS_HAS_BLAS
140 return cblas_ddot(
static_cast<int>(n), x, 1, y, 1);
144 for (
idx i = 0; i < n; ++i) {
156 for (
idx i = 0; i < n; ++i) {
165#ifdef NUMERICS_HAS_BLAS
166 return cblas_dnrm2(
static_cast<int>(n), x, 1);
168 return std::sqrt(
norm_sq(x, n));
175#ifdef NUMERICS_HAS_BLAS
176 return cblas_dasum(
static_cast<int>(n), x, 1);
180 for (
idx i = 0; i < n; ++i) {
190#ifdef NUMERICS_HAS_BLAS
192 const int k = cblas_idamax(
static_cast<int>(n), x, 1);
193 return std::abs(x[k]);
196 for (
idx i = 0; i < n; ++i) {
197 const real v = std::abs(x[i]);
213 for (
idx i = 0; i < n; ++i) {
231#ifdef NUMERICS_HAS_BLAS
232 cblas_dgemv(CblasRowMajor, CblasNoTrans,
233 static_cast<int>(m),
static_cast<int>(n),
234 1.0, A,
static_cast<int>(n),
238 for (
idx i = 0; i < m; ++i) {
240 const real* row = A + (i * n);
242 for (
idx j = 0; j < n; ++j) {
258#ifdef NUMERICS_HAS_BLAS
259 cblas_dger(CblasRowMajor,
260 static_cast<int>(m),
static_cast<int>(n),
262 A,
static_cast<int>(n));
264 for (
idx i = 0; i < m; ++i) {
266 const real axi = alpha * x[i];
268 for (
idx j = 0; j < n; ++j) {
269 row[j] += axi * y[j];
284#ifdef NUMERICS_HAS_BLAS
285 std::memcpy(x, b, n *
sizeof(
real));
286 cblas_dtrsv(CblasRowMajor, CblasLower, CblasNoTrans, CblasNonUnit,
287 static_cast<int>(n), L,
static_cast<int>(n), x, 1);
289 for (
idx i = 0; i < n; ++i) {
291 const real* row = L + i * n;
292 for (
idx j = 0; j < i; ++j) {
309#ifdef NUMERICS_HAS_BLAS
310 std::memcpy(x, b, n *
sizeof(
real));
311 cblas_dtrsv(CblasRowMajor, CblasUpper, CblasNoTrans, CblasNonUnit,
312 static_cast<int>(n), U,
static_cast<int>(n), x, 1);
314 for (
idx i = n; i-- > 0;) {
316 const real* row = U + i * n;
317 for (
idx j = i + 1; j < n; ++j) {
NUM_K_AINLINE real l1_norm(const real *NUM_K_RESTRICT x, idx n) noexcept
L1 norm: sum |x[i]|.
NUM_K_AINLINE real linf_norm(const real *NUM_K_RESTRICT x, idx n) noexcept
L-infinity norm: max |x[i]|.
NUM_K_AINLINE real norm_sq(const real *NUM_K_RESTRICT x, idx n) noexcept
sum x[i]^2 (no sqrt; use for convergence checks to avoid sqrt cost)
NUM_K_AINLINE void ger(real *NUM_K_RESTRICT A, const real *NUM_K_RESTRICT x, const real *NUM_K_RESTRICT y, real alpha, idx m, idx n) noexcept
Rank-1 update: A[i*n + j] += alpha * x[i] * y[j] (m x n row-major)
NUM_K_AINLINE real norm(const real *NUM_K_RESTRICT x, idx n) noexcept
Euclidean norm: sqrt(sum x[i]^2)
NUM_K_AINLINE void axpby(real *NUM_K_RESTRICT y, const real *NUM_K_RESTRICT x, real a, real b, idx n) noexcept
y[i] = a*x[i] + b*y[i] (fused scale-and-add, one memory pass)
NUM_K_AINLINE void trsv_lower(real *NUM_K_RESTRICT x, const real *NUM_K_RESTRICT L, const real *NUM_K_RESTRICT b, idx n) noexcept
Forward substitution: solve Lx = b, L lower triangular (n x n, row-major).
NUM_K_AINLINE void axpbyz(real *NUM_K_RESTRICT z, const real *NUM_K_RESTRICT x, const real *NUM_K_RESTRICT y, real a, real b, idx n) noexcept
z[i] = a*x[i] + b*y[i] (fused, three-array, one pass each)
NUM_K_AINLINE void trsv_upper(real *NUM_K_RESTRICT x, const real *NUM_K_RESTRICT U, const real *NUM_K_RESTRICT b, idx n) noexcept
Back substitution: solve Ux = b, U upper triangular (n x n, row-major).
NUM_K_AINLINE real sum(const real *NUM_K_RESTRICT x, idx n) noexcept
Scalar sum: return sum x[i].
NUM_K_AINLINE real dot(const real *NUM_K_RESTRICT x, const real *NUM_K_RESTRICT y, idx n) noexcept
dot product: return sum x[i] * y[i]
NUM_K_AINLINE void matvec(real *NUM_K_RESTRICT y, const real *NUM_K_RESTRICT A, const real *NUM_K_RESTRICT x, idx m, idx n) noexcept
y[i] = sum_j A[i*n + j] * x[j] (m x n row-major matrix)
NUM_K_AINLINE void axpy(real *NUM_K_RESTRICT y, const real *NUM_K_RESTRICT x, real alpha, idx n) noexcept
y[i] += alpha * x[i]
NUM_K_AINLINE void scale(real *NUM_K_RESTRICT x, real alpha, idx n) noexcept
x[i] *= alpha