21 raw::axpby(y.data(), x.data(), a, b, x.size());
25 #ifdef NUMERICS_HAS_OMP
29 #pragma omp parallel for schedule(static)
30 for (idx i = 0; i < n; ++i) {
31 yd[i] = (a * xd[i]) + (b * yd[i]);
44 raw::axpbyz(z.data(), x.data(), y.data(), a, b, x.size());
49 #ifdef NUMERICS_HAS_OMP
54 #pragma omp parallel for schedule(static)
55 for (idx i = 0; i < n; ++i) {
56 zd[i] = (a * xd[i]) + (b * yd[i]);
Elementwise vector kernels (namespace num::kernel::array)
constexpr idx size() const noexcept
void axpby(real a, const Vector &x, real b, Vector &y, seq_t) noexcept
Sequential: y[i] = a*x[i] + b*y[i] (single-pass; calls raw::axpby)
void axpbyz(real a, const Vector &x, real b, const Vector &y, Vector &z, seq_t) noexcept
Sequential: z[i] = a*x[i] + b*y[i] (single-pass; calls raw::axpbyz)
NUM_K_AINLINE void axpby(real *NUM_K_RESTRICT y, const real *NUM_K_RESTRICT x, real a, real b, idx n) noexcept
y[i] = a*x[i] + b*y[i] (fused scale-and-add, one memory pass)
NUM_K_AINLINE void axpbyz(real *NUM_K_RESTRICT z, const real *NUM_K_RESTRICT x, const real *NUM_K_RESTRICT y, real a, real b, idx n) noexcept
z[i] = a*x[i] + b*y[i] (fused, three-array, one pass each)
Tier-1 kernel: raw-pointer, inline, zero-overhead inner loops.
Parallel execution policy tag. Activates OMP parallel-for / reduction constructs when NUMERICS_HAS_OMP is defined.
Sequential execution policy tag. Guarantees no OMP parallel regions; safe to call inside an existing parallel region.