numerics 0.1.0
Loading...
Searching...
No Matches
array.cpp
Go to the documentation of this file.
1/// @file kernel/array.cpp
2/// @brief Implementations for num::kernel::array (seq_t and par_t overloads).
3///
4/// seq_t paths call raw:: directly (which handles BLAS dispatch internally
5/// for ops that have cblas equivalents, and uses auto-vectorizable loops
6/// otherwise).
7///
8/// par_t paths use OpenMP parallel-for. When NUMERICS_HAS_OMP is not defined,
9/// par_t falls through to the seq_t implementation.
10
11#include "kernel/array.hpp"
12#include "kernel/raw.hpp"
13
14namespace num::kernel::array {
15
16// ---------------------------------------------------------------------------
17// axpby
18// ---------------------------------------------------------------------------
19
20void axpby(real a, const Vector& x, real b, Vector& y, seq_t) noexcept {
21 raw::axpby(y.data(), x.data(), a, b, x.size());
22}
23
24void axpby(real a, const Vector& x, real b, Vector& y, par_t) {
25#ifdef NUMERICS_HAS_OMP
26 const idx n = x.size();
27 const real* xd = x.data();
28 real* yd = y.data();
29 #pragma omp parallel for schedule(static)
30 for (idx i = 0; i < n; ++i) {
31 yd[i] = (a * xd[i]) + (b * yd[i]);
32 }
33#else
34 axpby(a, x, b, y, seq_t{});
35#endif
36}
37
38// ---------------------------------------------------------------------------
39// axpbyz
40// ---------------------------------------------------------------------------
41
42void axpbyz(real a, const Vector& x, real b, const Vector& y, Vector& z,
43 seq_t) noexcept {
44 raw::axpbyz(z.data(), x.data(), y.data(), a, b, x.size());
45}
46
47void axpbyz(real a, const Vector& x, real b, const Vector& y, Vector& z,
48 par_t) {
49#ifdef NUMERICS_HAS_OMP
50 const idx n = x.size();
51 const real* xd = x.data();
52 const real* yd = y.data();
53 real* zd = z.data();
54 #pragma omp parallel for schedule(static)
55 for (idx i = 0; i < n; ++i) {
56 zd[i] = (a * xd[i]) + (b * yd[i]);
57 }
58#else
59 axpbyz(a, x, b, y, z, seq_t{});
60#endif
61}
62
63} // namespace num::kernel::array
Elementwise vector kernels (namespace num::kernel::array)
constexpr idx size() const noexcept
Definition vector.hpp:80
void axpby(real a, const Vector &x, real b, Vector &y, seq_t) noexcept
Sequential: y[i] = a*x[i] + b*y[i] (single-pass; calls raw::axpby)
Definition array.cpp:20
void axpbyz(real a, const Vector &x, real b, const Vector &y, Vector &z, seq_t) noexcept
Sequential: z[i] = a*x[i] + b*y[i] (single-pass; calls raw::axpbyz)
Definition array.cpp:42
NUM_K_AINLINE void axpby(real *NUM_K_RESTRICT y, const real *NUM_K_RESTRICT x, real a, real b, idx n) noexcept
y[i] = a*x[i] + b*y[i] (fused scale-and-add, one memory pass)
Definition raw.hpp:113
NUM_K_AINLINE void axpbyz(real *NUM_K_RESTRICT z, const real *NUM_K_RESTRICT x, const real *NUM_K_RESTRICT y, real a, real b, idx n) noexcept
z[i] = a*x[i] + b*y[i] (fused, three-array, one pass each)
Definition raw.hpp:125
double real
Definition types.hpp:10
std::size_t idx
Definition types.hpp:11
Tier-1 kernel: raw-pointer, inline, zero-overhead inner loops.
Parallel execution policy tag. Activates OMP parallel-for / reduction constructs when NUMERICS_HAS_OMP is defined.
Definition policy.hpp:43
Sequential execution policy tag. Guarantees no OMP parallel regions; safe to call inside an existing ...
Definition policy.hpp:38