numerics 0.1.0
Loading...
Searching...
No Matches
array.cpp
Go to the documentation of this file.
1/// @file kernel/array.cpp
2/// @brief Implementations for num::kernel::array (seq_t and par_t overloads).
3
4#include "kernel/array.hpp"
5#include "kernel/raw.hpp"
6
7namespace num::kernel::array {
8
9void axpby(real a, const Vector& x, real b, Vector& y, seq_t) noexcept {
10 raw::axpby(y.data(), x.data(), a, b, x.size());
11}
12
13void axpby(real a, const Vector& x, real b, Vector& y, par_t) {
14#ifdef NUMERICS_HAS_OMP
15 const idx n = x.size();
16 const real* xd = x.data();
17 real* yd = y.data();
18 #pragma omp parallel for schedule(static)
19 for (idx i = 0; i < n; ++i) {
20 yd[i] = (a * xd[i]) + (b * yd[i]);
21 }
22#else
23 axpby(a, x, b, y, seq_t{});
24#endif
25}
26
27void axpbyz(real a, const Vector& x, real b, const Vector& y, Vector& z, seq_t) noexcept {
28 raw::axpbyz(z.data(), x.data(), y.data(), a, b, x.size());
29}
30
31void axpbyz(real a, const Vector& x, real b, const Vector& y, Vector& z, par_t) {
32#ifdef NUMERICS_HAS_OMP
33 const idx n = x.size();
34 const real* xd = x.data();
35 const real* yd = y.data();
36 real* zd = z.data();
37 #pragma omp parallel for schedule(static)
38 for (idx i = 0; i < n; ++i) {
39 zd[i] = (a * xd[i]) + (b * yd[i]);
40 }
41#else
42 axpbyz(a, x, b, y, z, seq_t{});
43#endif
44}
45
46} // namespace num::kernel::array
Elementwise vector kernels (namespace num::kernel::array)
constexpr idx size() const noexcept
Definition vector.hpp:83
void axpby(real a, const Vector &x, real b, Vector &y, seq_t) noexcept
Sequential: y[i] = a*x[i] + b*y[i] (single-pass; calls raw::axpby)
Definition array.cpp:9
void axpbyz(real a, const Vector &x, real b, const Vector &y, Vector &z, seq_t) noexcept
Sequential: z[i] = a*x[i] + b*y[i].
Definition array.cpp:27
NUM_K_AINLINE void axpby(T *NUM_K_RESTRICT y, const T *NUM_K_RESTRICT x, T a, T b, idx n) noexcept
y[i] = a*x[i] + b*y[i].
Definition raw.hpp:46
NUM_K_AINLINE void axpbyz(T *NUM_K_RESTRICT z, const T *NUM_K_RESTRICT x, const T *NUM_K_RESTRICT y, T a, T b, idx n) noexcept
z[i] = a*x[i] + b*y[i].
Definition raw.hpp:59
double real
Definition types.hpp:10
std::size_t idx
Definition types.hpp:11
Tier-1 kernel: raw-pointer, inline, zero-overhead inner loops.
Parallel execution policy tag.
Definition policy.hpp:13
Sequential execution policy tag.
Definition policy.hpp:10