numerics 0.1.0
Loading...
Searching...
No Matches
array.hpp
Go to the documentation of this file.
1/// @file kernel/array.hpp
2/// @brief Elementwise vector kernels (namespace num::kernel::array)
3///
4/// All operations work element-by-element over a Vector.
5/// The central property is single-pass memory access: fused operations read
6/// each array once, avoiding redundant loads that separate calls would incur.
7///
8/// Dispatched operations (seq_t / par_t overloads + default):
9/// axpby(a, x, b, y) -- y[i] = a*x[i] + b*y[i] (fused; no BLAS eq.)
10/// axpbyz(a, x, b, y, z) -- z[i] = a*x[i] + b*y[i] (fused; no BLAS eq.)
11///
12/// Always-inline template operations (policy not needed; compiler sees body):
13/// map(x, f) -- x[i] = f(x[i])
14/// map(cx, f) -- CVector variant
15/// zip_map(x, y, z, f) -- z[i] = f(x[i], y[i])
16/// reduce(x, init, f) -- left fold over x
17///
18/// Include kernel/kernel.hpp to get all kernel sub-modules together.
19#pragma once
20
21#include "core/types.hpp"
22#include "core/vector.hpp"
23#include "kernel/policy.hpp"
24
26
27// ---------------------------------------------------------------------------
28// axpby: y[i] = a*x[i] + b*y[i]
29// ---------------------------------------------------------------------------
30
31/// @brief Sequential: y[i] = a*x[i] + b*y[i] (single-pass; calls raw::axpby)
32void axpby(real a, const Vector& x, real b, Vector& y, seq_t) noexcept;
33
34/// @brief Parallel: y[i] = a*x[i] + b*y[i] (OMP parallel-for; falls back to
35/// seq_t when NUMERICS_HAS_OMP is not defined)
36void axpby(real a, const Vector& x, real b, Vector& y, par_t);
37
38/// @brief Default policy (par_t if OMP available, seq_t otherwise)
39inline void axpby(real a, const Vector& x, real b, Vector& y) {
40 axpby(a, x, b, y, default_policy{});
41}
42
43// ---------------------------------------------------------------------------
44// axpbyz: z[i] = a*x[i] + b*y[i]
45// ---------------------------------------------------------------------------
46
47/// @brief Sequential: z[i] = a*x[i] + b*y[i] (single-pass; calls raw::axpbyz)
48void axpbyz(real a, const Vector& x, real b, const Vector& y, Vector& z,
49 seq_t) noexcept;
50
51/// @brief Parallel: z[i] = a*x[i] + b*y[i] (OMP parallel-for)
52void axpbyz(real a, const Vector& x, real b, const Vector& y, Vector& z,
53 par_t);
54
55/// @brief Default policy
56inline void axpbyz(real a, const Vector& x, real b, const Vector& y,
57 Vector& z) {
58 axpbyz(a, x, b, y, z, default_policy{});
59}
60
61// ---------------------------------------------------------------------------
62// Template kernels (always inline; lambda is fully visible to the compiler)
63// ---------------------------------------------------------------------------
64
65/// @brief In-place elementwise map: x[i] = f(x[i])
66///
67/// The compiler inlines f into the loop and can vectorize if f is a simple
68/// arithmetic expression. No policy needed.
69///
70/// @code
71/// num::kernel::array::map(v, [](real x) { return x * x; });
72/// @endcode
73template<typename F>
74void map(Vector& x, F&& f) {
75 real* d = x.data();
76 const idx n = x.size();
77 for (idx i = 0; i < n; ++i) {
78 d[i] = f(d[i]);
79 }
80}
81
82/// @brief In-place elementwise map on CVector
83template<typename F>
84void map(CVector& x, F&& f) {
85 cplx* d = x.data();
86 const idx n = x.size();
87 for (idx i = 0; i < n; ++i) {
88 d[i] = f(d[i]);
89 }
90}
91
92/// @brief Fused binary map: z[i] = f(x[i], y[i])
93///
94/// Single pass over memory; no temporary allocation.
95/// x, y, z must have the same size.
96///
97/// @code
98/// num::kernel::array::zip_map(x, y, z,
99/// [](real a, real b) { return a * b; });
100/// @endcode
101template<typename T, typename F>
102void zip_map(const BasicVector<T>& x, const BasicVector<T>& y,
103 BasicVector<T>& z, F&& f) {
104 const idx n = x.size();
105 for (idx i = 0; i < n; ++i) {
106 z[i] = f(x[i], y[i]);
107 }
108}
109
110/// @brief Single-pass left fold: f(f(f(init, x[0]), x[1]), ..., x[n-1])
111///
112/// @code
113/// real max_val = num::kernel::array::reduce(v, -1e300,
114/// [](real acc, real xi) { return acc > xi ? acc : xi; });
115/// @endcode
116template<typename F>
117[[nodiscard]] real reduce(const Vector& x, real init, F&& f) {
118 const real* d = x.data();
119 const idx n = x.size();
120 real acc = init;
121 for (idx i = 0; i < n; ++i) {
122 acc = f(acc, d[i]);
123 }
124 return acc;
125}
126
127} // namespace num::kernel::array
constexpr idx size() const noexcept
Definition vector.hpp:80
Core type definitions.
Compile-time dispatch policy tags for the kernel module.
void zip_map(const BasicVector< T > &x, const BasicVector< T > &y, BasicVector< T > &z, F &&f)
Fused binary map: z[i] = f(x[i], y[i])
Definition array.hpp:102
void axpby(real a, const Vector &x, real b, Vector &y, seq_t) noexcept
Sequential: y[i] = a*x[i] + b*y[i] (single-pass; calls raw::axpby)
Definition array.cpp:20
void axpbyz(real a, const Vector &x, real b, const Vector &y, Vector &z, seq_t) noexcept
Sequential: z[i] = a*x[i] + b*y[i] (single-pass; calls raw::axpbyz)
Definition array.cpp:42
void map(Vector &x, F &&f)
In-place elementwise map: x[i] = f(x[i])
Definition array.hpp:74
real reduce(const Vector &x, real init, F &&f)
Single-pass left fold: f(f(f(init, x[0]), x[1]), ..., x[n-1])
Definition array.hpp:117
double real
Definition types.hpp:10
std::size_t idx
Definition types.hpp:11
std::complex< real > cplx
Definition types.hpp:12
Parallel execution policy tag. Activates OMP parallel-for / reduction constructs when NUMERICS_HAS_OM...
Definition policy.hpp:43
Sequential execution policy tag. Guarantees no OMP parallel regions; safe to call inside an existing ...
Definition policy.hpp:38
Vector operations.