numerics 0.1.0
Loading...
Searching...
No Matches
matrix.hpp
Go to the documentation of this file.
1/// @file matrix.hpp
2/// @brief Dense row-major matrix templated over scalar type T.
3#pragma once
4
6#include "core/policy.hpp"
7#include "core/vector.hpp"
8#include <algorithm>
9#include <concepts>
10#include <memory>
11#include <type_traits>
12
13namespace num {
14
15/// @brief Dense row-major owning matrix.
16template<std::floating_point T>
18 public:
20 : rows_(0),
21 cols_(0),
22 data_(nullptr) {}
23
25 : rows_(rows),
26 cols_(cols),
27 data_(new T[rows * cols]()) {}
28
30 : rows_(rows),
31 cols_(cols),
32 data_(new T[rows * cols]) {
33 std::fill_n(data_.get(), size(), val);
34 }
35
37 if constexpr (std::is_same_v<T, real>) {
38 if (d_data_) {
39 cuda::free(d_data_);
40 }
41 }
42 }
43
45 : rows_(o.rows_),
46 cols_(o.cols_),
47 data_(new T[o.size()]) {
48 std::copy_n(o.data_.get(), size(), data_.get());
49 }
50
52 : rows_(o.rows_),
53 cols_(o.cols_),
54 data_(std::move(o.data_)),
55 d_data_(o.d_data_) {
56 o.rows_ = o.cols_ = 0;
57 o.d_data_ = nullptr;
58 }
59
61 if (this != &o) {
62 rows_ = o.rows_;
63 cols_ = o.cols_;
64 data_.reset(new T[size()]);
65 std::copy_n(o.data_.get(), size(), data_.get());
66 }
67 return *this;
68 }
69
71 if (this != &o) {
72 if constexpr (std::is_same_v<T, real>) {
73 if (d_data_) {
74 cuda::free(d_data_);
75 }
76 }
77 rows_ = o.rows_;
78 cols_ = o.cols_;
79 data_ = std::move(o.data_);
80 d_data_ = o.d_data_;
81 o.rows_ = o.cols_ = 0;
82 o.d_data_ = nullptr;
83 }
84 return *this;
85 }
86
87 [[nodiscard]] constexpr idx rows() const noexcept { return rows_; }
88 [[nodiscard]] constexpr idx cols() const noexcept { return cols_; }
89 [[nodiscard]] constexpr idx size() const noexcept { return rows_ * cols_; }
90
91 T* data() { return data_.get(); }
92 const T* data() const { return data_.get(); }
93
94 T& operator()(idx i, idx j) { return data_[(i * cols_) + j]; }
95 T operator()(idx i, idx j) const { return data_[(i * cols_) + j]; }
96
97 void to_gpu() {
98 if constexpr (std::is_same_v<T, real>) {
99 if (!d_data_) {
100 d_data_ = cuda::alloc(size());
101 cuda::to_device(d_data_, data_.get(), size());
102 }
103 }
104 }
105
106 void to_cpu() {
107 if constexpr (std::is_same_v<T, real>) {
108 if (d_data_) {
109 cuda::to_host(data_.get(), d_data_, size());
110 cuda::free(d_data_);
111 d_data_ = nullptr;
112 }
113 }
114 }
115
116 T* gpu_data() { return d_data_; }
117 const T* gpu_data() const { return d_data_; }
118 [[nodiscard]] bool on_gpu() const { return d_data_ != nullptr; }
119
120 private:
121 idx rows_ = 0, cols_ = 0;
122 std::unique_ptr<T[]> data_;
123 T* d_data_ = nullptr;
124};
125
126/// @brief Double-precision dense matrix with full backend dispatch (CPU + GPU).
128
129/// @brief y = A * x
130void matvec(const Matrix& A, const Vector& x, Vector& y, Backend b = default_backend);
131
132/// @brief C = A * B
133void matmul(const Matrix& A, const Matrix& B, Matrix& C, Backend b = default_backend);
134
135/// @brief C = alpha*A + beta*B
136void matadd(real alpha,
137 const Matrix& A,
138 real beta,
139 const Matrix& B,
140 Matrix& C,
142
143/// @brief C = A * B (cache-blocked)
144///
145/// Divides A, B, C into BLOCKxBLOCK tiles so the working set fits in L2 cache.
146/// @param block_size Tile edge length (default 64).
147void matmul_blocked(const Matrix& A, const Matrix& B, Matrix& C, idx block_size = 64);
148
149/// @brief C = A * B (register-blocked)
150///
151/// Extends cache blocking with a REGxREG register tile inside each cache tile.
152/// @param block_size Cache tile edge (default 64).
153/// @param reg_size Register tile edge (default 4).
154void matmul_register_blocked(const Matrix& A,
155 const Matrix& B,
156 Matrix& C,
157 idx block_size = 64,
158 idx reg_size = 4);
159
160/// @brief C = A * B (SIMD-accelerated)
161///
162/// Dispatches at compile time: AVX-256 + FMA on x86, NEON on AArch64,
163/// falls back to matmul_blocked if neither is available.
164void matmul_simd(const Matrix& A, const Matrix& B, Matrix& C, idx block_size = 64);
165
166/// @brief y = A * x (SIMD-accelerated)
167void matvec_simd(const Matrix& A, const Vector& x, Vector& y);
168
169} // namespace num
Dense row-major owning matrix.
Definition matrix.hpp:17
constexpr idx rows() const noexcept
Definition matrix.hpp:87
T operator()(idx i, idx j) const
Definition matrix.hpp:95
BasicMatrix(idx rows, idx cols)
Definition matrix.hpp:24
BasicMatrix(const BasicMatrix &o)
Definition matrix.hpp:44
BasicMatrix(BasicMatrix &&o) noexcept
Definition matrix.hpp:51
bool on_gpu() const
Definition matrix.hpp:118
BasicMatrix & operator=(BasicMatrix &&o) noexcept
Definition matrix.hpp:70
const T * data() const
Definition matrix.hpp:92
BasicMatrix(idx rows, idx cols, T val)
Definition matrix.hpp:29
BasicMatrix & operator=(const BasicMatrix &o)
Definition matrix.hpp:60
constexpr idx size() const noexcept
Definition matrix.hpp:89
T & operator()(idx i, idx j)
Definition matrix.hpp:94
constexpr idx cols() const noexcept
Definition matrix.hpp:88
const T * gpu_data() const
Definition matrix.hpp:117
Backend enum and default backend selection.
CUDA kernel wrappers.
void to_device(real *dst, const real *src, idx n)
Copy host to device.
void free(real *ptr)
Free device memory.
real * alloc(idx n)
Allocate device memory.
void to_host(real *dst, const real *src, idx n)
Copy device to host.
void matmul_simd(const Matrix &A, const Matrix &B, Matrix &C, idx block_size=64)
C = A * B (SIMD-accelerated)
Definition matrix.cpp:106
double real
Definition types.hpp:10
Backend
Definition policy.hpp:7
void matvec_simd(const Matrix &A, const Vector &x, Vector &y)
y = A * x (SIMD-accelerated)
Definition matrix.cpp:110
real beta(real a, real b)
B(a, b) – beta function.
Definition math.hpp:248
std::size_t idx
Definition types.hpp:11
void matvec(const Matrix &A, const Vector &x, Vector &y, Backend b=default_backend)
y = A * x
Definition matrix.cpp:45
void matmul_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size=64)
C = A * B (cache-blocked)
Definition matrix.cpp:94
void matmul(const Matrix &A, const Matrix &B, Matrix &C, Backend b=default_backend)
C = A * B.
Definition matrix.cpp:20
constexpr Backend default_backend
Definition policy.hpp:53
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C, Backend b=default_backend)
C = alpha*A + beta*B.
Definition matrix.cpp:68
void matmul_register_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size=64, idx reg_size=4)
C = A * B (register-blocked)
Definition matrix.cpp:98
Dense vector storage and operations.