numerics 0.1.0
Loading...
Searching...
No Matches
matrix.cpp
Go to the documentation of this file.
/// @file core/matrix.cpp
/// @brief Matrix constructors, GPU lifecycle, and backend dispatch for
/// matrix ops.
///
/// Adding a new backend:
///   1. Add the enumerator to enum class Backend in include/core/policy.hpp
///   2. Create src/core/backends/<name>/ with impl.hpp and matrix.cpp
///   3. Add `case Backend::<name>:` to each switch below
///   4. Register the .cpp in cmake/sources.cmake
#include "core/matrix.hpp"

#include <algorithm>

#include "backends/blas/impl.hpp"
#include "backends/gpu/impl.hpp"
#include "backends/omp/impl.hpp"
#include "backends/seq/impl.hpp"
#include "backends/simd/impl.hpp"
21namespace num {
22
24 : rows_(rows)
25 , cols_(cols)
26 , data_(new real[rows * cols]()) {}
27
28Matrix::Matrix(idx rows, idx cols, real val)
29 : rows_(rows)
30 , cols_(cols)
31 , data_(new real[rows * cols]) {
32 std::fill_n(data_.get(), size(), val);
33}
34
36 if (d_data_)
37 cuda::free(d_data_);
38}
39
41 : rows_(o.rows_)
42 , cols_(o.cols_)
43 , data_(new real[o.size()]) {
44 std::copy_n(o.data_.get(), size(), data_.get());
45}
46
48 : rows_(o.rows_)
49 , cols_(o.cols_)
50 , data_(std::move(o.data_))
51 , d_data_(o.d_data_) {
52 o.rows_ = o.cols_ = 0;
53 o.d_data_ = nullptr;
54}
55
57 if (this != &o) {
58 rows_ = o.rows_;
59 cols_ = o.cols_;
60 data_.reset(new real[size()]);
61 std::copy_n(o.data_.get(), size(), data_.get());
62 }
63 return *this;
64}
65
67 if (this != &o) {
68 if (d_data_)
69 cuda::free(d_data_);
70 rows_ = o.rows_;
71 cols_ = o.cols_;
72 data_ = std::move(o.data_);
73 d_data_ = o.d_data_;
74 o.rows_ = o.cols_ = 0;
75 o.d_data_ = nullptr;
76 }
77 return *this;
78}
79
81 if (!d_data_) {
82 d_data_ = cuda::alloc(size());
83 cuda::to_device(d_data_, data_.get(), size());
84 }
85}
86
88 if (d_data_) {
89 cuda::to_host(data_.get(), d_data_, size());
90 cuda::free(d_data_);
91 d_data_ = nullptr;
92 }
93}
94
95void matmul(const Matrix& A, const Matrix& B, Matrix& C, Backend b) {
96 switch (b) {
97 case Backend::seq:
98 backends::seq::matmul(A, B, C);
99 break;
100 case Backend::blocked:
102 break;
103 case Backend::simd:
104 backends::simd::matmul(A, B, C, 64);
105 break;
106 case Backend::lapack:
107 [[fallthrough]]; // no LAPACK matmul; use BLAS
108 case Backend::blas:
109 backends::blas::matmul(A, B, C);
110 break;
111 case Backend::omp:
112 backends::omp::matmul(A, B, C);
113 break;
114 case Backend::gpu:
115 backends::gpu::matmul(A, B, C);
116 break;
117 }
118}
119
120void matvec(const Matrix& A, const Vector& x, Vector& y, Backend b) {
121 switch (b) {
122 case Backend::seq:
123 backends::seq::matvec(A, x, y);
124 break;
125 case Backend::blocked:
126 backends::seq::matvec(A, x, y);
127 break;
128 case Backend::simd:
129 backends::simd::matvec(A, x, y);
130 break;
131 case Backend::lapack:
132 [[fallthrough]]; // no LAPACK matvec; use BLAS
133 case Backend::blas:
134 backends::blas::matvec(A, x, y);
135 break;
136 case Backend::omp:
137 backends::omp::matvec(A, x, y);
138 break;
139 case Backend::gpu:
140 backends::gpu::matvec(A, x, y);
141 break;
142 }
143}
144
145void matadd(real alpha,
146 const Matrix& A,
147 real beta,
148 const Matrix& B,
149 Matrix& C,
150 Backend b) {
151 switch (b) {
152 case Backend::seq:
153 case Backend::blocked:
154 case Backend::simd:
155 backends::seq::matadd(alpha, A, beta, B, C);
156 break;
157 case Backend::lapack:
158 [[fallthrough]]; // no LAPACK matadd; use BLAS
159 case Backend::blas:
160 backends::blas::matadd(alpha, A, beta, B, C);
161 break;
162 case Backend::omp:
163 backends::omp::matadd(alpha, A, beta, B, C);
164 break;
165 case Backend::gpu:
166 backends::seq::matadd(alpha, A, beta, B, C);
167 break;
168 }
169}
170
171void matmul_blocked(const Matrix& A,
172 const Matrix& B,
173 Matrix& C,
174 idx block_size) {
175 backends::seq::matmul_blocked(A, B, C, block_size);
176}
177
179 const Matrix& B,
180 Matrix& C,
181 idx block_size,
182 idx reg_size) {
183 backends::seq::matmul_register_blocked(A, B, C, block_size, reg_size);
184}
185
186void matmul_simd(const Matrix& A, const Matrix& B, Matrix& C, idx block_size) {
187 backends::simd::matmul(A, B, C, block_size);
188}
189
190void matvec_simd(const Matrix& A, const Vector& x, Vector& y) {
191 backends::simd::matvec(A, x, y);
192}
193
194} // namespace num
Dense row-major matrix with optional GPU storage.
Definition matrix.hpp:12
constexpr idx size() const noexcept
Definition matrix.hpp:26
Matrix & operator=(const Matrix &)
Definition matrix.cpp:56
void to_cpu()
Definition matrix.cpp:87
void to_gpu()
Definition matrix.cpp:80
Private declarations for the BLAS backend. Only included by src/core/vector.cpp and src/core/matrix....
Private declarations for the GPU (CUDA) backend. Only included by src/core/vector....
Private declarations for the SIMD backend. Only included by src/core/vector.cpp and src/core/matrix....
CUDA kernel wrappers.
Matrix operations.
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
Definition matrix.cpp:37
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C)
Definition matrix.cpp:79
void matvec(const Matrix &A, const Vector &x, Vector &y)
Definition matrix.cpp:59
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
Definition matrix.cpp:13
void matvec(const Matrix &A, const Vector &x, Vector &y)
Definition matrix.cpp:26
void matvec(const Matrix &A, const Vector &x, Vector &y)
Definition matrix.cpp:42
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C)
Definition matrix.cpp:56
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
Definition matrix.cpp:14
void matmul_register_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size, idx reg_size)
Definition matrix.cpp:114
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
Definition matrix.cpp:14
void matvec(const Matrix &A, const Vector &x, Vector &y)
Definition matrix.cpp:24
void matmul_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size)
Definition matrix.cpp:77
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C)
Definition matrix.cpp:32
void matvec(const Matrix &A, const Vector &x, Vector &y)
Definition matrix.cpp:258
void matmul(const Matrix &A, const Matrix &B, Matrix &C, idx block_size)
Definition matrix.cpp:248
void to_device(real *dst, const real *src, idx n)
Copy host to device.
void free(real *ptr)
Free device memory.
real * alloc(idx n)
Allocate device memory.
void to_host(real *dst, const real *src, idx n)
Copy device to host.
void matmul_simd(const Matrix &A, const Matrix &B, Matrix &C, idx block_size=64)
C = A * B (SIMD-accelerated)
Definition matrix.cpp:186
double real
Definition types.hpp:10
Backend
Selects which backend handles a linalg operation.
Definition policy.hpp:19
@ gpu
CUDA – custom kernels or cuBLAS.
@ omp
OpenMP parallel blocked loops.
@ blocked
Cache-blocked; compiler auto-vectorizes inner loops.
@ simd
Hand-written SIMD intrinsics (AVX2 or NEON)
@ blas
cblas – OpenBLAS, MKL, Apple Accelerate (Level-1/2/3)
@ lapack
LAPACKE – industry-standard factorizations, SVD, eigen.
@ seq
Naive textbook loops – always available.
void matvec_simd(const Matrix &A, const Vector &x, Vector &y)
y = A * x (SIMD-accelerated)
Definition matrix.cpp:190
real beta(real a, real b)
B(a, b) – beta function.
Definition math.hpp:248
std::size_t idx
Definition types.hpp:11
void matvec(const Matrix &A, const Vector &x, Vector &y, Backend b=default_backend)
y = A * x
Definition matrix.cpp:120
void matmul_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size=64)
C = A * B (cache-blocked)
Definition matrix.cpp:171
void matmul(const Matrix &A, const Matrix &B, Matrix &C, Backend b=default_backend)
C = A * B.
Definition matrix.cpp:95
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C, Backend b=default_backend)
C = alpha*A + beta*B.
Definition matrix.cpp:145
void matmul_register_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size=64, idx reg_size=4)
C = A * B (register-blocked)
Definition matrix.cpp:178