numerics
Loading...
Searching...
No Matches
matrix.cpp
Go to the documentation of this file.
1/// @file core/matrix.cpp
2/// @brief Matrix constructors, GPU lifecycle, and backend dispatch for matrix ops.
3///
4/// Adding a new backend:
5/// 1. Add the enumerator to enum class Backend in include/core/policy.hpp
6/// 2. Create src/core/backends/<name>/ with impl.hpp and matrix.cpp
7/// 3. Add `case Backend::<name>:` to each switch below
8/// 4. Register the .cpp in cmake/sources.cmake
9
10#include "core/matrix.hpp"
12#include <algorithm>
13
14#include "backends/seq/impl.hpp"
16#include "backends/omp/impl.hpp"
17#include "backends/gpu/impl.hpp"
19
20namespace num {
21
23 : rows_(rows), cols_(cols), data_(new real[rows * cols]()) {}
24
26 : rows_(rows), cols_(cols), data_(new real[rows * cols]) {
27 std::fill_n(data_.get(), size(), val);
28}
29
31 if (d_data_) cuda::free(d_data_);
32}
33
35 : rows_(o.rows_), cols_(o.cols_), data_(new real[o.size()]) {
36 std::copy_n(o.data_.get(), size(), data_.get());
37}
38
40 : rows_(o.rows_), cols_(o.cols_), data_(std::move(o.data_)), d_data_(o.d_data_) {
41 o.rows_ = o.cols_ = 0;
42 o.d_data_ = nullptr;
43}
44
46 if (this != &o) {
47 rows_ = o.rows_;
48 cols_ = o.cols_;
49 data_.reset(new real[size()]);
50 std::copy_n(o.data_.get(), size(), data_.get());
51 }
52 return *this;
53}
54
56 if (this != &o) {
57 if (d_data_) cuda::free(d_data_);
58 rows_ = o.rows_;
59 cols_ = o.cols_;
60 data_ = std::move(o.data_);
61 d_data_ = o.d_data_;
62 o.rows_ = o.cols_ = 0;
63 o.d_data_ = nullptr;
64 }
65 return *this;
66}
67
69 if (!d_data_) {
70 d_data_ = cuda::alloc(size());
71 cuda::to_device(d_data_, data_.get(), size());
72 }
73}
74
76 if (d_data_) {
77 cuda::to_host(data_.get(), d_data_, size());
78 cuda::free(d_data_);
79 d_data_ = nullptr;
80 }
81}
82
83void matmul(const Matrix& A, const Matrix& B, Matrix& C, Backend b) {
84 switch (b) {
85 case Backend::seq: backends::seq::matmul(A, B, C); break;
87 case Backend::simd: backends::simd::matmul(A, B, C, 64); break;
89 case Backend::omp: backends::omp::matmul(A, B, C); break;
90 case Backend::gpu: backends::gpu::matmul(A, B, C); break;
91 }
92}
93
94void matvec(const Matrix& A, const Vector& x, Vector& y, Backend b) {
95 switch (b) {
96 case Backend::seq: backends::seq::matvec(A, x, y); break;
97 case Backend::blocked: backends::seq::matvec(A, x, y); break;
98 case Backend::simd: backends::simd::matvec(A, x, y); break;
99 case Backend::blas: backends::blas::matvec(A, x, y); break;
100 case Backend::omp: backends::omp::matvec(A, x, y); break;
101 case Backend::gpu: backends::gpu::matvec(A, x, y); break;
102 }
103}
104
105void matadd(real alpha, const Matrix& A, real beta, const Matrix& B, Matrix& C,
106 Backend b) {
107 switch (b) {
108 case Backend::seq:
109 case Backend::blocked:
112 case Backend::omp: backends::omp::matadd(alpha, A, beta, B, C); break;
113 case Backend::gpu: backends::seq::matadd(alpha, A, beta, B, C); break;
114 }
115}
116
120
125
129
130void matvec_simd(const Matrix& A, const Vector& x, Vector& y) {
132}
133
134} // namespace num
Dense row-major matrix with optional GPU storage.
Definition matrix.hpp:12
constexpr idx size() const noexcept
Definition matrix.hpp:26
Matrix & operator=(const Matrix &)
Definition matrix.cpp:45
void to_cpu()
Definition matrix.cpp:75
void to_gpu()
Definition matrix.cpp:68
CUDA kernel wrappers.
Matrix operations.
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
Definition matrix.cpp:35
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C)
Definition matrix.cpp:64
void matvec(const Matrix &A, const Vector &x, Vector &y)
Definition matrix.cpp:50
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
Definition matrix.cpp:13
void matvec(const Matrix &A, const Vector &x, Vector &y)
Definition matrix.cpp:22
void matvec(const Matrix &A, const Vector &x, Vector &y)
Definition matrix.cpp:42
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C)
Definition matrix.cpp:56
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
Definition matrix.cpp:14
void matmul_register_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size, idx reg_size)
Definition matrix.cpp:107
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
Definition matrix.cpp:14
void matvec(const Matrix &A, const Vector &x, Vector &y)
Definition matrix.cpp:24
void matmul_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size)
Definition matrix.cpp:73
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C)
Definition matrix.cpp:32
void matvec(const Matrix &A, const Vector &x, Vector &y)
Definition matrix.cpp:226
void matmul(const Matrix &A, const Matrix &B, Matrix &C, idx block_size)
Definition matrix.cpp:216
void to_device(real *dst, const real *src, idx n)
Copy host to device.
void free(real *ptr)
Free device memory.
real * alloc(idx n)
Allocate device memory.
void to_host(real *dst, const real *src, idx n)
Copy device to host.
void matmul_simd(const Matrix &A, const Matrix &B, Matrix &C, idx block_size=64)
C = A * B (SIMD-accelerated)
Definition matrix.cpp:126
double real
Definition types.hpp:10
Backend
Selects which backend handles a linalg operation.
Definition policy.hpp:19
@ gpu
CUDA – custom kernels or cuBLAS.
@ omp
OpenMP parallel blocked loops.
@ blocked
Cache-blocked; compiler auto-vectorizes inner loops.
@ simd
Hand-written SIMD intrinsics (AVX2 or NEON)
@ blas
cblas/LAPACKE – OpenBLAS, MKL, Apple Accelerate
@ seq
Naive textbook loops – always available.
void matvec_simd(const Matrix &A, const Vector &x, Vector &y)
y = A * x (SIMD-accelerated)
Definition matrix.cpp:130
real beta(real a, real b)
B(a, b) – beta function.
Definition math.hpp:242
constexpr T ipow(T x) noexcept
Compute x^N at compile time via repeated squaring.
std::size_t idx
Definition types.hpp:11
void matvec(const Matrix &A, const Vector &x, Vector &y, Backend b=default_backend)
y = A * x
Definition matrix.cpp:94
void matmul_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size=64)
C = A * B (cache-blocked)
Definition matrix.cpp:117
void matmul(const Matrix &A, const Matrix &B, Matrix &C, Backend b=default_backend)
C = A * B.
Definition matrix.cpp:83
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C, Backend b=default_backend)
C = alpha*A + beta*B.
Definition matrix.cpp:105
void matmul_register_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size=64, idx reg_size=4)
C = A * B (register-blocked)
Definition matrix.cpp:121
Private declarations for the BLAS backend. Only included by src/core/vector.cpp and src/core/matrix....
Private declarations for the GPU (CUDA) backend. Only included by src/core/vector....
Private declarations for the SIMD backend. Only included by src/core/vector.cpp and src/core/matrix....