9#include "../seq/impl.hpp"
15#ifdef NUMERICS_HAS_OMP
16 constexpr idx BS = 64;
18 std::fill_n(C.
data(), M * N,
real(0));
20 #pragma omp parallel for schedule(dynamic) collapse(2)
21 for (
idx ii = 0; ii < M; ii += BS) {
22 for (
idx jj = 0; jj < N; jj += BS) {
23 const idx i_lim = std::min(ii + BS, M);
24 const idx j_lim = std::min(jj + BS, N);
25 for (
idx kk = 0; kk < K; kk += BS) {
26 const idx k_lim = std::min(kk + BS, K);
27 for (
idx i = ii; i < i_lim; ++i) {
28 for (
idx k = kk; k < k_lim; ++k) {
29 const real a_ik = A(i, k);
30 for (
idx j = jj; j < j_lim; ++j)
31 C(i, j) += a_ik * B(k, j);
43#ifdef NUMERICS_HAS_OMP
44 #pragma omp parallel for schedule(static)
45 for (
idx i = 0; i < A.
rows(); ++i) {
47 for (
idx j = 0; j < A.
cols(); ++j)
48 sum += A(i, j) * x[j];
61#ifdef NUMERICS_HAS_OMP
63 #pragma omp parallel for schedule(static)
64 for (
idx i = 0; i < n; ++i)
Dense row-major matrix with optional GPU storage.
constexpr idx size() const noexcept
constexpr idx rows() const noexcept
constexpr idx cols() const noexcept
void matvec(const Matrix &A, const Vector &x, Vector &y)
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C)
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
void matvec(const Matrix &A, const Vector &x, Vector &y)
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C)
real beta(real a, real b)
B(a, b) – beta function.