15 const idx M =
A.rows(),
K =
A.cols(), N =
B.cols();
17 for (
idx j = 0;
j < N; ++
j) {
25 for (
idx i = 0;
i <
A.rows(); ++
i) {
27 for (
idx j = 0;
j <
A.cols(); ++
j)
28 y[
i] +=
A(
i,
j) * x[
j];
33 for (
idx i = 0;
i <
A.size(); ++
i)
74 const idx M =
A.rows(),
K =
A.cols(), N =
B.cols();
75 std::fill_n(
C.data(),
M * N,
real(0));
109 const idx M =
A.rows(),
K =
A.cols(), N =
B.cols();
110 std::fill_n(
C.data(),
M * N,
real(0));
Dense row-major matrix with optional GPU storage.
void matmul_register_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size, idx reg_size)
void matmul(const Matrix &A, const Matrix &B, Matrix &C)
void matvec(const Matrix &A, const Vector &x, Vector &y)
void matmul_blocked(const Matrix &A, const Matrix &B, Matrix &C, idx block_size)
void matadd(real alpha, const Matrix &A, real beta, const Matrix &B, Matrix &C)
real beta(real a, real b)
B(a, b) – beta function.
constexpr T ipow(T x) noexcept
Compute x^N at compile time via repeated squaring.