// Excerpt of a tiled (blocked) matrix-product loop nest. The enclosing
// function header, the declarations of A, B, C, M, N, K and block_size, and
// the closing braces are outside this excerpt.
//
// Write real(0) to the M * N elements starting at C.data(), so that the
// `+=` in the innermost loop accumulates from zero.
// NOTE(review): assumes C.data() addresses at least M * N contiguous
// elements - confirm against C's type.
82 std::fill_n(C.
data(), M * N,
real(0));
// Outer tile loop over the first index of C (and of A), in steps of block_size.
84 for (
idx ii = 0; ii < M; ii += block_size) {
// Clamp the tile end to M so a partial last tile does not run past the bound.
85 const idx i_end = std::min(ii + block_size, M);
// Tile loop over the second index of C (and of B).
86 for (
idx jj = 0; jj < N; jj += block_size) {
// Same clamping for the j tile.
87 const idx j_end = std::min(jj + block_size, N);
// Tile loop over the shared summation index k.
88 for (
idx kk = 0; kk < K; kk += block_size) {
// Same clamping for the k tile.
89 const idx k_end = std::min(kk + block_size, K);
// Within the current (ii, jj, kk) tile, iterate in i-k-j order.
90 for (
idx i = ii; i < i_end; ++i) {
91 for (
idx k = kk; k < k_end; ++k) {
// Read A(i, k) once; it does not depend on j, so the inner loop reuses it.
92 const real a_ik = A(i, k);
93 for (
idx j = jj; j < j_end; ++j)
// Accumulate this k-tile's contribution into C(i, j).
94 C(i, j) += a_ik * B(k, j);
// Excerpt of a two-level tiled matrix-product loop nest: block_size tiles on
// the outside, reg_size sub-tiles inside each (i, j) tile. The enclosing
// function header, the closing braces, and the declaration of the local
// buffer `c` are outside this excerpt.
//
// Write real(0) to the M * N elements starting at C.data(), so that the
// accumulation below starts from zero.
// NOTE(review): assumes C.data() addresses at least M * N contiguous
// elements - confirm against C's type.
120 std::fill_n(C.
data(), M * N,
real(0));
// Outer tile loop over the first index of C (and of A), in steps of block_size.
122 for (
idx ii = 0; ii < M; ii += block_size) {
// Clamp the tile end to M so a partial last tile does not run past the bound.
123 const idx i_lim = std::min(ii + block_size, M);
// Tile loop over the second index of C (and of B).
124 for (
idx jj = 0; jj < N; jj += block_size) {
// Same clamping for the j tile.
125 const idx j_lim = std::min(jj + block_size, N);
// Tile loop over the shared summation index k.
126 for (
idx kk = 0; kk < K; kk += block_size) {
// Same clamping for the k tile.
127 const idx k_lim = std::min(kk + block_size, K);
// Split the current i tile into sub-tiles of at most reg_size entries.
128 for (
idx ir = ii; ir < i_lim; ir += reg_size) {
// Sub-tile end, clamped to the enclosing tile's limit (not to M directly).
129 const idx ri = std::min(ir + reg_size, i_lim);
// Split the current j tile the same way.
130 for (
idx jr = jj; jr < j_lim; jr += reg_size) {
// Sub-tile end in j, clamped to the enclosing tile's limit.
131 const idx rj = std::min(jr + reg_size, j_lim);
// Load the current C sub-tile into the local buffer c, indexed relative to
// the sub-tile origin (ir, jr).
// NOTE(review): c's declaration is not visible here; presumably a
// reg_size x reg_size array of `real` - confirm that its extent covers
// (ri - ir) x (rj - jr).
133 for (
idx i = ir; i < ri; ++i)
134 for (
idx j = jr; j < rj; ++j)
135 c[i - ir][j - jr] = C(i, j);
// Accumulate the k-tile's contribution into c rather than into C, with k
// outermost so each A(i, k) is read once per (i, k) pair.
136 for (
idx k = kk; k < k_lim; ++k) {
137 for (
idx i = ir; i < ri; ++i) {
// A(i, k) does not depend on j; keep it in a local for the inner loop.
138 const real a_ik = A(i, k);
139 for (
idx j = jr; j < rj; ++j)
140 c[i - ir][j - jr] += a_ik * B(k, j);
// Store the updated sub-tile back to C, so the next kk iteration reloads
// the partial sums written here.
143 for (
idx i = ir; i < ri; ++i)
144 for (
idx j = jr; j < rj; ++j)
145 C(i, j) = c[i - ir][j - jr];