16 : N(N_), h(1.0 / N_), dt(dt_), nu(nu_),
17 u(N_ * N_, 0.0), v(N_ * N_, 0.0), p(N_ * N_, 0.0),
18 u_star(N_ * N_, 0.0), v_star(N_ * N_, 0.0), rhs(N_ * N_, 0.0)
28 const real two_pi = 2.0 * M_PI;
30 for (idx i = 0; i <
N; ++i) {
31 for (idx j = 0; j <
N; ++j) {
33 real y = (j + 0.5) *
h;
34 u[at(i, j)] = (y <= 0.5)
35 ? std::tanh((y - 0.25) / rho)
36 : std::tanh((0.75 - y) / rho);
39 real x = (i + 0.5) *
h;
40 v[at(i, j)] = delta * std::sin(two_pi * x);
45 for (idx k = 0; k <
N *
N; ++k)
p[k] = 0.0;
51 auto t0 = std::chrono::steady_clock::now();
54 if (
nu > 0.0) apply_diffusion();
59 auto t1 = std::chrono::steady_clock::now();
60 stats.
total_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
68void NSSolver::advect() {
69 auto t0 = std::chrono::steady_clock::now();
74#pragma omp parallel for schedule(static) collapse(2)
75 for (idx i = 0; i <
N; ++i) {
76 for (idx j = 0; j <
N; ++j) {
77 real uu =
u[at(i, j)];
78 real vu = 0.25 * (
v[at(wm1(i), j )] +
v[at(i, j )] +
79 v[at(wm1(i), wp1(j))] +
v[at(i, wp1(j))]);
81 real xb = i *
h -
dt * uu;
82 real yb = (j + 0.5) *
h -
dt * vu;
90#pragma omp parallel for schedule(static) collapse(2)
91 for (idx i = 0; i <
N; ++i) {
92 for (idx j = 0; j <
N; ++j) {
93 real vv =
v[at(i, j)];
94 real uv = 0.25 * (
u[at(i, wm1(j))] +
u[at(wp1(i), wm1(j))] +
95 u[at(i, j )] +
u[at(wp1(i), j )]);
97 real xb = (i + 0.5) *
h -
dt * uv;
103 auto t1 = std::chrono::steady_clock::now();
104 stats.
advect_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
109void NSSolver::apply_diffusion() {
110 const double c =
dt *
nu / (
h *
h);
111 const int n =
static_cast<int>(
N);
123void NSSolver::build_rhs() {
126#pragma omp parallel for schedule(static) collapse(2)
127 for (idx i = 0; i <
N; ++i) {
128 for (idx j = 0; j <
N; ++j) {
129 real div_ij = u_star[at(wp1(i), j)] - u_star[at(i, j)]
130 + v_star[at(i, wp1(j))] - v_star[at(i, j)];
131 rhs[at(i, j)] =
scale * div_ij;
138 for (idx k = 0; k <
N *
N; ++k) sum += rhs[k];
140 for (idx k = 0; k <
N *
N; ++k) rhs[k] -= mean;
150void NSSolver::solve_pressure() {
151 auto t0 = std::chrono::steady_clock::now();
153 const real inv_h2 = 1.0 / (
h *
h);
154 const int n =
static_cast<int>(
N);
163 auto result = cg_omp(neg_lap, rhs,
p, 1e-3, 100);
169 for (idx k = 0; k <
N *
N; ++k) sum +=
p[k];
171 for (idx k = 0; k <
N *
N; ++k)
p[k] -= mean;
173 auto t1 = std::chrono::steady_clock::now();
174 stats.
pressure_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
183void NSSolver::project() {
184 auto t0 = std::chrono::steady_clock::now();
188#pragma omp parallel for schedule(static) collapse(2)
189 for (idx i = 0; i <
N; ++i) {
190 for (idx j = 0; j <
N; ++j) {
191 u[at(i, j)] = u_star[at(i, j)] - c * (
p[at(i, j)] -
p[at(wm1(i), j)]);
192 v[at(i, j)] = v_star[at(i, j)] - c * (
p[at(i, j)] -
p[at(i, wm1(j))]);
196 auto t1 = std::chrono::steady_clock::now();
197 stats.
project_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
209 real tol, idx max_iter)
224 for (idx iter = 0; iter < max_iter; ++iter) {
228 if (std::abs(denom) < 1e-15)
break;
230 real alpha = rsold / denom;
235 real res = std::sqrt(rsnew);
238 return {iter + 1, res,
true};
246 return {max_iter, std::sqrt(rsold),
false};
256 real fx = std::fmod(px /
h,
static_cast<real
>(
N));
257 real fy = std::fmod(py /
h - 0.5,
static_cast<real
>(
N));
258 if (fx < 0.0) fx +=
N;
259 if (fy < 0.0) fy +=
N;
261 idx i0 =
static_cast<idx
>(fx) %
N; idx i1 = wp1(i0);
262 real fi = fx - std::floor(fx);
264 idx j0 =
static_cast<idx
>(fy) %
N; idx j1 = wp1(j0);
265 real fj = fy - std::floor(fy);
267 return (1-fi)*(1-fj)*
u[at(i0,j0)] + fi*(1-fj)*
u[at(i1,j0)]
268 + (1-fi)*fj *
u[at(i0,j1)] + fi*fj *
u[at(i1,j1)];
272 real fx = std::fmod(px /
h - 0.5,
static_cast<real
>(
N));
273 real fy = std::fmod(py /
h,
static_cast<real
>(
N));
274 if (fx < 0.0) fx +=
N;
275 if (fy < 0.0) fy +=
N;
277 idx i0 =
static_cast<idx
>(fx) %
N; idx i1 = wp1(i0);
278 real fi = fx - std::floor(fx);
280 idx j0 =
static_cast<idx
>(fy) %
N; idx j1 = wp1(j0);
281 real fj = fy - std::floor(fy);
283 return (1-fi)*(1-fj)*
v[at(i0,j0)] + fi*(1-fj)*
v[at(i1,j0)]
284 + (1-fi)*fj *
v[at(i0,j1)] + fi*fj *
v[at(i1,j1)];
293 real dvdx = (
v[at(i, j)] -
v[at(wm1(i), j)]) /
h;
294 real dudy = (
u[at(i, j)] -
u[at(i, wm1(j))]) /
h;
300 real uc = 0.5 * (
u[at(i, j)] +
u[at(wp1(i), j)]);
301 real vc = 0.5 * (
v[at(i, j)] +
v[at(i, wp1(j))]);
302 return std::sqrt(uc * uc + vc * vc);
num::Vector p
velocity faces + cell-centre pressure, N*N each
NSSolver(idx N_, real dt_, real nu_=0.0)
real vorticity(idx i, idx j) const
Vorticity omega = d_v/d_x - d_u/d_y at grid corner (i*h, j*h).
real interp_u(real px, real py) const
Interpolate x-velocity at physical point (px, py).
real interp_v(real px, real py) const
Interpolate y-velocity at physical point (px, py).
void init_shear_layer(real rho=0.05, real delta=0.05)
real speed(idx i, idx j) const
Velocity magnitude averaged to cell centre (i,j).
void step()
Advance one time step (advect -> pressure -> project).
constexpr idx size() const noexcept
Explicit Euler diffusion steps for 2D uniform grids.
void scale(real *v, idx n, real alpha)
v = alpha * v
void diffusion_step_2d(Vector &u, int N, double coeff, Backend b=best_backend)
constexpr Backend best_backend
Best backend for memory-bound vector ops: blas > omp > blocked.
real beta(real a, real b)
B(a, b) – beta function.
void matvec(const Matrix &A, const Vector &x, Vector &y, Backend b=default_backend)
y = A * x
void scale(Vector &v, real alpha, Backend b=default_backend)
v *= alpha
real dot(const Vector &x, const Vector &y, Backend b=default_backend)
dot product
void laplacian_stencil_2d_periodic(const BasicVector< T > &x, BasicVector< T > &y, int N)
void axpy(real alpha, const Vector &x, Vector &y, Backend b=default_backend)
y += alpha * x
2-D incompressible Navier-Stokes, periodic MAC grid
Backend enum for linear algebra operations.