numerics
Loading...
Searching...
No Matches
ns_solver.cpp
Go to the documentation of this file.
1/// @file ns_solver.cpp
2/// @brief 2-D incompressible Navier-Stokes -- Chorin projection, periodic MAC grid
3
4#include "ns_solver.hpp"
5#include "core/policy.hpp"
6#include "pde/diffusion.hpp"
7
8#include <cmath>
9#include <chrono>
10
11namespace ns {
12
13// Construction
14
15NSSolver::NSSolver(idx N_, real dt_, real nu_)
16 : N(N_), h(1.0 / N_), dt(dt_), nu(nu_),
17 u(N_ * N_, 0.0), v(N_ * N_, 0.0), p(N_ * N_, 0.0),
18 u_star(N_ * N_, 0.0), v_star(N_ * N_, 0.0), rhs(N_ * N_, 0.0)
19{}
20
21// Initial condition -- double shear layer
22//
23// Two horizontal shear bands centred at y = 0.25 and y = 0.75.
24// A small vertical perturbation seeds Kelvin-Helmholtz roll-up.
25// The initial field is analytically divergence-free.
26
27void NSSolver::init_shear_layer(real rho, real delta) {
28 const real two_pi = 2.0 * M_PI;
29
30 for (idx i = 0; i < N; ++i) {
31 for (idx j = 0; j < N; ++j) {
32 // u[i,j] lives at (i*h, (j+1/2)*h)
33 real y = (j + 0.5) * h;
34 u[at(i, j)] = (y <= 0.5)
35 ? std::tanh((y - 0.25) / rho)
36 : std::tanh((0.75 - y) / rho);
37
38 // v[i,j] lives at ((i+1/2)*h, j*h)
39 real x = (i + 0.5) * h;
40 v[at(i, j)] = delta * std::sin(two_pi * x);
41 }
42 }
43
44 // Zero pressure (correct for divergence-free initial data)
45 for (idx k = 0; k < N * N; ++k) p[k] = 0.0;
46}
47
48// Top-level step
49
51 auto t0 = std::chrono::steady_clock::now();
52
53 advect();
54 if (nu > 0.0) apply_diffusion();
55 build_rhs();
56 solve_pressure();
57 project();
58
59 auto t1 = std::chrono::steady_clock::now();
60 stats.total_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
61}
62
63// Semi-Lagrangian advection
64//
65// For each MAC face, trace a particle backwards in time and interpolate
66// the velocity from the current field. Unconditionally stable for any dt.
67
68void NSSolver::advect() {
69 auto t0 = std::chrono::steady_clock::now();
70
71 // u[i,j] at (i*h, (j+1/2)*h)
72 // Surrounding v-faces for the y-velocity at this point:
73 // v[i-1,j], v[i,j], v[i-1,j+1], v[i,j+1]
74#pragma omp parallel for schedule(static) collapse(2)
75 for (idx i = 0; i < N; ++i) {
76 for (idx j = 0; j < N; ++j) {
77 real uu = u[at(i, j)];
78 real vu = 0.25 * (v[at(wm1(i), j )] + v[at(i, j )] +
79 v[at(wm1(i), wp1(j))] + v[at(i, wp1(j))]);
80
81 real xb = i * h - dt * uu;
82 real yb = (j + 0.5) * h - dt * vu;
83 u_star[at(i, j)] = interp_u(xb, yb);
84 }
85 }
86
87 // v[i,j] at ((i+1/2)*h, j*h)
88 // Surrounding u-faces for the x-velocity at this point:
89 // u[i,j-1], u[i+1,j-1], u[i,j], u[i+1,j]
90#pragma omp parallel for schedule(static) collapse(2)
91 for (idx i = 0; i < N; ++i) {
92 for (idx j = 0; j < N; ++j) {
93 real vv = v[at(i, j)];
94 real uv = 0.25 * (u[at(i, wm1(j))] + u[at(wp1(i), wm1(j))] +
95 u[at(i, j )] + u[at(wp1(i), j )]);
96
97 real xb = (i + 0.5) * h - dt * uv;
98 real yb = j * h - dt * vv;
99 v_star[at(i, j)] = interp_v(xb, yb);
100 }
101 }
102
103 auto t1 = std::chrono::steady_clock::now();
104 stats.advect_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
105}
106
107// Explicit viscosity (forward Euler, stable only when dt <= h^2/(4nu))
108
109void NSSolver::apply_diffusion() {
110 const double c = dt * nu / (h * h);
111 const int n = static_cast<int>(N);
112 num::pde::diffusion_step_2d(u_star, n, c);
113 num::pde::diffusion_step_2d(v_star, n, c);
114}
115
116// RHS: rhs = -div(u*)/dt
117//
118// Discrete divergence on the MAC grid:
119// div(u*)[i,j] = (u*[i+1,j] - u*[i,j]) / h + (v*[i,j+1] - v*[i,j]) / h
120//
121// We form the positive-definite system (-Delta)p = -div(u*)/dt so that CG works.
122
123void NSSolver::build_rhs() {
124 const real scale = -1.0 / (h * dt); // -1/(h*dt) so rhs = -div/dt
125
126#pragma omp parallel for schedule(static) collapse(2)
127 for (idx i = 0; i < N; ++i) {
128 for (idx j = 0; j < N; ++j) {
129 real div_ij = u_star[at(wp1(i), j)] - u_star[at(i, j)]
130 + v_star[at(i, wp1(j))] - v_star[at(i, j)];
131 rhs[at(i, j)] = scale * div_ij;
132 }
133 }
134
135 // Remove mean (periodic Poisson is singular; mean(div) = 0 analytically,
136 // but floating-point errors accumulate -- subtracting keeps CG well-posed).
137 real sum = 0.0;
138 for (idx k = 0; k < N * N; ++k) sum += rhs[k];
139 real mean = sum / static_cast<real>(N * N);
140 for (idx k = 0; k < N * N; ++k) rhs[k] -= mean;
141}
142
143// Pressure solve: (-Delta)p = rhs via CG
144//
145// (-Delta)p[i,j] = (4p[i,j] - p[i+/-1,j] - p[i,j+/-1]) / h^2
146//
147// The operator is positive semi-definite (null space = constants).
148// We initialise p = previous p (warm start) and subtract the mean after.
149
150void NSSolver::solve_pressure() {
151 auto t0 = std::chrono::steady_clock::now();
152
153 const real inv_h2 = 1.0 / (h * h);
154 const int n = static_cast<int>(N);
155
156 // Matrix-free negative Laplacian via laplacian_stencil_2d_periodic (boundary-peeled,
157 // auto-vectorisable inner loop) then negate and scale.
158 auto neg_lap = [&](const num::Vector& pin, num::Vector& out) {
160 num::scale(out, -inv_h2);
161 };
162
163 auto result = cg_omp(neg_lap, rhs, p, /*tol=*/1e-3, /*max_iter=*/100);
164 stats.cg_iters = result.iterations;
165 stats.cg_residual = result.residual;
166
167 // Remove mean from pressure (physics is unchanged; improves stability)
168 real sum = 0.0;
169 for (idx k = 0; k < N * N; ++k) sum += p[k];
170 real mean = sum / static_cast<real>(N * N);
171 for (idx k = 0; k < N * N; ++k) p[k] -= mean;
172
173 auto t1 = std::chrono::steady_clock::now();
174 stats.pressure_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
175}
176
177// Projection: u = u* - dt*gradp
178//
179// Gradient at MAC face (i,j):
180// (d_p/d_x) at u-face = (p[i,j] - p[i-1,j]) / h
181// (d_p/d_y) at v-face = (p[i,j] - p[i,j-1]) / h
182
183void NSSolver::project() {
184 auto t0 = std::chrono::steady_clock::now();
185
186 const real c = dt / h;
187
188#pragma omp parallel for schedule(static) collapse(2)
189 for (idx i = 0; i < N; ++i) {
190 for (idx j = 0; j < N; ++j) {
191 u[at(i, j)] = u_star[at(i, j)] - c * (p[at(i, j)] - p[at(wm1(i), j)]);
192 v[at(i, j)] = v_star[at(i, j)] - c * (p[at(i, j)] - p[at(i, wm1(j))]);
193 }
194 }
195
196 auto t1 = std::chrono::steady_clock::now();
197 stats.project_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
198}
199
200// CG (runs on zero-mean subspace, tolerates singular periodic Laplacian)
201//
202// Vector ops use num::best_backend -- fastest available policy selected at compile time
203// via if constexpr: blas (Accelerate/OpenBLAS/MKL) > omp > blocked.
204// BLAS wins for cache-resident vectors (N <= 512, ~512 KB) on all platforms.
205
206num::SolverResult NSSolver::cg_omp(
207 std::function<void(const num::Vector&, num::Vector&)> matvec,
208 const num::Vector& b, num::Vector& x,
209 real tol, idx max_iter)
210{
211 const idx n = b.size();
212 num::Vector Ap(n, 0.0), r(n, 0.0), pvec(n, 0.0);
213
214 // r = b - A*x
215 matvec(x, Ap);
216 r = b;
217 num::axpy(-1.0, Ap, r, num::best_backend);
218
219 // p = r
220 pvec = r;
221
222 real rsold = num::dot(r, r, num::best_backend);
223
224 for (idx iter = 0; iter < max_iter; ++iter) {
225 matvec(pvec, Ap);
226
227 real denom = num::dot(pvec, Ap, num::best_backend);
228 if (std::abs(denom) < 1e-15) break;
229
230 real alpha = rsold / denom;
231 num::axpy( alpha, pvec, x, num::best_backend); // x += alpha*p
232 num::axpy(-alpha, Ap, r, num::best_backend); // r -= alpha*Ap
233
234 real rsnew = num::dot(r, r, num::best_backend);
235 real res = std::sqrt(rsnew);
236
237 if (res < tol)
238 return {iter + 1, res, true};
239
240 real beta = rsnew / rsold;
241 num::scale(pvec, beta, num::best_backend); // p *= beta
242 num::axpy(1.0, r, pvec, num::best_backend); // p += r
243 rsold = rsnew;
244 }
245
246 return {max_iter, std::sqrt(rsold), false};
247}
248
249// Bilinear interpolation helpers (periodic)
250//
251// u[k,l] lives at (k*h, (l+1/2)*h) -> x stagger: none, y stagger: +1/2
252// v[k,l] lives at ((k+1/2)*h, l*h) -> x stagger: +1/2, y stagger: none
253
254real NSSolver::interp_u(real px, real py) const {
255 // Convert to u-grid continuous indices, wrap to [0, N)
256 real fx = std::fmod(px / h, static_cast<real>(N));
257 real fy = std::fmod(py / h - 0.5, static_cast<real>(N));
258 if (fx < 0.0) fx += N;
259 if (fy < 0.0) fy += N;
260
261 idx i0 = static_cast<idx>(fx) % N; idx i1 = wp1(i0);
262 real fi = fx - std::floor(fx);
263
264 idx j0 = static_cast<idx>(fy) % N; idx j1 = wp1(j0);
265 real fj = fy - std::floor(fy);
266
267 return (1-fi)*(1-fj)*u[at(i0,j0)] + fi*(1-fj)*u[at(i1,j0)]
268 + (1-fi)*fj *u[at(i0,j1)] + fi*fj *u[at(i1,j1)];
269}
270
271real NSSolver::interp_v(real px, real py) const {
272 real fx = std::fmod(px / h - 0.5, static_cast<real>(N));
273 real fy = std::fmod(py / h, static_cast<real>(N));
274 if (fx < 0.0) fx += N;
275 if (fy < 0.0) fy += N;
276
277 idx i0 = static_cast<idx>(fx) % N; idx i1 = wp1(i0);
278 real fi = fx - std::floor(fx);
279
280 idx j0 = static_cast<idx>(fy) % N; idx j1 = wp1(j0);
281 real fj = fy - std::floor(fy);
282
283 return (1-fi)*(1-fj)*v[at(i0,j0)] + fi*(1-fj)*v[at(i1,j0)]
284 + (1-fi)*fj *v[at(i0,j1)] + fi*fj *v[at(i1,j1)];
285}
286
287// Diagnostics
288
289real NSSolver::vorticity(idx i, idx j) const {
290 // omega = d_v/d_x - d_u/d_y at corner (i*h, j*h)
291 // v[i,j] at ((i+1/2)h, j*h), v[i-1,j] at ((i-1/2)h, j*h)
292 // u[i,j] at (i*h, (j+1/2)h), u[i,j-1] at (i*h, (j-1/2)h)
293 real dvdx = (v[at(i, j)] - v[at(wm1(i), j)]) / h;
294 real dudy = (u[at(i, j)] - u[at(i, wm1(j))]) / h;
295 return dvdx - dudy;
296}
297
298real NSSolver::speed(idx i, idx j) const {
299 // Average faces to cell centre (i+1/2, j+1/2)*h
300 real uc = 0.5 * (u[at(i, j)] + u[at(wp1(i), j)]);
301 real vc = 0.5 * (v[at(i, j)] + v[at(i, wp1(j))]);
302 return std::sqrt(uc * uc + vc * vc);
303}
304
305} // namespace ns
num::Vector p
velocity faces + cell-centre pressure, N*N each
Definition ns_solver.hpp:72
NSSolver(idx N_, real dt_, real nu_=0.0)
Definition ns_solver.cpp:15
const real h
Definition ns_solver.hpp:69
const real nu
Definition ns_solver.hpp:69
num::Vector u
Definition ns_solver.hpp:72
real vorticity(idx i, idx j) const
Vorticity omega = d_v/d_x - d_u/d_y at grid corner (i*h, j*h).
real interp_u(real px, real py) const
Interpolate x-velocity at physical point (px, py).
const idx N
Definition ns_solver.hpp:68
real interp_v(real px, real py) const
Interpolate y-velocity at physical point (px, py).
void init_shear_layer(real rho=0.05, real delta=0.05)
Definition ns_solver.cpp:27
real speed(idx i, idx j) const
Velocity magnitude averaged to cell centre (i,j).
const real dt
Definition ns_solver.hpp:69
void step()
Advance one time step (advect -> pressure -> project).
Definition ns_solver.cpp:50
num::Vector v
Definition ns_solver.hpp:72
constexpr idx size() const noexcept
Definition vector.hpp:77
Explicit Euler diffusion steps for 2D uniform grids.
void scale(real *v, idx n, real alpha)
v = alpha * v
void diffusion_step_2d(Vector &u, int N, double coeff, Backend b=best_backend)
Definition diffusion.hpp:30
double real
Definition types.hpp:10
constexpr Backend best_backend
Best backend for memory-bound vector ops: blas > omp > blocked.
Definition policy.hpp:65
real beta(real a, real b)
B(a, b) – beta function.
Definition math.hpp:242
std::size_t idx
Definition types.hpp:11
void matvec(const Matrix &A, const Vector &x, Vector &y, Backend b=default_backend)
y = A * x
Definition matrix.cpp:94
void scale(Vector &v, real alpha, Backend b=default_backend)
v *= alpha
Definition vector.cpp:27
real dot(const Vector &x, const Vector &y, Backend b=default_backend)
dot product
Definition vector.cpp:57
void laplacian_stencil_2d_periodic(const BasicVector< T > &x, BasicVector< T > &y, int N)
Definition stencil.hpp:52
void axpy(real alpha, const Vector &x, Vector &y, Backend b=default_backend)
y += alpha * x
Definition vector.cpp:46
2-D incompressible Navier-Stokes, periodic MAC grid
Backend enum for linear algebra operations.
double total_ms
Definition ns_solver.hpp:37
double advect_ms
Definition ns_solver.hpp:34
double project_ms
Definition ns_solver.hpp:36
real cg_residual
Definition ns_solver.hpp:33
double pressure_ms
Definition ns_solver.hpp:35