numerics/api/ns__solver_8cpp_source.html

/// @file ns_solver.cpp

/// @brief 2-D incompressible Navier-Stokes  -- Chorin projection, periodic MAC grid


#include "ns_solver.hpp"

#include "core/policy.hpp"

#include "pde/diffusion.hpp"


#include <cmath>

#include <chrono>


namespace ns {


//  Construction


/// @brief Set up an N_ x N_ grid with every field zero-initialised.
/// @param N_  number of cells per side; the grid spacing h is set to 1/N_
/// @param dt_ time step stored in dt
/// @param nu_ viscosity stored in nu
NSSolver::NSSolver(idx N_, real dt_, real nu_)
    : N(N_),
      h(1.0 / static_cast<real>(N_)),
      dt(dt_),
      nu(nu_),
      // Face velocities and pressure: N_*N_ entries each.
      u(N_ * N_, 0.0),
      v(N_ * N_, 0.0),
      p(N_ * N_, 0.0),
      // Scratch fields: intermediate velocity and Poisson right-hand side.
      u_star(N_ * N_, 0.0),
      v_star(N_ * N_, 0.0),
      rhs(N_ * N_, 0.0)
{
}


//  Initial condition  -- double shear layer

//

//  Two horizontal shear bands centred at y = 0.25 and y = 0.75.

//  A small vertical perturbation seeds Kelvin-Helmholtz roll-up.

//  The initial field is analytically divergence-free.


/// @brief Fill u, v and p with the double-shear-layer initial condition.
///
/// u depends only on y (tanh profile around y = 0.25 and y = 0.75) and v only
/// on x (sinusoidal perturbation), so each profile value is evaluated once per
/// row/column and then broadcast.
///
/// @param rho   divisor of the distance from the band centre inside tanh
///              (smaller values give a sharper layer)
/// @param delta amplitude of the sinusoidal perturbation written to v
void NSSolver::init_shear_layer(real rho, real delta) {
    // 2*pi as a literal: M_PI is a POSIX extension and is not guaranteed by
    // ISO C++ <cmath>. The literal rounds to the same double as 2.0 * M_PI.
    constexpr real two_pi = 6.283185307179586476925286766559;

    // u[i,j] lives at (i*h, (j+1/2)*h): the profile is a function of j only.
    for (idx j = 0; j < N; ++j) {
        const real y = (j + 0.5) * h;
        const real u_profile = (y <= 0.5)
            ? std::tanh((y - 0.25) / rho)
            : std::tanh((0.75 - y) / rho);
        for (idx i = 0; i < N; ++i) {
            u[at(i, j)] = u_profile;
        }
    }

    // v[i,j] lives at ((i+1/2)*h, j*h): the perturbation is a function of i only.
    for (idx i = 0; i < N; ++i) {
        const real x = (i + 0.5) * h;
        const real v_profile = delta * std::sin(two_pi * x);
        for (idx j = 0; j < N; ++j) {
            v[at(i, j)] = v_profile;
        }
    }

    // Zero pressure (correct for divergence-free initial data)
    for (idx k = 0; k < N * N; ++k) p[k] = 0.0;
}


//  Top-level step


/// @brief Advance the solution by one time step.
///
/// Runs advect, (optionally) apply_diffusion, build_rhs, solve_pressure and
/// project in that order, then stores the elapsed wall time in stats.total_ms.
void NSSolver::step() {
    using clock = std::chrono::steady_clock;
    const auto begin = clock::now();

    advect();
    // Diffusion is skipped entirely unless nu is strictly positive.
    if (nu > 0.0) {
        apply_diffusion();
    }
    build_rhs();
    solve_pressure();
    project();

    const auto end = clock::now();
    const std::chrono::duration<double, std::milli> elapsed = end - begin;
    stats.total_ms = elapsed.count();
}


//  Semi-Lagrangian advection

//

//  For each MAC face, trace a particle backwards in time and interpolate

//  the velocity from the current field.  Unconditionally stable for any dt.


void NSSolver::advect() {

    auto t0 = std::chrono::steady_clock::now();


    // u[i,j] at (i*h, (j+1/2)*h)

    // Surrounding v-faces for the y-velocity at this point:

    //   v[i-1,j], v[i,j], v[i-1,j+1], v[i,j+1]

#pragma omp parallel for schedule(static) collapse(2)

    for (idx i = 0; i < N; ++i) {

        for (idx j = 0; j < N; ++j) {

            real uu = u[at(i, j)];

            real vu = 0.25 * (v[at(wm1(i), j     )] + v[at(i, j     )] +

                              v[at(wm1(i), wp1(j))] + v[at(i, wp1(j))]);


            real xb = i * h - dt * uu;

            real yb = (j + 0.5) * h - dt * vu;

            u_star[at(i, j)] = interp_u(xb, yb);

        }

    }


    // v[i,j] at ((i+1/2)*h, j*h)

    // Surrounding u-faces for the x-velocity at this point:

    //   u[i,j-1], u[i+1,j-1], u[i,j], u[i+1,j]

#pragma omp parallel for schedule(static) collapse(2)

    for (idx i = 0; i < N; ++i) {

        for (idx j = 0; j < N; ++j) {

            real vv = v[at(i, j)];

            real uv = 0.25 * (u[at(i,     wm1(j))] + u[at(wp1(i), wm1(j))] +

                              u[at(i,     j      )] + u[at(wp1(i), j      )]);


            real xb = (i + 0.5) * h - dt * uv;

            real yb = j * h - dt * vv;

            v_star[at(i, j)] = interp_v(xb, yb);

        }

    }


    auto t1 = std::chrono::steady_clock::now();

    stats.advect_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();

}


//  Explicit viscosity (forward Euler, stable only when dt <= h^2/(4nu))


/// @brief Apply one explicit diffusion step to u_star and v_star in place.
///
/// Both components are passed to num::pde::diffusion_step_2d with the same
/// non-dimensional coefficient dt*nu/h^2.
void NSSolver::apply_diffusion() {
    const int    side  = static_cast<int>(N);
    const double coeff = dt * nu / (h * h);

    num::pde::diffusion_step_2d(u_star, side, coeff);
    num::pde::diffusion_step_2d(v_star, side, coeff);
}


//  RHS: rhs = -div(u*)/dt

//

//  Discrete divergence on the MAC grid:

//    div(u*)[i,j] = (u*[i+1,j] - u*[i,j]) / h  +  (v*[i,j+1] - v*[i,j]) / h

//

//  We form the positive-definite system (-Delta)p = -div(u*)/dt so that CG works.


void NSSolver::build_rhs() {

    const real scale = -1.0 / (h * dt);   // -1/(h*dt) so rhs = -div/dt


#pragma omp parallel for schedule(static) collapse(2)

    for (idx i = 0; i < N; ++i) {

        for (idx j = 0; j < N; ++j) {

            real div_ij = u_star[at(wp1(i), j)] - u_star[at(i, j)]

                        + v_star[at(i, wp1(j))] - v_star[at(i, j)];

            rhs[at(i, j)] = scale * div_ij;

        }

    }


    // Remove mean (periodic Poisson is singular; mean(div) = 0 analytically,

    // but floating-point errors accumulate  -- subtracting keeps CG well-posed).

    real sum = 0.0;

    for (idx k = 0; k < N * N; ++k) sum += rhs[k];

    real mean = sum / static_cast<real>(N * N);

    for (idx k = 0; k < N * N; ++k) rhs[k] -= mean;

}


//  Pressure solve: (-Delta)p = rhs   via CG

//

//  (-Delta)p[i,j] = (4p[i,j] - p[i+/-1,j] - p[i,j+/-1]) / h^2

//

//  The operator is positive semi-definite (null space = constants).

//  We initialise p = previous p (warm start) and subtract the mean after.


void NSSolver::solve_pressure() {

    auto t0 = std::chrono::steady_clock::now();


    const real inv_h2 = 1.0 / (h * h);

    const int  n      = static_cast<int>(N);


    // Matrix-free negative Laplacian via laplacian_stencil_2d_periodic (boundary-peeled,

    // auto-vectorisable inner loop) then negate and scale.

    auto neg_lap = [&](const num::Vector& pin, num::Vector& out) {

        num::laplacian_stencil_2d_periodic(pin, out, n);

        num::scale(out, -inv_h2);

    };


    auto result = cg_omp(neg_lap, rhs, p, /*tol=*/1e-3, /*max_iter=*/100);

    stats.cg_iters    = result.iterations;

    stats.cg_residual = result.residual;


    // Remove mean from pressure (physics is unchanged; improves stability)

    real sum = 0.0;

    for (idx k = 0; k < N * N; ++k) sum += p[k];

    real mean = sum / static_cast<real>(N * N);

    for (idx k = 0; k < N * N; ++k) p[k] -= mean;


    auto t1 = std::chrono::steady_clock::now();

    stats.pressure_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();

}


//  Projection: u = u* - dt*grad(p)

//

//  Gradient at MAC face (i,j):

//    (d_p/d_x) at u-face = (p[i,j] - p[i-1,j]) / h

//    (d_p/d_y) at v-face = (p[i,j] - p[i,j-1]) / h


void NSSolver::project() {

    auto t0 = std::chrono::steady_clock::now();


    const real c = dt / h;


#pragma omp parallel for schedule(static) collapse(2)

    for (idx i = 0; i < N; ++i) {

        for (idx j = 0; j < N; ++j) {

            u[at(i, j)] = u_star[at(i, j)] - c * (p[at(i, j)] - p[at(wm1(i), j)]);

            v[at(i, j)] = v_star[at(i, j)] - c * (p[at(i, j)] - p[at(i, wm1(j))]);

        }

    }


    auto t1 = std::chrono::steady_clock::now();

    stats.project_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();

}


//  CG (runs on zero-mean subspace, tolerates singular periodic Laplacian)

//

//  Vector ops use num::best_backend  -- fastest available policy selected at compile time

//  via if constexpr: blas (Accelerate/OpenBLAS/MKL) > omp > blocked.

//  BLAS wins for cache-resident vectors (N <= 512, ~512 KB) on all platforms.


/// @brief Conjugate-gradient solve of A x = b with a matrix-free operator.
///
/// @param matvec   callable computing out = A * in
/// @param b        right-hand side
/// @param x        initial guess on entry; updated in place with the iterate
/// @param tol      absolute tolerance on the residual 2-norm ||b - A x||
/// @param max_iter maximum number of CG iterations
/// @return {iterations performed, last computed residual 2-norm, flag} where
///         the flag is true only if the residual dropped below tol.
///
/// If the initial guess already satisfies the tolerance the function returns
/// immediately with zero iterations. If the search direction breaks down
/// (|p . A p| < 1e-15) the iteration stops and the number of iterations
/// actually completed is reported, with the flag set to false.
num::SolverResult NSSolver::cg_omp(
    std::function<void(const num::Vector&, num::Vector&)> matvec,
    const num::Vector& b, num::Vector& x,
    real tol, idx max_iter)
{
    const idx n = b.size();
    num::Vector Ap(n, 0.0), r(n, 0.0), pvec(n, 0.0);

    // r = b - A*x
    matvec(x, Ap);
    r = b;
    num::axpy(-1.0, Ap, r, num::best_backend);

    real rsold = num::dot(r, r, num::best_backend);

    // A warm start (or a zero right-hand side) may already be converged.
    // Without this check a zero residual would trip the breakdown test below
    // and be reported as a failed solve.
    const real res0 = std::sqrt(rsold);
    if (res0 < tol)
        return {idx{0}, res0, true};

    // p = r
    pvec = r;

    for (idx iter = 0; iter < max_iter; ++iter) {
        matvec(pvec, Ap);

        const real denom = num::dot(pvec, Ap, num::best_backend);
        // Breakdown: the step length is undefined. Report the iterations
        // completed so far rather than max_iter.
        if (std::abs(denom) < 1e-15)
            return {iter, std::sqrt(rsold), false};

        const real alpha = rsold / denom;
        num::axpy( alpha, pvec, x,   num::best_backend);  // x  += alpha*p
        num::axpy(-alpha, Ap,   r,   num::best_backend);  // r  -= alpha*Ap

        const real rsnew = num::dot(r, r, num::best_backend);
        const real res   = std::sqrt(rsnew);

        if (res < tol)
            return {iter + 1, res, true};

        const real beta = rsnew / rsold;
        num::scale(pvec, beta, num::best_backend);        // p  *= beta
        num::axpy(1.0, r, pvec, num::best_backend);       // p  += r
        rsold = rsnew;
    }

    return {max_iter, std::sqrt(rsold), false};
}


//  Bilinear interpolation helpers (periodic)

//

//  u[k,l] lives at (k*h, (l+1/2)*h)  -> x stagger: none, y stagger: +1/2

//  v[k,l] lives at ((k+1/2)*h, l*h)  -> x stagger: +1/2,   y stagger: none


/// @brief Periodic bilinear interpolation of u at the physical point (px, py).
/// @param px x-coordinate of the sample point
/// @param py y-coordinate of the sample point
/// @return weighted sum of the four u samples surrounding the point
real NSSolver::interp_u(real px, real py) const {
    const real period = static_cast<real>(N);

    // Continuous u-grid indices: u has no stagger in x and +1/2 in y.
    real gx = std::fmod(px / h, period);
    real gy = std::fmod(py / h - 0.5, period);
    // fmod keeps the sign of its first argument; shift negatives up by one period.
    if (gx < 0.0) gx += period;
    if (gy < 0.0) gy += period;

    // Lower corner (the extra % N guards against gx == N after the shift)
    // and its periodic neighbour.
    const idx i_lo = static_cast<idx>(gx) % N;
    const idx j_lo = static_cast<idx>(gy) % N;
    const idx i_hi = wp1(i_lo);
    const idx j_hi = wp1(j_lo);

    // Fractional offsets inside the cell.
    const real tx = gx - std::floor(gx);
    const real ty = gy - std::floor(gy);

    const real w_ll = (1 - tx) * (1 - ty);
    const real w_hl = tx * (1 - ty);
    const real w_lh = (1 - tx) * ty;
    const real w_hh = tx * ty;

    return w_ll * u[at(i_lo, j_lo)] + w_hl * u[at(i_hi, j_lo)]
         + w_lh * u[at(i_lo, j_hi)] + w_hh * u[at(i_hi, j_hi)];
}


/// @brief Periodic bilinear interpolation of v at the physical point (px, py).
/// @param px x-coordinate of the sample point
/// @param py y-coordinate of the sample point
/// @return weighted sum of the four v samples surrounding the point
real NSSolver::interp_v(real px, real py) const {
    const real period = static_cast<real>(N);

    // Continuous v-grid indices: v has +1/2 stagger in x and none in y.
    real gx = std::fmod(px / h - 0.5, period);
    real gy = std::fmod(py / h, period);
    // fmod keeps the sign of its first argument; shift negatives up by one period.
    if (gx < 0.0) gx += period;
    if (gy < 0.0) gy += period;

    // Lower corner (the extra % N guards against gx == N after the shift)
    // and its periodic neighbour.
    const idx i_lo = static_cast<idx>(gx) % N;
    const idx j_lo = static_cast<idx>(gy) % N;
    const idx i_hi = wp1(i_lo);
    const idx j_hi = wp1(j_lo);

    // Fractional offsets inside the cell.
    const real tx = gx - std::floor(gx);
    const real ty = gy - std::floor(gy);

    const real w_ll = (1 - tx) * (1 - ty);
    const real w_hl = tx * (1 - ty);
    const real w_lh = (1 - tx) * ty;
    const real w_hh = tx * ty;

    return w_ll * v[at(i_lo, j_lo)] + w_hl * v[at(i_hi, j_lo)]
         + w_lh * v[at(i_lo, j_hi)] + w_hh * v[at(i_hi, j_hi)];
}


//  Diagnostics


/// @brief Discrete vorticity d_v/d_x - d_u/d_y at the grid corner (i*h, j*h).
/// @param i corner index in x
/// @param j corner index in y
/// @return backward-difference estimate using the two v-faces and two u-faces
///         adjacent to the corner (periodic wrap via wm1)
real NSSolver::vorticity(idx i, idx j) const {
    const idx here = at(i, j);

    // v[i,j] and v[i-1,j] straddle the corner in x;
    // u[i,j] and u[i,j-1] straddle it in y.
    const real dv_dx = (v[here] - v[at(wm1(i), j)]) / h;
    const real du_dy = (u[here] - u[at(i, wm1(j))]) / h;

    return dv_dx - du_dy;
}


/// @brief Velocity magnitude at the centre of cell (i, j).
/// @param i cell index in x
/// @param j cell index in y
/// @return sqrt(uc^2 + vc^2), where uc and vc are the means of the two
///         opposing faces of the cell
real NSSolver::speed(idx i, idx j) const {
    const idx here = at(i, j);

    // Average west/east u-faces and south/north v-faces to the cell centre.
    const real u_centre = 0.5 * (u[here] + u[at(wp1(i), j)]);
    const real v_centre = 0.5 * (v[here] + v[at(i, wp1(j))]);

    return std::sqrt(u_centre * u_centre + v_centre * v_centre);
}


} // namespace ns


ns::NSSolver::p
num::Vector p
velocity faces + cell-centre pressure, N*N each
Definition ns_solver.hpp:72

ns::NSSolver::NSSolver
NSSolver(idx N_, real dt_, real nu_=0.0)
Definition ns_solver.cpp:15

ns::NSSolver::stats
Stats stats
Definition ns_solver.hpp:74

ns::NSSolver::h
const real h
Definition ns_solver.hpp:69

ns::NSSolver::nu
const real nu
Definition ns_solver.hpp:69

ns::NSSolver::u
num::Vector u
Definition ns_solver.hpp:72

ns::NSSolver::vorticity
real vorticity(idx i, idx j) const
Vorticity omega = d_v/d_x - d_u/d_y at grid corner (i*h, j*h).
Definition ns_solver.cpp:289

ns::NSSolver::interp_u
real interp_u(real px, real py) const
Interpolate x-velocity at physical point (px, py).
Definition ns_solver.cpp:254

ns::NSSolver::N
const idx N
Definition ns_solver.hpp:68

ns::NSSolver::interp_v
real interp_v(real px, real py) const
Interpolate y-velocity at physical point (px, py).
Definition ns_solver.cpp:271

ns::NSSolver::init_shear_layer
void init_shear_layer(real rho=0.05, real delta=0.05)
Definition ns_solver.cpp:27

ns::NSSolver::speed
real speed(idx i, idx j) const
Velocity magnitude averaged to cell centre (i,j).
Definition ns_solver.cpp:298

ns::NSSolver::dt
const real dt
Definition ns_solver.hpp:69

ns::NSSolver::step
void step()
Advance one time step (advect -> pressure -> project).
Definition ns_solver.cpp:50

ns::NSSolver::v
num::Vector v
Definition ns_solver.hpp:72

num::BasicVector< real >

num::BasicVector::size
constexpr idx size() const noexcept
Definition vector.hpp:77

diffusion.hpp
Explicit Euler diffusion steps for 2D uniform grids.

ns
Definition ns_solver.cpp:11

num::cuda::scale
void scale(real *v, idx n, real alpha)
v = alpha * v
Definition cuda_stubs.cpp:14

num::pde::diffusion_step_2d
void diffusion_step_2d(Vector &u, int N, double coeff, Backend b=best_backend)
Definition diffusion.hpp:30

num::real
double real
Definition types.hpp:10

num::best_backend
constexpr Backend best_backend
Best backend for memory-bound vector ops: blas > omp > blocked.
Definition policy.hpp:65

num::beta
real beta(real a, real b)
B(a, b) – beta function.
Definition math.hpp:242

num::idx
std::size_t idx
Definition types.hpp:11

num::matvec
void matvec(const Matrix &A, const Vector &x, Vector &y, Backend b=default_backend)
y = A * x
Definition matrix.cpp:94

num::scale
void scale(Vector &v, real alpha, Backend b=default_backend)
v *= alpha
Definition vector.cpp:27

num::dot
real dot(const Vector &x, const Vector &y, Backend b=default_backend)
dot product
Definition vector.cpp:57

num::laplacian_stencil_2d_periodic
void laplacian_stencil_2d_periodic(const BasicVector< T > &x, BasicVector< T > &y, int N)
Definition stencil.hpp:52

num::axpy
void axpy(real alpha, const Vector &x, Vector &y, Backend b=default_backend)
y += alpha * x
Definition vector.cpp:46

ns_solver.hpp
2-D incompressible Navier-Stokes, periodic MAC grid

policy.hpp
Backend enum for linear algebra operations.

ns::Stats::total_ms
double total_ms
Definition ns_solver.hpp:37

ns::Stats::advect_ms
double advect_ms
Definition ns_solver.hpp:34

ns::Stats::project_ms
double project_ms
Definition ns_solver.hpp:36

ns::Stats::cg_iters
idx cg_iters
Definition ns_solver.hpp:32

ns::Stats::cg_residual
real cg_residual
Definition ns_solver.hpp:33

ns::Stats::pressure_ms
double pressure_ms
Definition ns_solver.hpp:35

num::SolverResult
Definition solver_result.hpp:8