numerics
Loading...
Searching...
No Matches
cell_list.hpp
Go to the documentation of this file.
1/// @file cell_list.hpp
2/// @brief Cache-coherent 2D cell list for O(1) amortized neighbour queries
3///
4/// @par Algorithm (CS theory: counting sort + prefix sums)
5///
6/// The standard spatial hash maps cell -> linked list of particles.
7/// Each linked-list traversal chases pointers through scattered heap memory,
8/// causing a cache miss per particle in the inner neighbour loop.
9///
10/// The cell list replaces this with a **counting sort**:
11///
12/// build() -- O(n + C) where C = total grid cells
13/// 1. Assign each particle i to cell id_i = cy_i*nx + cx_i O(n)
14/// 2. count[id]++ for each particle O(n)
15/// 3. Prefix sum: start[c] = Sigma count[0..c-1] O(C)
16/// 4. Scatter: sorted[start[id_i] + offset++] = i O(n)
17/// -> sorted[] contains particle indices grouped by cell.
18/// start[c]..start[c+1] is the contiguous range for cell c.
19///
20/// query(px, py, f) -- O(k) sequential reads
21/// For each of the 9 cells in the 3x3 neighbourhood:
22/// iterate sorted[start[c]..start[c+1]) <- sequential memory
23/// No pointer chasing. Particles in the same cell sit next to each
24/// other in sorted[], so the processor prefetches them correctly.
25///
26/// iterate_pairs(f) -- O(n*k/2) Newton's 3rd law half-shell
27/// Visits each unique unordered pair {i,j} exactly once.
28/// Based on the "forward half-shell" technique from molecular dynamics
29/// (Plimpton 1995, LAMMPS):
30///
31/// for each cell (cx, cy):
32/// intra-cell: pairs (a, b) where a < b in sorted[] index
33/// inter-cell: cross-product with 4 "forward" neighbours:
34/// (cx+1, cy-1), (cx+1, cy), (cx+1, cy+1), (cx, cy+1)
35///
36/// These 4 offsets + self cover all 9 neighbour directions uniquely:
37/// - dx > 0 -> handled when processing the left cell ✓
38/// - dx = 0, dy > 0 -> handled by (0,+1) offset ✓
39/// - dx < 0 or dy < 0 -> handled from the other side ✓
40/// Every pair appears in exactly one (cell, forward-offset) combination.
41///
42/// @par Caller responsibilities
43/// - Particles must lie within the domain passed to the constructor.
44/// - get_pos(i) callable must return {x, y} for particle i.
45/// - Distance check (r < cutoff) must still be done in the callback.
46///
47/// @tparam Scalar float or double
48#pragma once
49
50#include <vector>
51#include <cmath>
52#include <algorithm>
53#include <cassert>
54#include <utility>
55
56namespace num {
57
58/// Lightweight read-only range over a contiguous int array (C++17-safe span).
59struct IntRange {
60 const int* first;
61 const int* last;
62 const int* begin() const noexcept { return first; }
63 const int* end() const noexcept { return last; }
64 int size() const noexcept { return static_cast<int>(last - first); }
65 bool empty() const noexcept { return first == last; }
66};
67
68template<typename Scalar>
70public:
71 /// @param cell_size Width of one cell (use kernel support radius 2h).
72 /// @param xmin,xmax,ymin,ymax Simulation domain. Particles outside
73 /// are clamped to the boundary cell (safe, just no missed neighbours).
75 Scalar xmin, Scalar xmax,
76 Scalar ymin, Scalar ymax)
77 : cs_(cell_size), xmin_(xmin), ymin_(ymin)
78 {
79 // +2 padding cells on each axis prevents boundary checks in inner loops
80 nx_ = static_cast<int>(std::ceil((xmax - xmin) / cs_)) + 2;
81 ny_ = static_cast<int>(std::ceil((ymax - ymin) / cs_)) + 2;
82 const int total = nx_ * ny_;
83 start_.assign(total + 1, 0);
84 count_.assign(total, 0);
85 }
86
87 /// @brief Rebuild the cell list from n particles.
88 ///
89 /// PosAccessor: callable int -> std::pair<Scalar,Scalar> (x, y)
90 ///
91 /// Complexity: O(n + C) (two passes over particles + one prefix sum)
92 template<typename PosAccessor>
93 void build(PosAccessor&& get_pos, int n) {
94 sorted_.resize(n);
95 const int total = nx_ * ny_;
96
97 // Pass 1: count particles per cell
98 std::fill(count_.begin(), count_.end(), 0);
99 for (int i = 0; i < n; ++i)
100 ++count_[cell_id_of(get_pos(i))];
101
102 // Prefix sum -> start_[c] = first index in sorted_ for cell c
103 start_[0] = 0;
104 for (int c = 0; c < total; ++c)
105 start_[c + 1] = start_[c] + count_[c];
106
107 // Pass 2: scatter into sorted[] (counting sort, stable)
108 std::fill(count_.begin(), count_.end(), 0);
109 for (int i = 0; i < n; ++i) {
110 const int cid = cell_id_of(get_pos(i));
111 sorted_[start_[cid] + count_[cid]] = i;
112 ++count_[cid];
113 }
114 }
115
116 /// @brief Point query: calls f(int j) for every particle in the 3x3
117 /// cell neighbourhood of (px, py).
118 ///
119 /// Caller must still verify |r_ij| < cutoff -- this returns candidates.
120 template<typename F>
121 void query(Scalar px, Scalar py, F&& f) const {
122 const int cx = cell_x(px);
123 const int cy = cell_y(py);
124 for (int dy = -1; dy <= 1; ++dy) {
125 const int qy = cy + dy;
126 if (qy < 0 || qy >= ny_) continue;
127 for (int dx = -1; dx <= 1; ++dx) {
128 const int qx = cx + dx;
129 if (qx < 0 || qx >= nx_) continue;
130 const int cid = qy * nx_ + qx;
131 for (int k = start_[cid]; k < start_[cid + 1]; ++k)
132 f(sorted_[k]);
133 }
134 }
135 }
136
137 /// @brief Newton's 3rd law pair traversal.
138 ///
139 /// Calls f(int i, int j) for every unique unordered pair {i,j} whose
140 /// cells lie within the 3x3 neighbourhood. Each pair appears exactly
141 /// once. Caller can apply equal-and-opposite contributions to i and j.
142 ///
143 /// Complexity: O(n*k/2) where k = average neighbour count.
144 template<typename F>
145 void iterate_pairs(F&& f) const {
146 // 4 "forward" inter-cell offsets that, together with intra-cell
147 // pairs, cover all unique pairs in the 3x3 neighbourhood exactly once.
148 static constexpr int FDX[4] = {+1, 0, +1, -1};
149 static constexpr int FDY[4] = { 0, +1, +1, +1};
150
151 for (int cy = 0; cy < ny_; ++cy) {
152 for (int cx = 0; cx < nx_; ++cx) {
153 const int cid = cy * nx_ + cx;
154 const int beg = start_[cid];
155 const int end = start_[cid + 1];
156 if (beg == end) continue;
157
158 // 1. Intra-cell: pairs where a comes before b in sorted[]
159 for (int a = beg; a < end; ++a) {
160 for (int b = a + 1; b < end; ++b) {
161 f(sorted_[a], sorted_[b]);
162 }
163 }
164
165 // 2. Inter-cell: self x each forward neighbour cell
166 for (int d = 0; d < 4; ++d) {
167 const int ncx = cx + FDX[d];
168 const int ncy = cy + FDY[d];
169 if (ncx < 0 || ncx >= nx_ || ncy < 0 || ncy >= ny_) continue;
170 const int ncid = ncy * nx_ + ncx;
171 const int nbeg = start_[ncid];
172 const int nend = start_[ncid + 1];
173 if (nbeg == nend) continue;
174 for (int a = beg; a < end; ++a) {
175 for (int b = nbeg; b < nend; ++b) {
176 f(sorted_[a], sorted_[b]);
177 }
178 }
179 }
180 }
181 }
182 }
183
184 /// @brief Direct access to sorted particle indices for cell (cx, cy).
185 IntRange cell_particles(int cx, int cy) const noexcept {
186 const int cid = cy * nx_ + cx;
187 return { sorted_.data() + start_[cid],
188 sorted_.data() + start_[cid + 1] };
189 }
190
191 int nx() const noexcept { return nx_; }
192 int ny() const noexcept { return ny_; }
193 int n_particles() const noexcept { return static_cast<int>(sorted_.size()); }
194
195private:
196 Scalar cs_, xmin_, ymin_;
197 int nx_, ny_;
198
199 std::vector<int> sorted_; ///< Particle indices sorted by cell id
200 std::vector<int> start_; ///< start_[c] = first position in sorted_ for cell c
201 std::vector<int> count_; ///< Counting-sort scratch buffer
202
203 int cell_x(Scalar x) const noexcept {
204 // +1 for padding offset; clamp to [0, nx_-1]
205 const int cx = static_cast<int>(std::floor((x - xmin_) / cs_)) + 1;
206 return cx < 0 ? 0 : (cx >= nx_ ? nx_ - 1 : cx);
207 }
208 int cell_y(Scalar y) const noexcept {
209 const int cy = static_cast<int>(std::floor((y - ymin_) / cs_)) + 1;
210 return cy < 0 ? 0 : (cy >= ny_ ? ny_ - 1 : cy);
211 }
212 int cell_id_of(std::pair<Scalar, Scalar> p) const noexcept {
213 return cell_y(p.second) * nx_ + cell_x(p.first);
214 }
215};
216
217} // namespace num
int nx() const noexcept
void query(Scalar px, Scalar py, F &&f) const
Point query: calls f(int j) for every particle in the 3x3 cell neighbourhood of (px,...
void iterate_pairs(F &&f) const
Newton's 3rd law pair traversal.
CellList2D(Scalar cell_size, Scalar xmin, Scalar xmax, Scalar ymin, Scalar ymax)
Definition cell_list.hpp:74
int n_particles() const noexcept
IntRange cell_particles(int cx, int cy) const noexcept
Direct access to sorted particle indices for cell (cx, cy).
int ny() const noexcept
void build(PosAccessor &&get_pos, int n)
Rebuild the cell list from n particles.
Definition cell_list.hpp:93
constexpr T ipow(T x) noexcept
Compute x^N at compile time via repeated squaring.
Lightweight read-only range over a contiguous int array (C++17-safe span).
Definition cell_list.hpp:59
const int * begin() const noexcept
Definition cell_list.hpp:62
bool empty() const noexcept
Definition cell_list.hpp:65
const int * end() const noexcept
Definition cell_list.hpp:63
const int * last
Definition cell_list.hpp:61
int size() const noexcept
Definition cell_list.hpp:64
const int * first
Definition cell_list.hpp:60