numerics 0.1.0
Loading...
Searching...
No Matches
policy.hpp
Go to the documentation of this file.
/// @file policy.hpp
/// @brief Backend enum for linear algebra operations.
///
/// Each module defines its own backend enum for the choices relevant to it.
/// This enum covers linalg (vector, matrix, solvers, factorization, eigen,
/// svd). Other modules define their own -- e.g. spectral/ uses FFTBackend.
///
///   num::seq     -- naive serial C++ (always available)
///   num::blocked -- cache-blocked, no intrinsics (compiler auto-vectorizes)
///   num::simd    -- hand-written SIMD (AVX2 on x86, NEON on AArch64)
///   num::blas    -- cblas / LAPACKE (optional, NUMERICS_HAS_BLAS)
///   num::omp     -- OpenMP parallel (optional, NUMERICS_HAS_OMP)
///   num::gpu     -- CUDA (optional, NUMERICS_HAS_CUDA)
#pragma once

namespace num {

/// @brief Selects which backend handles a linalg operation.
enum class Backend {
    seq,     ///< Naive textbook loops -- always available
    blocked, ///< Cache-blocked; compiler auto-vectorizes inner loops
    simd,    ///< Hand-written SIMD intrinsics (AVX2 or NEON)
    blas,    ///< cblas -- OpenBLAS, MKL, Apple Accelerate (Level-1/2/3)
    omp,     ///< OpenMP parallel blocked loops
    gpu,     ///< CUDA -- custom kernels or cuBLAS
    lapack,  ///< LAPACKE -- industry-standard factorizations, SVD, eigen
};

// Convenience constants -- use these at call sites:
//     matmul(A, B, C, num::blas);
inline constexpr Backend seq     = Backend::seq;
inline constexpr Backend blocked = Backend::blocked;
inline constexpr Backend simd    = Backend::simd;
inline constexpr Backend blas    = Backend::blas;
inline constexpr Backend omp     = Backend::omp;
inline constexpr Backend gpu     = Backend::gpu;
inline constexpr Backend lapack  = Backend::lapack;

// Compile-time capability flags

/// True when a BLAS/cblas library was found at configure time.
inline constexpr bool has_blas =
#if defined(NUMERICS_HAS_BLAS)
    true;
#else
    false;
#endif

/// True when LAPACKE was found at configure time.
inline constexpr bool has_lapack =
#if defined(NUMERICS_HAS_LAPACK)
    true;
#else
    false;
#endif

/// True when OpenMP was found at configure time.
inline constexpr bool has_omp =
#if defined(NUMERICS_HAS_OMP)
    true;
#else
    false;
#endif

/// True when a SIMD ISA was detected (AVX2 on x86-64, NEON on AArch64).
inline constexpr bool has_simd =
#if defined(NUMERICS_HAS_SIMD)
    true;
#else
    false;
#endif

// Default and best backends
//
// Full hierarchy for dense vector/matrix operations:
//     blas > omp > simd > blocked > seq
//
// Rationale:
//   blas    -- hardware-tuned BLAS (OpenBLAS / MKL / Accelerate): fastest for
//              large n; uses BLAS-3 blocking and multi-threading internally.
//   omp     -- our parallel blocked loops: good when BLAS is absent and the
//              machine has multiple cores; thread overhead amortises for
//              n > ~256.
//   simd    -- our hand-written AVX2/NEON kernels: wins over blocked for
//              single-threaded workloads when OMP threads would hurt.
//   blocked -- cache-blocked C++; compiler auto-vectorises inner loops; always
//              faster than naive seq for n beyond L1 cache.
//   seq     -- textbook loops; reference only, never selected automatically.

/// Default backend for dense vector/matrix ops (matmul, matvec, dot, axpy,
/// etc.). Automatically selected at configure time: blas > omp > simd >
/// blocked.
inline constexpr Backend default_backend =
#if defined(NUMERICS_HAS_BLAS)
    Backend::blas;
#elif defined(NUMERICS_HAS_OMP)
    Backend::omp;
#elif defined(NUMERICS_HAS_SIMD)
    Backend::simd;
#else
    Backend::blocked;
#endif

/// Best backend for memory-bound vector ops (dot, axpy, scale, norm).
/// Identical to default_backend -- both follow blas > omp > simd > blocked.
inline constexpr Backend best_backend = default_backend;

// Full hierarchy for factorizations, SVD, and eigensolvers:
//     lapack > omp > seq
//
// Rationale:
//   lapack -- LAPACKE (dgetrf, dgeqrf, dgesdd, dsyevd, etc.):
//             decades-optimised, BLAS-3 internally, fastest for n > ~64.
//   omp    -- our parallel Jacobi eigensolver; meaningful when LAPACK is
//             absent.
//   seq    -- our textbook implementations (Doolittle LU, Householder QR,
//             etc.).
//
// Note: blas and blocked are NOT in this chain -- there is no BLAS-level
// LU/QR/SVD/eigen API (those are LAPACK-level). If Backend::blas or
// Backend::blocked reaches a factorization dispatcher it silently falls
// through to seq.

/// Best backend for factorizations, SVD, and eigensolvers.
/// Prefers LAPACKE (industry-standard), then omp, then seq.
inline constexpr Backend lapack_backend =
#if defined(NUMERICS_HAS_LAPACK)
    Backend::lapack;
#elif defined(NUMERICS_HAS_OMP)
    Backend::omp;
#else
    Backend::seq;
#endif

} // namespace num
constexpr Backend simd
Definition policy.hpp:33
Backend
Selects which backend handles a linalg operation.
Definition policy.hpp:19
@ gpu
CUDA – custom kernels or cuBLAS.
@ omp
OpenMP parallel blocked loops.
@ blocked
Cache-blocked; compiler auto-vectorizes inner loops.
@ simd
Hand-written SIMD intrinsics (AVX2 or NEON)
@ blas
cblas – OpenBLAS, MKL, Apple Accelerate (Level-1/2/3)
@ lapack
LAPACKE – industry-standard factorizations, SVD, eigen.
@ seq
Naive textbook loops – always available.
constexpr Backend best_backend
Definition policy.hpp:105
constexpr Backend lapack_backend
Definition policy.hpp:124
constexpr Backend gpu
Definition policy.hpp:36
constexpr bool has_lapack
True when LAPACKE was found at configure time.
Definition policy.hpp:49
constexpr bool has_blas
True when a BLAS/cblas library was found at configure time.
Definition policy.hpp:41
constexpr Backend default_backend
Definition policy.hpp:92
constexpr bool has_simd
True when a SIMD ISA was detected (AVX2 on x86-64, NEON on AArch64).
Definition policy.hpp:65
constexpr bool has_omp
True when OpenMP was found at configure time.
Definition policy.hpp:57
constexpr Backend blas
Definition policy.hpp:34
constexpr Backend lapack
Definition policy.hpp:37
constexpr Backend omp
Definition policy.hpp:35
constexpr Backend seq
Definition policy.hpp:31
constexpr Backend blocked
Definition policy.hpp:32