16#ifdef NUMERICS_HAS_STD_SIMD
18#include "../seq/impl.hpp"
19#include <experimental/simd>
24namespace stdx = std::experimental;
29static constexpr double TWO_PI = 6.283185307179586476925286766559;
36 std::vector<std::vector<num::cplx>> twiddles;
38 FFTPlanImpl(
int n_,
bool inv) : n(n_), invert(inv) {
39 if (n_ == 0 || (n_ & (n_ - 1)))
40 throw std::invalid_argument(
"FFTPlan: length must be a power of two");
41 for (
int len = 2; len <= n_; len <<= 1) {
42 double ang = TWO_PI /
static_cast<double>(len) * (inv ? 1.0 : -1.0);
43 num::cplx wlen{std::cos(ang), std::sin(ang)};
44 std::vector<num::cplx> tw(len / 2);
46 for (
int j = 0; j < len / 2; ++j) { tw[j] = w; w *= wlen; }
47 twiddles.push_back(std::move(tw));
52 using vd = stdx::simd<double, stdx::simd_abi::native<double>>;
53 constexpr int W =
static_cast<int>(vd::size());
59 for (
int len = 2; len <= n; len <<= 1, ++stage) {
61 const num::cplx* tw = twiddles[stage].data();
63 for (
int i = 0; i < n; i += len) {
68 for (; j + W <= hlen; j += W) {
70 vd ur([&](
int k) ->
double {
return up[j+k].real(); });
71 vd ui([&](
int k) ->
double {
return up[j+k].imag(); });
72 vd vr([&](
int k) ->
double {
return vp[j+k].real(); });
73 vd vi([&](
int k) ->
double {
return vp[j+k].imag(); });
74 vd wr([&](
int k) ->
double {
return tw[j+k].real(); });
75 vd wi([&](
int k) ->
double {
return tw[j+k].imag(); });
78 vd tr = vr * wr - vi * wi;
79 vd ti = vr * wi + vi * wr;
82 for (
int k = 0; k < W; ++k) {
83 up[j+k] = {ur[k] + tr[k], ui[k] + ti[k]};
84 vp[j+k] = {ur[k] - tr[k], ui[k] - ti[k]};
88 for (; j < hlen; ++j) {
102 int n =
static_cast<int>(in.
size());
103 for (
int i = 0; i < n; ++i) out[i] = in[i];
104 FFTPlanImpl plan(n,
false);
109 int n =
static_cast<int>(in.
size());
110 for (
int i = 0; i < n; ++i) out[i] = in[i];
111 FFTPlanImpl plan(n,
true);
116 int n =
static_cast<int>(in.
size());
118 for (
int i = 0; i < n; ++i) tmp[i] = {in[i], 0.0};
119 FFTPlanImpl plan(n,
false);
121 for (
int k = 0; k < n / 2 + 1; ++k) out[k] = tmp[k];
126 for (
int k = 0; k < n / 2 + 1; ++k) tmp[k] = in[k];
127 for (
int k = 1; k < (n - 1) / 2 + 1; ++k)
128 tmp[n - k] = std::conj(in[k]);
129 FFTPlanImpl plan(n,
true);
131 for (
int i = 0; i < n; ++i) out[i] = tmp[i].
real();
Dense vector with optional GPU storage, templated over scalar type T.
constexpr idx size() const noexcept
FFT interface with backend dispatch.
void bit_reverse(num::CVector &a)
void ifft(const CVector &in, CVector &out, FFTBackend b=default_fft_backend)
Inverse complex DFT (unnormalised: result = n * true_inverse).
void fft(const CVector &in, CVector &out, FFTBackend b=default_fft_backend)
Forward complex DFT. out must be pre-allocated to in.size().
@ stdsimd
std::experimental::simd butterfly – requires NUMERICS_HAS_STD_SIMD
void irfft(const CVector &in, int n, Vector &out, FFTBackend b=default_fft_backend)
Complex-to-real inverse DFT (unnormalised).
void rfft(const Vector &in, CVector &out, FFTBackend b=default_fft_backend)
Real-to-complex forward DFT. out must be pre-allocated to n/2+1.
std::complex< real > cplx