22static constexpr double TWO_PI = 6.283185307179586476925286766559;
26#ifdef NUMERICS_HAS_AVX2
29static inline void butterfly(
num::cplx* __restrict__ u,
33 auto* ud =
reinterpret_cast<double*
>(u);
34 auto* vd =
reinterpret_cast<double*
>(v);
35 auto* wd =
reinterpret_cast<const double*
>(tw);
38 for (; j + 1 < hlen; j += 2) {
39 __m256d U = _mm256_loadu_pd(ud + 2 * j);
40 __m256d V = _mm256_loadu_pd(vd + 2 * j);
41 __m256d W = _mm256_loadu_pd(wd + 2 * j);
43 __m256d Wre = _mm256_unpacklo_pd(W, W);
44 __m256d Wim = _mm256_unpackhi_pd(W, W);
45 __m256d Vsw = _mm256_permute_pd(V, 0x5);
46 __m256d T = _mm256_addsub_pd(_mm256_mul_pd(V, Wre), _mm256_mul_pd(Vsw, Wim));
47 _mm256_storeu_pd(ud + 2 * j, _mm256_add_pd(U, T));
48 _mm256_storeu_pd(vd + 2 * j, _mm256_sub_pd(U, T));
51 for (; j < hlen; ++j) {
59#elif defined(NUMERICS_HAS_NEON)
62static inline void butterfly(
num::cplx* __restrict__ u,
66 auto* ud =
reinterpret_cast<double*
>(u);
67 auto* vd =
reinterpret_cast<double*
>(v);
68 auto* wd =
reinterpret_cast<const double*
>(tw);
71 for (; j + 1 < hlen; j += 2) {
73 float64x2x2_t U = vld2q_f64(ud + 2 * j);
74 float64x2x2_t V = vld2q_f64(vd + 2 * j);
75 float64x2x2_t W = vld2q_f64(wd + 2 * j);
79 float64x2_t Tr = vfmsq_f64(vmulq_f64(V.val[0], W.val[0]), V.val[1], W.val[1]);
81 float64x2_t Ti = vfmaq_f64(vmulq_f64(V.val[0], W.val[1]), V.val[1], W.val[0]);
84 Ru.val[0] = vaddq_f64(U.val[0], Tr);
85 Ru.val[1] = vaddq_f64(U.val[1], Ti);
86 Rv.val[0] = vsubq_f64(U.val[0], Tr);
87 Rv.val[1] = vsubq_f64(U.val[1], Ti);
88 vst2q_f64(ud + 2 * j, Ru);
89 vst2q_f64(vd + 2 * j, Rv);
92 for (; j < hlen; ++j) {
104 for (
int j = 0; j < hlen; ++j) {
125 if (n_ == 0 || (n_ & (n_ - 1)))
126 throw std::invalid_argument(
"FFTPlan: length must be a power of two");
127 for (
int len = 2; len <= n_; len <<= 1) {
128 double ang = TWO_PI /
static_cast<double>(len) * (inv ? 1.0 : -1.0);
129 num::cplx wlen{std::cos(ang), std::sin(ang)};
130 std::vector<num::cplx> tw(len / 2);
132 for (
int j = 0; j < len / 2; ++j) {
144 for (
int len = 2; len <=
n; len <<= 1, ++stage) {
147 for (
int i = 0; i <
n; i += len)
148 butterfly(data + i, data + i + hlen, tw, hlen);
154 int n =
static_cast<int>(in.
size());
155 for (
int i = 0; i < n; ++i)
162 int n =
static_cast<int>(in.
size());
163 for (
int i = 0; i < n; ++i)
170 int n =
static_cast<int>(in.
size());
172 for (
int i = 0; i < n; ++i)
173 tmp[i] = {in[i], 0.0};
176 for (
int k = 0; k < n / 2 + 1; ++k)
182 for (
int k = 0; k < n / 2 + 1; ++k)
184 for (
int k = 1; k < (n - 1) / 2 + 1; ++k)
185 tmp[n - k] = std::conj(in[k]);
188 for (
int i = 0; i < n; ++i)
189 out[i] = tmp[i].real();