39static constexpr double TWO_PI = 6.283185307179586476925286766559;
43#ifdef NUMERICS_HAS_AVX2
46static inline void butterfly(
num::cplx* __restrict__ u,
50 auto* ud =
reinterpret_cast<double*
>(u);
51 auto* vd =
reinterpret_cast<double*
>(v);
52 auto* wd =
reinterpret_cast<const double*
>(tw);
55 for (; j + 1 < hlen; j += 2) {
56 __m256d U = _mm256_loadu_pd(ud + 2 * j);
57 __m256d V = _mm256_loadu_pd(vd + 2 * j);
58 __m256d W = _mm256_loadu_pd(wd + 2 * j);
60 __m256d Wre = _mm256_unpacklo_pd(W, W);
61 __m256d Wim = _mm256_unpackhi_pd(W, W);
62 __m256d Vsw = _mm256_permute_pd(V, 0x5);
63 __m256d T = _mm256_addsub_pd(_mm256_mul_pd(V, Wre),
64 _mm256_mul_pd(Vsw, Wim));
65 _mm256_storeu_pd(ud + 2 * j, _mm256_add_pd(U, T));
66 _mm256_storeu_pd(vd + 2 * j, _mm256_sub_pd(U, T));
69 for (; j < hlen; ++j) {
77#elif defined(NUMERICS_HAS_NEON)
80static inline void butterfly(
num::cplx* __restrict__ u,
84 auto* ud =
reinterpret_cast<double*
>(u);
85 auto* vd =
reinterpret_cast<double*
>(v);
86 auto* wd =
reinterpret_cast<const double*
>(tw);
89 for (; j + 1 < hlen; j += 2) {
91 float64x2x2_t U = vld2q_f64(ud + 2 * j);
92 float64x2x2_t V = vld2q_f64(vd + 2 * j);
93 float64x2x2_t W = vld2q_f64(wd + 2 * j);
98 vfmsq_f64(vmulq_f64(V.val[0], W.val[0]), V.val[1], W.val[1]);
101 vfmaq_f64(vmulq_f64(V.val[0], W.val[1]), V.val[1], W.val[0]);
103 float64x2x2_t Ru, Rv;
104 Ru.val[0] = vaddq_f64(U.val[0], Tr);
105 Ru.val[1] = vaddq_f64(U.val[1], Ti);
106 Rv.val[0] = vsubq_f64(U.val[0], Tr);
107 Rv.val[1] = vsubq_f64(U.val[1], Ti);
108 vst2q_f64(ud + 2 * j, Ru);
109 vst2q_f64(vd + 2 * j, Rv);
112 for (; j < hlen; ++j) {
123static inline void butterfly(
num::cplx* u,
127 for (
int j = 0; j < hlen; ++j) {
148 if (n_ == 0 || (n_ & (n_ - 1)))
149 throw std::invalid_argument(
150 "FFTPlan: length must be a power of two");
151 for (
int len = 2; len <= n_; len <<= 1) {
152 double ang = TWO_PI /
static_cast<double>(len) * (inv ? 1.0 : -1.0);
153 num::cplx wlen{std::cos(ang), std::sin(ang)};
154 std::vector<num::cplx> tw(len / 2);
156 for (
int j = 0; j < len / 2; ++j) {
168 for (
int len = 2; len <=
n; len <<= 1, ++stage) {
171 for (
int i = 0; i <
n; i += len)
172 butterfly(data + i, data + i + hlen, tw, hlen);
180 int n =
static_cast<int>(in.
size());
181 for (
int i = 0; i < n; ++i)
188 int n =
static_cast<int>(in.
size());
189 for (
int i = 0; i < n; ++i)
196 int n =
static_cast<int>(in.
size());
198 for (
int i = 0; i < n; ++i)
199 tmp[i] = {in[i], 0.0};
202 for (
int k = 0; k < n / 2 + 1; ++k)
208 for (
int k = 0; k < n / 2 + 1; ++k)
210 for (
int k = 1; k < (n - 1) / 2 + 1; ++k)
211 tmp[n - k] = std::conj(in[k]);
214 for (
int i = 0; i < n; ++i)
215 out[i] = tmp[i].real();