Last active
January 27, 2021 02:01
-
-
Save cyanreg/bbf25c8a8dfb910ed3b9ae7663983ca6 to your computer and use it in GitHub Desktop.
8-point FFT (permuted input)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
static void fft8(FFTComplex *z) | |
{ | |
FFTSample r1 = z[0].re - z[4].re; | |
FFTSample r2 = z[0].im - z[4].im; | |
FFTSample r3 = z[1].re - z[5].re; | |
FFTSample r4 = z[1].im - z[5].im; | |
FFTSample r5 = z[2].re - z[6].re; | |
FFTSample r6 = z[2].im - z[6].im; | |
FFTSample r7 = z[3].re - z[7].re; | |
FFTSample r8 = z[3].im - z[7].im; | |
FFTSample q1 = z[0].re + z[4].re; | |
FFTSample q2 = z[0].im + z[4].im; | |
FFTSample q3 = z[1].re + z[5].re; | |
FFTSample q4 = z[1].im + z[5].im; | |
FFTSample q5 = z[2].re + z[6].re; | |
FFTSample q6 = z[2].im + z[6].im; | |
FFTSample q7 = z[3].re + z[7].re; | |
FFTSample q8 = z[3].im + z[7].im; | |
FFTSample s3 = q1 - q3; | |
FFTSample s1 = q1 + q3; | |
FFTSample s4 = q2 - q4; | |
FFTSample s2 = q2 + q4; | |
FFTSample s7 = q5 - q7; | |
FFTSample s5 = q5 + q7; | |
FFTSample s8 = q6 - q8; | |
FFTSample s6 = q6 + q8; | |
FFTSample e1 = s1 * -1; | |
FFTSample e2 = s2 * -1; | |
FFTSample e3 = s3 * -1; | |
FFTSample e4 = s4 * -1; | |
FFTSample e5 = s5 * 1; | |
FFTSample e6 = s6 * 1; | |
FFTSample e7 = s7 * -1; | |
FFTSample e8 = s8 * 1; | |
FFTSample w1 = e5 - e1; | |
FFTSample w2 = e6 - e2; | |
FFTSample w3 = e8 - e3; | |
FFTSample w4 = e7 - e4; | |
FFTSample w5 = s1 - e5; | |
FFTSample w6 = s2 - e6; | |
FFTSample w7 = s3 - e8; | |
FFTSample w8 = s4 - e7; | |
z[0].re = w1; | |
z[0].im = w2; | |
z[2].re = w3; | |
z[2].im = w4; | |
z[4].re = w5; | |
z[4].im = w6; | |
z[6].re = w7; | |
z[6].im = w8; | |
FFTSample z1 = r1 - r4; | |
FFTSample z2 = r1 + r4; | |
FFTSample z3 = r3 - r2; | |
FFTSample z4 = r3 + r2; | |
FFTSample z5 = r5 - r6; | |
FFTSample z6 = r5 + r6; | |
FFTSample z7 = r7 - r8; | |
FFTSample z8 = r7 + r8; | |
z3 *= -1; | |
z5 *= -M_SQRT1_2; | |
z6 *= -M_SQRT1_2; | |
z7 *= M_SQRT1_2; | |
z8 *= M_SQRT1_2; | |
FFTSample t5 = z7 - z6; | |
FFTSample t6 = z8 + z5; | |
FFTSample t7 = z8 - z5; | |
FFTSample t8 = z7 + z6; | |
FFTSample u1 = z2 + t5; | |
FFTSample u2 = z3 + t6; | |
FFTSample u3 = z1 - t7; | |
FFTSample u4 = z4 + t8; | |
FFTSample u5 = z2 - t5; | |
FFTSample u6 = z3 - t6; | |
FFTSample u7 = z1 + t7; | |
FFTSample u8 = z4 - t8; | |
z[1].re = u1; | |
z[1].im = u2; | |
z[3].re = u3; | |
z[3].im = u4; | |
z[5].re = u5; | |
z[5].im = u6; | |
z[7].re = u7; | |
z[7].im = u8; | |
/* 11 instructions = 20 instructions total */ | |
} | |
#if 0 | |
; 8-point in-place complex FFT | |
; %1 - even coefficients | |
; %2 - odd coefficients | |
; %3 - temporary | |
; %4 - temporary | |
%macro FFT8_AVX 4 | |
subps %3, %1, %2 ; r1, r2, r3, r4, r5, r6, r7, r8 | |
addps %1, %2 ; q1, q2, q3, q4, q5, q6, q7, q8 | |
vpermilps %2, %3, [perm8_odd1] ; r4, r4, r2, r2, r6, r6, r8, r8 | |
shufps %4, %1, %1, q3322 ; q1, q1, q2, q2, q5, q5, q6, q6 | |
movsldup %3, %3 ; r1, r1, r3, r3, r5, r5, r7, r7 | |
shufps %1, %1, q1100 ; q3, q3, q4, q4, q7, q7, q8, q8 | |
addsubps %3, %2 ; z1, z2, z3, z4, z5, z6, z7, z8 | |
addsubps %1, %4 ; s3, s1, s4, s2, s7, s5, s8, s6 | |
mulps %3, [mult8_odd] ; z * mult8_odd | |
vpermilps %1, [perm8_even] ; s1, s2, s3, s4, s5, s6, s8, s7 | |
shufps %2, %3, %3, q2332 ; c, r, a, p, z7, z8, z8, z7 | |
xorps %4, %1, [mask8_mmmmpppm] ; e1, e2, e3, e4, e5, e6, e8, e7 | |
vpermilps %3, %3, [perm8_odd2] ; z2, z3, z1, z4, z6, z5, z5, z6 | |
vperm2f128 %1, %4, q0003 ; e5, e6, e8, e7, s1, s2, s3, s4 | |
addsubps %2, %3 ; c, r, a, p, t5, t6, t7, t8 | |
subps %1, %4 ; w1, w2, w3, w4, w5, w6, w7, w8 | |
vperm2f128 %2, %2, q0101 ; t5, t6, t7, t8, t5, t6, t7, t8 | |
vperm2f128 %3, %3, q0000 ; z2, z3, z1, z4, z2, z3, z1, z4 | |
xorps %2, [mask8_ppmpmmpm] ; t5, t6, -t7, t8, -t5, -t6, t7, -t8 | |
addps %2, %3, %2 ; u1, u2, u3, u4, u5, u6, u7, u8 | |
%endmacro | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment