ref: 5beef6501a7b94814a8c732cfc1e521276bcbb56
parent: 03e0d3a6a3860a1ef9ca91b9e979de4eed9dc92e
author: Jean-Marc Valin <jeanmarcv@google.com>
date: Wed Feb 26 06:05:21 EST 2025
Gradual downshift in FFT to improve accuracy
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -125,6 +125,12 @@
#define OPUS_FAST_INT64 0
#endif
+#ifdef FIXED_POINT
+#define ARG_FIXED(arg) , arg /* FIXED_POINT builds: expands to ", arg" so it can append one extra parameter/argument to a list */
+#else
+#define ARG_FIXED(arg) /* other builds: expands to nothing, so the extra parameter/argument disappears */
+#endif
+
#define PRINT_MIPS(file)
#ifdef FIXED_POINT
--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -534,7 +534,30 @@
#endif /* CUSTOM_MODES */
-void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
+#ifdef FIXED_POINT
+static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) { /* Right-shift the r and i parts of the N values in x by min(step, *total) bits and deduct that amount from *total. */
+ int shift;
+ shift = IMIN(step, *total); /* never shift by more than what is still left in *total */
+ *total -= shift; /* the caller's remaining-shift counter is updated in place */
+ if (shift == 1) { /* single-bit case goes through SHR32 instead of PSHR32 */
+ int i;
+ for (i=0;i<N;i++) {
+ x[i].r = SHR32(x[i].r, 1);
+ x[i].i = SHR32(x[i].i, 1);
+ }
+ } else if (shift>0) { /* multi-bit case goes through PSHR32 (presumably the rounding variant - confirm against the macro definitions); shift<=0 leaves x untouched */
+ int i;
+ for (i=0;i<N;i++) {
+ x[i].r = PSHR32(x[i].r, shift);
+ x[i].i = PSHR32(x[i].i, shift);
+ }
+ }
+}
+#else
+#define fft_downshift(x, N, total, step) /* without FIXED_POINT the call expands to nothing, leaving only the caller's ';' */
+#endif
+
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift))
{
int m2, m;
int p;
@@ -564,16 +587,20 @@
switch (st->factors[2*i])
{
case 2:
+ fft_downshift(fout, st->nfft, &downshift, 1);
kf_bfly2(fout, m, fstride[i]);
break;
case 4:
+ fft_downshift(fout, st->nfft, &downshift, 2);
kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
#ifndef RADIX_TWO_ONLY
case 3:
+ fft_downshift(fout, st->nfft, &downshift, 2);
kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
case 5:
+ fft_downshift(fout, st->nfft, &downshift, 3);
kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
break;
#endif
@@ -580,6 +607,7 @@
}
m = m2;
}
+ fft_downshift(fout, st->nfft, &downshift, downshift);
}
void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
@@ -598,10 +626,10 @@
for (i=0;i<st->nfft;i++)
{
kiss_fft_cpx x = fin[i];
- fout[st->bitrev[i]].r = SHR32(S_MUL2(x.r, scale), scale_shift);
- fout[st->bitrev[i]].i = SHR32(S_MUL2(x.i, scale), scale_shift);
+ fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
+ fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
}
- opus_fft_impl(st, fout);
+ opus_fft_impl(st, fout ARG_FIXED(scale_shift));
}
@@ -614,7 +642,7 @@
fout[st->bitrev[i]] = fin[i];
for (i=0;i<st->nfft;i++)
fout[i].i = -fout[i].i;
- opus_fft_impl(st, fout);
+ opus_fft_impl(st, fout ARG_FIXED(0));
for (i=0;i<st->nfft;i++)
fout[i].i = -fout[i].i;
}
--- a/celt/kiss_fft.h
+++ b/celt/kiss_fft.h
@@ -159,7 +159,7 @@
void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
-void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift));
void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
void opus_fft_free(const kiss_fft_state *cfg, int arch);
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -221,16 +221,11 @@
}
#ifdef FIXED_POINT
headroom = IMAX(0, IMIN(scale_shift, 28-celt_ilog2(maxval)));
- for(i=0;i<N4;i++)
- {
- f2[i].r = PSHR32(f2[i].r, scale_shift-headroom);
- f2[i].i = PSHR32(f2[i].i, scale_shift-headroom);
- }
#endif
}
/* N/4 complex FFT, does not downscale anymore */
- opus_fft_impl(st, f2);
+ opus_fft_impl(st, f2 ARG_FIXED(scale_shift-headroom));
/* Post-rotate */
{
@@ -302,7 +297,7 @@
}
}
- opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
+ opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)) ARG_FIXED(0));
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
it in-place. */
--
⑨