Libav
|
00001 /* 00002 * FFT/IFFT transforms 00003 * Copyright (c) 2008 Loren Merritt 00004 * Copyright (c) 2002 Fabrice Bellard 00005 * Partly based on libdjbfft by D. J. Bernstein 00006 * 00007 * This file is part of FFmpeg. 00008 * 00009 * FFmpeg is free software; you can redistribute it and/or 00010 * modify it under the terms of the GNU Lesser General Public 00011 * License as published by the Free Software Foundation; either 00012 * version 2.1 of the License, or (at your option) any later version. 00013 * 00014 * FFmpeg is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 * Lesser General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU Lesser General Public 00020 * License along with FFmpeg; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00022 */ 00023 00029 #include <stdlib.h> 00030 #include <string.h> 00031 #include "libavutil/mathematics.h" 00032 #include "fft.h" 00033 00034 /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ 00035 #if !CONFIG_HARDCODED_TABLES 00036 COSTABLE(16); 00037 COSTABLE(32); 00038 COSTABLE(64); 00039 COSTABLE(128); 00040 COSTABLE(256); 00041 COSTABLE(512); 00042 COSTABLE(1024); 00043 COSTABLE(2048); 00044 COSTABLE(4096); 00045 COSTABLE(8192); 00046 COSTABLE(16384); 00047 COSTABLE(32768); 00048 COSTABLE(65536); 00049 #endif 00050 COSTABLE_CONST FFTSample * const ff_cos_tabs[] = { 00051 NULL, NULL, NULL, NULL, 00052 ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024, 00053 ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536, 00054 }; 00055 00056 static int split_radix_permutation(int i, int n, int inverse) 00057 { 00058 int m; 00059 if(n <= 2) return i&1; 00060 m = n >> 1; 00061 if(!(i&m)) return split_radix_permutation(i, m, inverse)*2; 00062 m >>= 1; 00063 if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1; 00064 else return split_radix_permutation(i, m, inverse)*4 - 1; 00065 } 00066 00067 av_cold void ff_init_ff_cos_tabs(int index) 00068 { 00069 #if !CONFIG_HARDCODED_TABLES 00070 int i; 00071 int m = 1<<index; 00072 double freq = 2*M_PI/m; 00073 FFTSample *tab = ff_cos_tabs[index]; 00074 for(i=0; i<=m/4; i++) 00075 tab[i] = cos(i*freq); 00076 for(i=1; i<m/4; i++) 00077 tab[m/2-i] = tab[i]; 00078 #endif 00079 } 00080 00081 av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) 00082 { 00083 int i, j, m, n; 00084 float alpha, c1, s1, s2; 00085 int av_unused has_vectors; 00086 00087 if (nbits < 2 || nbits > 16) 00088 goto fail; 00089 s->nbits = nbits; 00090 n = 1 << nbits; 00091 00092 s->tmp_buf = NULL; 00093 s->exptab = av_malloc((n / 2) * sizeof(FFTComplex)); 00094 if (!s->exptab) 00095 goto fail; 00096 s->revtab = av_malloc(n * sizeof(uint16_t)); 00097 if (!s->revtab) 00098 goto fail; 00099 s->inverse = inverse; 00100 00101 s2 = inverse ? 1.0 : -1.0; 00102 00103 s->fft_permute = ff_fft_permute_c; 00104 s->fft_calc = ff_fft_calc_c; 00105 #if CONFIG_MDCT 00106 s->imdct_calc = ff_imdct_calc_c; 00107 s->imdct_half = ff_imdct_half_c; 00108 s->mdct_calc = ff_mdct_calc_c; 00109 #endif 00110 s->exptab1 = NULL; 00111 s->split_radix = 1; 00112 00113 if (ARCH_ARM) ff_fft_init_arm(s); 00114 if (HAVE_ALTIVEC) ff_fft_init_altivec(s); 00115 if (HAVE_MMX) ff_fft_init_mmx(s); 00116 00117 if (s->split_radix) { 00118 for(j=4; j<=nbits; j++) { 00119 ff_init_ff_cos_tabs(j); 00120 } 00121 for(i=0; i<n; i++) 00122 s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i; 00123 s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); 00124 } else { 00125 int np, nblocks, np2, l; 00126 FFTComplex *q; 00127 00128 for(i=0; i<(n/2); i++) { 00129 alpha = 2 * M_PI * (float)i / (float)n; 00130 c1 = cos(alpha); 00131 s1 = sin(alpha) * s2; 00132 s->exptab[i].re = c1; 00133 s->exptab[i].im = s1; 00134 } 00135 00136 np = 1 << nbits; 00137 nblocks = np >> 3; 00138 np2 = np >> 1; 00139 s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); 00140 if (!s->exptab1) 00141 goto fail; 00142 q = s->exptab1; 00143 do { 00144 for(l = 0; l < np2; l += 2 * nblocks) { 00145 *q++ = s->exptab[l]; 00146 *q++ = s->exptab[l + nblocks]; 00147 00148 q->re = -s->exptab[l].im; 00149 q->im = s->exptab[l].re; 00150 q++; 00151 q->re = -s->exptab[l + nblocks].im; 00152 q->im = s->exptab[l + nblocks].re; 00153 q++; 00154 } 00155 nblocks = nblocks >> 1; 00156 } while (nblocks != 0); 00157 av_freep(&s->exptab); 00158 00159 /* compute bit reverse table */ 00160 for(i=0;i<n;i++) { 00161 m=0; 00162 for(j=0;j<nbits;j++) { 00163 m |= ((i >> j) & 1) << (nbits-j-1); 00164 } 00165 s->revtab[i]=m; 00166 } 00167 } 00168 00169 return 0; 00170 fail: 00171 av_freep(&s->revtab); 00172 av_freep(&s->exptab); 00173 av_freep(&s->exptab1); 00174 av_freep(&s->tmp_buf); 00175 return -1; 00176 } 00177 00178 void ff_fft_permute_c(FFTContext *s, FFTComplex *z) 00179 { 00180 int j, k, np; 00181 FFTComplex tmp; 00182 const uint16_t *revtab = s->revtab; 00183 np = 1 << s->nbits; 00184 00185 if (s->tmp_buf) { 00186 /* TODO: handle split-radix permute in a more optimal way, probably in-place */ 00187 for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j]; 00188 memcpy(z, s->tmp_buf, np * sizeof(FFTComplex)); 00189 return; 00190 } 00191 00192 /* reverse */ 00193 for(j=0;j<np;j++) { 00194 k = revtab[j]; 00195 if (k < j) { 00196 tmp = z[k]; 00197 z[k] = z[j]; 00198 z[j] = tmp; 00199 } 00200 } 00201 } 00202 00203 av_cold void ff_fft_end(FFTContext *s) 00204 { 00205 av_freep(&s->revtab); 00206 av_freep(&s->exptab); 00207 av_freep(&s->exptab1); 00208 av_freep(&s->tmp_buf); 00209 } 00210 00211 #define sqrthalf (float)M_SQRT1_2 00212 00213 #define BF(x,y,a,b) {\ 00214 x = a - b;\ 00215 y = a + b;\ 00216 } 00217 00218 #define BUTTERFLIES(a0,a1,a2,a3) {\ 00219 BF(t3, t5, t5, t1);\ 00220 BF(a2.re, a0.re, a0.re, t5);\ 00221 BF(a3.im, a1.im, a1.im, t3);\ 00222 BF(t4, t6, t2, t6);\ 00223 BF(a3.re, a1.re, a1.re, t4);\ 00224 BF(a2.im, a0.im, a0.im, t6);\ 00225 } 00226 00227 // force loading all the inputs before storing any. 00228 // this is slightly slower for small data, but avoids store->load aliasing 00229 // for addresses separated by large powers of 2. 00230 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ 00231 FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ 00232 BF(t3, t5, t5, t1);\ 00233 BF(a2.re, a0.re, r0, t5);\ 00234 BF(a3.im, a1.im, i1, t3);\ 00235 BF(t4, t6, t2, t6);\ 00236 BF(a3.re, a1.re, r1, t4);\ 00237 BF(a2.im, a0.im, i0, t6);\ 00238 } 00239 00240 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ 00241 t1 = a2.re * wre + a2.im * wim;\ 00242 t2 = a2.im * wre - a2.re * wim;\ 00243 t5 = a3.re * wre - a3.im * wim;\ 00244 t6 = a3.im * wre + a3.re * wim;\ 00245 BUTTERFLIES(a0,a1,a2,a3)\ 00246 } 00247 00248 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\ 00249 t1 = a2.re;\ 00250 t2 = a2.im;\ 00251 t5 = a3.re;\ 00252 t6 = a3.im;\ 00253 BUTTERFLIES(a0,a1,a2,a3)\ 00254 } 00255 00256 /* z[0...8n-1], w[1...2n-1] */ 00257 #define PASS(name)\ 00258 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\ 00259 {\ 00260 FFTSample t1, t2, t3, t4, t5, t6;\ 00261 int o1 = 2*n;\ 00262 int o2 = 4*n;\ 00263 int o3 = 6*n;\ 00264 const FFTSample *wim = wre+o1;\ 00265 n--;\ 00266 \ 00267 TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ 00268 TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ 00269 do {\ 00270 z += 2;\ 00271 wre += 2;\ 00272 wim -= 2;\ 00273 TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ 00274 TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ 00275 } while(--n);\ 00276 } 00277 00278 PASS(pass) 00279 #undef BUTTERFLIES 00280 #define BUTTERFLIES BUTTERFLIES_BIG 00281 PASS(pass_big) 00282 00283 #define DECL_FFT(n,n2,n4)\ 00284 static void fft##n(FFTComplex *z)\ 00285 {\ 00286 fft##n2(z);\ 00287 fft##n4(z+n4*2);\ 00288 fft##n4(z+n4*3);\ 00289 pass(z,ff_cos_##n,n4/2);\ 00290 } 00291 00292 static void fft4(FFTComplex *z) 00293 { 00294 FFTSample t1, t2, t3, t4, t5, t6, t7, t8; 00295 00296 BF(t3, t1, z[0].re, z[1].re); 00297 BF(t8, t6, z[3].re, z[2].re); 00298 BF(z[2].re, z[0].re, t1, t6); 00299 BF(t4, t2, z[0].im, z[1].im); 00300 BF(t7, t5, z[2].im, z[3].im); 00301 BF(z[3].im, z[1].im, t4, t8); 00302 BF(z[3].re, z[1].re, t3, t7); 00303 BF(z[2].im, z[0].im, t2, t5); 00304 } 00305 00306 static void fft8(FFTComplex *z) 00307 { 00308 FFTSample t1, t2, t3, t4, t5, t6, t7, t8; 00309 00310 fft4(z); 00311 00312 BF(t1, z[5].re, z[4].re, -z[5].re); 00313 BF(t2, z[5].im, z[4].im, -z[5].im); 00314 BF(t3, z[7].re, z[6].re, -z[7].re); 00315 BF(t4, z[7].im, z[6].im, -z[7].im); 00316 BF(t8, t1, t3, t1); 00317 BF(t7, t2, t2, t4); 00318 BF(z[4].re, z[0].re, z[0].re, t1); 00319 BF(z[4].im, z[0].im, z[0].im, t2); 00320 BF(z[6].re, z[2].re, z[2].re, t7); 00321 BF(z[6].im, z[2].im, z[2].im, t8); 00322 00323 TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); 00324 } 00325 00326 #if !CONFIG_SMALL 00327 static void fft16(FFTComplex *z) 00328 { 00329 FFTSample t1, t2, t3, t4, t5, t6; 00330 00331 fft8(z); 00332 fft4(z+8); 00333 fft4(z+12); 00334 00335 TRANSFORM_ZERO(z[0],z[4],z[8],z[12]); 00336 TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf); 00337 TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]); 00338 TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]); 00339 } 00340 #else 00341 DECL_FFT(16,8,4) 00342 #endif 00343 DECL_FFT(32,16,8) 00344 DECL_FFT(64,32,16) 00345 DECL_FFT(128,64,32) 00346 DECL_FFT(256,128,64) 00347 DECL_FFT(512,256,128) 00348 #if !CONFIG_SMALL 00349 #define pass pass_big 00350 #endif 00351 DECL_FFT(1024,512,256) 00352 DECL_FFT(2048,1024,512) 00353 DECL_FFT(4096,2048,1024) 00354 DECL_FFT(8192,4096,2048) 00355 DECL_FFT(16384,8192,4096) 00356 DECL_FFT(32768,16384,8192) 00357 DECL_FFT(65536,32768,16384) 00358 00359 static void (* const fft_dispatch[])(FFTComplex*) = { 00360 fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, 00361 fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, 00362 }; 00363 00364 void ff_fft_calc_c(FFTContext *s, FFTComplex *z) 00365 { 00366 fft_dispatch[s->nbits-2](z); 00367 } 00368