Libav
|
00001 /* 00002 * FFT/IFFT transforms 00003 * AltiVec-enabled 00004 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> 00005 * Based on code Copyright (c) 2002 Fabrice Bellard 00006 * 00007 * This file is part of FFmpeg. 00008 * 00009 * FFmpeg is free software; you can redistribute it and/or 00010 * modify it under the terms of the GNU Lesser General Public 00011 * License as published by the Free Software Foundation; either 00012 * version 2.1 of the License, or (at your option) any later version. 00013 * 00014 * FFmpeg is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 * Lesser General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU Lesser General Public 00020 * License along with FFmpeg; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00022 */ 00023 #include "libavcodec/fft.h" 00024 #include "dsputil_ppc.h" 00025 #include "util_altivec.h" 00026 #include "dsputil_altivec.h" 00027 00039 static void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z) 00040 { 00041 POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6); 00042 register const vector float vczero = (const vector float)vec_splat_u32(0.); 00043 00044 int ln = s->nbits; 00045 int j, np, np2; 00046 int nblocks, nloops; 00047 register FFTComplex *p, *q; 00048 FFTComplex *cptr, *cptr1; 00049 int k; 00050 00051 POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6); 00052 00053 np = 1 << ln; 00054 00055 { 00056 vector float *r, a, b, a1, c1, c2; 00057 00058 r = (vector float *)&z[0]; 00059 00060 c1 = vcii(p,p,n,n); 00061 00062 if (s->inverse) { 00063 c2 = vcii(p,p,n,p); 00064 } else { 00065 c2 = vcii(p,p,p,n); 00066 } 00067 00068 j = (np >> 2); 00069 do { 00070 a = vec_ld(0, r); 00071 a1 = vec_ld(sizeof(vector float), r); 00072 00073 b = vec_perm(a,a,vcprmle(1,0,3,2)); 00074 a = vec_madd(a,c1,b); 00075 /* do the pass 0 butterfly */ 00076 00077 b = vec_perm(a1,a1,vcprmle(1,0,3,2)); 00078 b = vec_madd(a1,c1,b); 00079 /* do the pass 0 butterfly */ 00080 00081 /* multiply third by -i */ 00082 b = vec_perm(b,b,vcprmle(2,3,1,0)); 00083 00084 /* do the pass 1 butterfly */ 00085 vec_st(vec_madd(b,c2,a), 0, r); 00086 vec_st(vec_nmsub(b,c2,a), sizeof(vector float), r); 00087 00088 r += 2; 00089 } while (--j != 0); 00090 } 00091 /* pass 2 .. ln-1 */ 00092 00093 nblocks = np >> 3; 00094 nloops = 1 << 2; 00095 np2 = np >> 1; 00096 00097 cptr1 = s->exptab1; 00098 do { 00099 p = z; 00100 q = z + nloops; 00101 j = nblocks; 00102 do { 00103 cptr = cptr1; 00104 k = nloops >> 1; 00105 do { 00106 vector float a,b,c,t1; 00107 00108 a = vec_ld(0, (float*)p); 00109 b = vec_ld(0, (float*)q); 00110 00111 /* complex mul */ 00112 c = vec_ld(0, (float*)cptr); 00113 /* cre*re cim*re */ 00114 t1 = vec_madd(c, vec_perm(b,b,vcprmle(2,2,0,0)),vczero); 00115 c = vec_ld(sizeof(vector float), (float*)cptr); 00116 /* -cim*im cre*im */ 00117 b = vec_madd(c, vec_perm(b,b,vcprmle(3,3,1,1)),t1); 00118 00119 /* butterfly */ 00120 vec_st(vec_add(a,b), 0, (float*)p); 00121 vec_st(vec_sub(a,b), 0, (float*)q); 00122 00123 p += 2; 00124 q += 2; 00125 cptr += 4; 00126 } while (--k); 00127 00128 p += nloops; 00129 q += nloops; 00130 } while (--j); 00131 cptr1 += nloops * 2; 00132 nblocks = nblocks >> 1; 00133 nloops = nloops << 1; 00134 } while (nblocks != 0); 00135 00136 POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); 00137 } 00138 00139 av_cold void ff_fft_init_altivec(FFTContext *s) 00140 { 00141 s->fft_calc = ff_fft_calc_altivec; 00142 s->split_radix = 0; 00143 }