• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/dct-test.c

Go to the documentation of this file.
00001 /*
00002  * (c) 2001 Fabrice Bellard
00003  *     2007 Marc Hoffman <marc.hoffman@analog.com>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #include <string.h>
00031 #include <sys/time.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #include "libavutil/common.h"
00036 #include "libavutil/lfg.h"
00037 
00038 #include "simple_idct.h"
00039 #include "aandcttab.h"
00040 #include "faandct.h"
00041 #include "faanidct.h"
00042 #include "x86/idct_xvid.h"
00043 #include "dctref.h"
00044 
00045 #undef printf
00046 
/* Prototypes for transform routines that this test declares itself instead of
 * getting from an included header.  Most of them are referenced from the
 * algos[] table below under the matching architecture/feature conditionals. */
00047 void ff_mmx_idct(DCTELEM *data);
00048 void ff_mmxext_idct(DCTELEM *data);
00049 
/* NOTE(review): odivx_idct_c is declared but not referenced anywhere in this
 * file. */
00050 void odivx_idct_c(short *block);
00051 
00052 // BFIN
00053 void ff_bfin_idct(DCTELEM *block);
00054 void ff_bfin_fdct(DCTELEM *block);
00055 
00056 // ALTIVEC
00057 void fdct_altivec(DCTELEM *block);
00058 //void idct_altivec(DCTELEM *block);?? no routine
00059 
00060 // ARM
00061 void ff_j_rev_dct_arm(DCTELEM *data);
00062 void ff_simple_idct_arm(DCTELEM *data);
00063 void ff_simple_idct_armv5te(DCTELEM *data);
00064 void ff_simple_idct_armv6(DCTELEM *data);
00065 void ff_simple_idct_neon(DCTELEM *data);
00066 
/* Alpha; used by the "SIMPLE-ALPHA" table entry. */
00067 void ff_simple_idct_axp(DCTELEM *data);
00068 
/**
 * One row of the algos[] table: a transform implementation to be checked
 * against a reference, plus the information needed to feed it.
 */
00069 struct algo {
00070   const char *name;                     /* label printed in the result lines; a NULL name ends the table */
00071   enum { FDCT, IDCT } is_idct;          /* forward or inverse transform; main() matches it against -i */
00072   void (* func) (DCTELEM *block);       /* implementation under test, works in place on 64 coefficients */
00073   void (* ref)  (DCTELEM *block);       /* reference implementation the result is compared with */
  /* how dct_error() must lay out the input (the *_PERM permutations) or
   * rescale the output (SCALE_PERM) for this implementation */
00074   enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
00075   int  mm_support;                      /* CPU flag bits that must all be present in cpu_flags, 0 for none */
00076 };
00077 
/* Format of the "FAAN" table entry: SCALE_PERM (output rescaled by
 * dct_error()) unless FAAN_POSTSCALE is defined, in which case NO_PERM. */
00078 #ifndef FAAN_POSTSCALE
00079 #define FAAN_SCALE SCALE_PERM
00080 #else
00081 #define FAAN_SCALE NO_PERM
00082 #endif
00083 
/* CPU feature flags, assigned from mm_support() at the start of main() and
 * consulted when selecting table entries and in mmx_emms(). */
00084 static int cpu_flags;
00085 
00086 struct algo algos[] = {
00087   {"REF-DBL",         0, ff_ref_fdct,        ff_ref_fdct, NO_PERM},
00088   {"FAAN",            0, ff_faandct,         ff_ref_fdct, FAAN_SCALE},
00089   {"FAANI",           1, ff_faanidct,        ff_ref_idct, NO_PERM},
00090   {"IJG-AAN-INT",     0, fdct_ifast,         ff_ref_fdct, SCALE_PERM},
00091   {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
00092   {"REF-DBL",         1, ff_ref_idct,        ff_ref_idct, NO_PERM},
00093   {"INT",             1, j_rev_dct,          ff_ref_idct, MMX_PERM},
00094   {"SIMPLE-C",        1, ff_simple_idct,     ff_ref_idct, NO_PERM},
00095 
00096 #if HAVE_MMX
00097   {"MMX",             0, ff_fdct_mmx,        ff_ref_fdct, NO_PERM, FF_MM_MMX},
00098 #if HAVE_MMX2
00099   {"MMX2",            0, ff_fdct_mmx2,       ff_ref_fdct, NO_PERM, FF_MM_MMX2},
00100   {"SSE2",            0, ff_fdct_sse2,       ff_ref_fdct, NO_PERM, FF_MM_SSE2},
00101 #endif
00102 
00103 #if CONFIG_GPL
00104   {"LIBMPEG2-MMX",    1, ff_mmx_idct,        ff_ref_idct, MMX_PERM, FF_MM_MMX},
00105   {"LIBMPEG2-MMX2",   1, ff_mmxext_idct,     ff_ref_idct, MMX_PERM, FF_MM_MMX2},
00106 #endif
00107   {"SIMPLE-MMX",      1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
00108   {"XVID-MMX",        1, ff_idct_xvid_mmx,   ff_ref_idct, NO_PERM, FF_MM_MMX},
00109   {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  ff_ref_idct, NO_PERM, FF_MM_MMX2},
00110   {"XVID-SSE2",       1, ff_idct_xvid_sse2,  ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
00111 #endif
00112 
00113 #if HAVE_ALTIVEC
00114   {"altivecfdct",     0, fdct_altivec,       ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
00115 #endif
00116 
00117 #if ARCH_BFIN
00118   {"BFINfdct",        0, ff_bfin_fdct,       ff_ref_fdct, NO_PERM},
00119   {"BFINidct",        1, ff_bfin_idct,       ff_ref_idct, NO_PERM},
00120 #endif
00121 
00122 #if ARCH_ARM
00123   {"SIMPLE-ARM",      1, ff_simple_idct_arm, ff_ref_idct, NO_PERM },
00124   {"INT-ARM",         1, ff_j_rev_dct_arm,   ff_ref_idct, MMX_PERM },
00125 #if HAVE_ARMV5TE
00126   {"SIMPLE-ARMV5TE",  1, ff_simple_idct_armv5te, ff_ref_idct, NO_PERM },
00127 #endif
00128 #if HAVE_ARMV6
00129   {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
00130 #endif
00131 #if HAVE_NEON
00132   {"SIMPLE-NEON",     1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
00133 #endif
00134 #endif /* ARCH_ARM */
00135 
00136 #if ARCH_ALPHA
00137   {"SIMPLE-ALPHA",    1, ff_simple_idct_axp,  ff_ref_idct, NO_PERM },
00138 #endif
00139 
00140   { 0 }
00141 };
00142 
/* Fixed-point precision used by dct_error() when it rescales the output of
 * SCALE_PERM transforms with ff_aanscales[]. */
00143 #define AANSCALE_BITS 12
00144 
/* Clamping table filled in by main(): MAX_NEG_CROP zeros, then the identity
 * values 0..255, then MAX_NEG_CROP entries of 255.  It is not read in this
 * file; presumably the linked IDCT code uses it -- TODO confirm. */
00145 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
00146 
00147 static int64_t gettime(void)
00148 {
00149     struct timeval tv;
00150     gettimeofday(&tv,NULL);
00151     return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
00152 }
00153 
/* NB_ITS: number of blocks transformed per accuracy measurement.
 * NB_ITS_SPEED: number of transforms per batch in the timing loops, which
 * repeat batches until at least one second has elapsed. */
00154 #define NB_ITS 20000
00155 #define NB_ITS_SPEED 50000
00156 
/* Destination index of each input coefficient for MMX_PERM transforms;
 * computed at start-up by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Destination index of each input coefficient for MMX_SIMPLE_PERM
 * transforms.  The table is a fixed permutation of 0..63 that is only ever
 * read, hence const. */
static const short idct_simple_mmx_perm[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Column reordering applied within every row for SSE2_PERM transforms. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00171 
00172 static void idct_mmx_init(void)
00173 {
00174     int i;
00175 
00176     /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
00177     for (i = 0; i < 64; i++) {
00178         idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00179 //        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
00180     }
00181 }
00182 
/* Work buffers shared by the test routines (declared via DECLARE_ALIGNED with
 * the alignment given as first argument):
 *   block     - input handed to, and output produced by, the transform under test
 *   block1    - generated input; in dct_error() it is later overwritten with the reference output
 *   block_org - copy of the generated input, only used by a disabled debug dump */
00183 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
00184 DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
00185 DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
00186 
/**
 * Issue the x86 "emms" instruction when the build has MMX support and the
 * CPU flags report MMX; otherwise do nothing.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (!(cpu_flags & FF_MM_MMX))
        return;

    __asm__ volatile ("emms\n\t");
#endif
}
00194 
00195 static void dct_error(const char *name, int is_idct,
00196                void (*fdct_func)(DCTELEM *block),
00197                void (*fdct_ref)(DCTELEM *block), int form, int test)
00198 {
00199     int it, i, scale;
00200     int err_inf, v;
00201     int64_t err2, ti, ti1, it1;
00202     int64_t sysErr[64], sysErrMax=0;
00203     int maxout=0;
00204     int blockSumErrMax=0, blockSumErr;
00205     AVLFG prng;
00206 
00207     av_lfg_init(&prng, 1);
00208 
00209     err_inf = 0;
00210     err2 = 0;
00211     for(i=0; i<64; i++) sysErr[i]=0;
00212     for(it=0;it<NB_ITS;it++) {
00213         for(i=0;i<64;i++)
00214             block1[i] = 0;
00215         switch(test){
00216         case 0:
00217             for(i=0;i<64;i++)
00218                 block1[i] = (av_lfg_get(&prng) % 512) -256;
00219             if (is_idct){
00220                 ff_ref_fdct(block1);
00221 
00222                 for(i=0;i<64;i++)
00223                     block1[i]>>=3;
00224             }
00225         break;
00226         case 1:{
00227             int num = av_lfg_get(&prng) % 10 + 1;
00228             for(i=0;i<num;i++)
00229                 block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256;
00230         }break;
00231         case 2:
00232             block1[0] = av_lfg_get(&prng) % 4096 - 2048;
00233             block1[63]= (block1[0]&1)^1;
00234         break;
00235         }
00236 
00237 #if 0 // simulate mismatch control
00238 { int sum=0;
00239         for(i=0;i<64;i++)
00240            sum+=block1[i];
00241 
00242         if((sum&1)==0) block1[63]^=1;
00243 }
00244 #endif
00245 
00246         for(i=0; i<64; i++)
00247             block_org[i]= block1[i];
00248 
00249         if (form == MMX_PERM) {
00250             for(i=0;i<64;i++)
00251                 block[idct_mmx_perm[i]] = block1[i];
00252             } else if (form == MMX_SIMPLE_PERM) {
00253             for(i=0;i<64;i++)
00254                 block[idct_simple_mmx_perm[i]] = block1[i];
00255 
00256         } else if (form == SSE2_PERM) {
00257             for(i=0; i<64; i++)
00258                 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
00259         } else if (form == PARTTRANS_PERM) {
00260             for(i=0; i<64; i++)
00261                 block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
00262         } else {
00263             for(i=0; i<64; i++)
00264                 block[i]= block1[i];
00265         }
00266 #if 0 // simulate mismatch control for tested IDCT but not the ref
00267 { int sum=0;
00268         for(i=0;i<64;i++)
00269            sum+=block[i];
00270 
00271         if((sum&1)==0) block[63]^=1;
00272 }
00273 #endif
00274 
00275         fdct_func(block);
00276         mmx_emms();
00277 
00278         if (form == SCALE_PERM) {
00279             for(i=0; i<64; i++) {
00280                 scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
00281                 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
00282             }
00283         }
00284 
00285         fdct_ref(block1);
00286 
00287         blockSumErr=0;
00288         for(i=0;i<64;i++) {
00289             v = abs(block[i] - block1[i]);
00290             if (v > err_inf)
00291                 err_inf = v;
00292             err2 += v * v;
00293             sysErr[i] += block[i] - block1[i];
00294             blockSumErr += v;
00295             if( abs(block[i])>maxout) maxout=abs(block[i]);
00296         }
00297         if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
00298 #if 0 // print different matrix pairs
00299         if(blockSumErr){
00300             printf("\n");
00301             for(i=0; i<64; i++){
00302                 if((i&7)==0) printf("\n");
00303                 printf("%4d ", block_org[i]);
00304             }
00305             for(i=0; i<64; i++){
00306                 if((i&7)==0) printf("\n");
00307                 printf("%4d ", block[i] - block1[i]);
00308             }
00309         }
00310 #endif
00311     }
00312     for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
00313 
00314 #if 1 // dump systematic errors
00315     for(i=0; i<64; i++){
00316         if(i%8==0) printf("\n");
00317         printf("%7d ", (int)sysErr[i]);
00318     }
00319     printf("\n");
00320 #endif
00321 
00322     printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
00323            is_idct ? "IDCT" : "DCT",
00324            name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
00325 #if 1 //Speed test
00326     /* speed test */
00327     for(i=0;i<64;i++)
00328         block1[i] = 0;
00329     switch(test){
00330     case 0:
00331         for(i=0;i<64;i++)
00332             block1[i] = av_lfg_get(&prng) % 512 -256;
00333         if (is_idct){
00334             ff_ref_fdct(block1);
00335 
00336             for(i=0;i<64;i++)
00337                 block1[i]>>=3;
00338         }
00339     break;
00340     case 1:{
00341     case 2:
00342         block1[0] = av_lfg_get(&prng) % 512 -256;
00343         block1[1] = av_lfg_get(&prng) % 512 -256;
00344         block1[2] = av_lfg_get(&prng) % 512 -256;
00345         block1[3] = av_lfg_get(&prng) % 512 -256;
00346     }break;
00347     }
00348 
00349     if (form == MMX_PERM) {
00350         for(i=0;i<64;i++)
00351             block[idct_mmx_perm[i]] = block1[i];
00352     } else if(form == MMX_SIMPLE_PERM) {
00353         for(i=0;i<64;i++)
00354             block[idct_simple_mmx_perm[i]] = block1[i];
00355     } else {
00356         for(i=0; i<64; i++)
00357             block[i]= block1[i];
00358     }
00359 
00360     ti = gettime();
00361     it1 = 0;
00362     do {
00363         for(it=0;it<NB_ITS_SPEED;it++) {
00364             for(i=0; i<64; i++)
00365                 block[i]= block1[i];
00366 //            memcpy(block, block1, sizeof(DCTELEM) * 64);
00367 // do not memcpy especially not fastmemcpy because it does movntq !!!
00368             fdct_func(block);
00369         }
00370         it1 += NB_ITS_SPEED;
00371         ti1 = gettime() - ti;
00372     } while (ti1 < 1000000);
00373     mmx_emms();
00374 
00375     printf("%s %s: %0.1f kdct/s\n",
00376            is_idct ? "IDCT" : "DCT",
00377            name, (double)it1 * 1000.0 / (double)ti1);
00378 #endif
00379 }
00380 
/* 8x8 pixel outputs compared by idct248_error(): img_dest receives the result
 * of the implementation under test, img_dest1 the result of idct248_ref(). */
00381 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
00382 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
00383 
00384 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
00385 {
00386     static int init;
00387     static double c8[8][8];
00388     static double c4[4][4];
00389     double block1[64], block2[64], block3[64];
00390     double s, sum, v;
00391     int i, j, k;
00392 
00393     if (!init) {
00394         init = 1;
00395 
00396         for(i=0;i<8;i++) {
00397             sum = 0;
00398             for(j=0;j<8;j++) {
00399                 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
00400                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
00401                 sum += c8[i][j] * c8[i][j];
00402             }
00403         }
00404 
00405         for(i=0;i<4;i++) {
00406             sum = 0;
00407             for(j=0;j<4;j++) {
00408                 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
00409                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
00410                 sum += c4[i][j] * c4[i][j];
00411             }
00412         }
00413     }
00414 
00415     /* butterfly */
00416     s = 0.5 * sqrt(2.0);
00417     for(i=0;i<4;i++) {
00418         for(j=0;j<8;j++) {
00419             block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
00420             block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
00421         }
00422     }
00423 
00424     /* idct8 on lines */
00425     for(i=0;i<8;i++) {
00426         for(j=0;j<8;j++) {
00427             sum = 0;
00428             for(k=0;k<8;k++)
00429                 sum += c8[k][j] * block1[8*i+k];
00430             block2[8*i+j] = sum;
00431         }
00432     }
00433 
00434     /* idct4 */
00435     for(i=0;i<8;i++) {
00436         for(j=0;j<4;j++) {
00437             /* top */
00438             sum = 0;
00439             for(k=0;k<4;k++)
00440                 sum += c4[k][j] * block2[8*(2*k)+i];
00441             block3[8*(2*j)+i] = sum;
00442 
00443             /* bottom */
00444             sum = 0;
00445             for(k=0;k<4;k++)
00446                 sum += c4[k][j] * block2[8*(2*k+1)+i];
00447             block3[8*(2*j+1)+i] = sum;
00448         }
00449     }
00450 
00451     /* clamp and store the result */
00452     for(i=0;i<8;i++) {
00453         for(j=0;j<8;j++) {
00454             v = block3[8*i+j];
00455             if (v < 0)
00456                 v = 0;
00457             else if (v > 255)
00458                 v = 255;
00459             dest[i * linesize + j] = (int)rint(v);
00460         }
00461     }
00462 }
00463 
00464 static void idct248_error(const char *name,
00465                     void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
00466 {
00467     int it, i, it1, ti, ti1, err_max, v;
00468 
00469     AVLFG prng;
00470 
00471     av_lfg_init(&prng, 1);
00472 
00473     /* just one test to see if code is correct (precision is less
00474        important here) */
00475     err_max = 0;
00476     for(it=0;it<NB_ITS;it++) {
00477 
00478         /* XXX: use forward transform to generate values */
00479         for(i=0;i<64;i++)
00480             block1[i] = av_lfg_get(&prng) % 256 - 128;
00481         block1[0] += 1024;
00482 
00483         for(i=0; i<64; i++)
00484             block[i]= block1[i];
00485         idct248_ref(img_dest1, 8, block);
00486 
00487         for(i=0; i<64; i++)
00488             block[i]= block1[i];
00489         idct248_put(img_dest, 8, block);
00490 
00491         for(i=0;i<64;i++) {
00492             v = abs((int)img_dest[i] - (int)img_dest1[i]);
00493             if (v == 255)
00494                 printf("%d %d\n", img_dest[i], img_dest1[i]);
00495             if (v > err_max)
00496                 err_max = v;
00497         }
00498 #if 0
00499         printf("ref=\n");
00500         for(i=0;i<8;i++) {
00501             int j;
00502             for(j=0;j<8;j++) {
00503                 printf(" %3d", img_dest1[i*8+j]);
00504             }
00505             printf("\n");
00506         }
00507 
00508         printf("out=\n");
00509         for(i=0;i<8;i++) {
00510             int j;
00511             for(j=0;j<8;j++) {
00512                 printf(" %3d", img_dest[i*8+j]);
00513             }
00514             printf("\n");
00515         }
00516 #endif
00517     }
00518     printf("%s %s: err_inf=%d\n",
00519            1 ? "IDCT248" : "DCT248",
00520            name, err_max);
00521 
00522     ti = gettime();
00523     it1 = 0;
00524     do {
00525         for(it=0;it<NB_ITS_SPEED;it++) {
00526             for(i=0; i<64; i++)
00527                 block[i]= block1[i];
00528 //            memcpy(block, block1, sizeof(DCTELEM) * 64);
00529 // do not memcpy especially not fastmemcpy because it does movntq !!!
00530             idct248_put(img_dest, 8, block);
00531         }
00532         it1 += NB_ITS_SPEED;
00533         ti1 = gettime() - ti;
00534     } while (ti1 < 1000000);
00535     mmx_emms();
00536 
00537     printf("%s %s: %0.1f kdct/s\n",
00538            1 ? "IDCT248" : "DCT248",
00539            name, (double)it1 * 1000.0 / (double)ti1);
00540 }
00541 
/**
 * Print the command-line synopsis and the list of options to stdout.
 */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        "            1 -> test with random sparse matrixes\n"
        "            2 -> do 3. test from mpeg4 std\n"
        "-i          test IDCT implementations\n"
        "-4          test IDCT248 implementations\n";

    printf("%s", usage);
}
00551 
00552 int main(int argc, char **argv)
00553 {
00554     int test_idct = 0, test_248_dct = 0;
00555     int c,i;
00556     int test=1;
00557     cpu_flags = mm_support();
00558 
00559     ff_ref_dct_init();
00560     idct_mmx_init();
00561 
00562     for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
00563     for(i=0;i<MAX_NEG_CROP;i++) {
00564         cropTbl[i] = 0;
00565         cropTbl[i + MAX_NEG_CROP + 256] = 255;
00566     }
00567 
00568     for(;;) {
00569         c = getopt(argc, argv, "ih4");
00570         if (c == -1)
00571             break;
00572         switch(c) {
00573         case 'i':
00574             test_idct = 1;
00575             break;
00576         case '4':
00577             test_248_dct = 1;
00578             break;
00579         default :
00580         case 'h':
00581             help();
00582             return 0;
00583         }
00584     }
00585 
00586     if(optind <argc) test= atoi(argv[optind]);
00587 
00588     printf("ffmpeg DCT/IDCT test\n");
00589 
00590     if (test_248_dct) {
00591         idct248_error("SIMPLE-C", ff_simple_idct248_put);
00592     } else {
00593       for (i=0;algos[i].name;i++)
00594         if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
00595           dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
00596         }
00597     }
00598     return 0;
00599 }

Generated on Fri Sep 16 2011 17:17:35 for FFmpeg by  doxygen 1.7.1