Libav

libpostproc/postprocess.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
00003  *
00004  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
00005  *
00006  * This file is part of FFmpeg.
00007  *
00008  * FFmpeg is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * FFmpeg is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with FFmpeg; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00028 /*
00029                         C       MMX     MMX2    3DNow   AltiVec
00030 isVertDC                Ec      Ec                      Ec
00031 isVertMinMaxOk          Ec      Ec                      Ec
00032 doVertLowPass           E               e       e       Ec
00033 doVertDefFilter         Ec      Ec      e       e       Ec
00034 isHorizDC               Ec      Ec                      Ec
00035 isHorizMinMaxOk         a       E                       Ec
00036 doHorizLowPass          E               e       e       Ec
00037 doHorizDefFilter        Ec      Ec      e       e       Ec
00038 do_a_deblock            Ec      E       Ec      E
00039 deRing                  E               e       e*      Ecp
00040 Vertical RKAlgo1        E               a       a
00041 Horizontal RKAlgo1                      a       a
00042 Vertical X1#            a               E       E
00043 Horizontal X1#          a               E       E
00044 LinIpolDeinterlace      e               E       E*
00045 CubicIpolDeinterlace    a               e       e*
00046 LinBlendDeinterlace     e               E       E*
00047 MedianDeinterlace#      E       Ec      Ec
00048 TempDeNoiser#           E               e       e       Ec
00049 
00050 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
00051 # more or less selfinvented filters so the exactness is not too meaningful
00052 E = Exact implementation
00053 e = almost exact implementation (slightly different rounding,...)
00054 a = alternative / approximate impl
00055 c = checked against the other implementations (-vo md5)
00056 p = partially optimized, still some work to do
00057 */
00058 
00059 /*
00060 TODO:
00061 reduce the time wasted on the mem transfer
00062 unroll stuff if instructions depend too much on the prior one
00063 move YScale thing to the end instead of fixing QP
00064 write a faster and higher quality deblocking filter :)
00065 make the mainloop more flexible (variable number of blocks at once)
00066         (the if/else stuff per block is slowing things down)
00067 compare the quality & speed of all filters
00068 split this huge file
00069 optimize c versions
00070 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
00071 ...
00072 */
00073 
00074 //Changelog: use the Subversion log
00075 
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082 //#undef HAVE_MMX2
00083 //#define HAVE_AMD3DNOW
00084 //#undef HAVE_MMX
00085 //#undef ARCH_X86
00086 //#define DEBUG_BRIGHTNESS
00087 #include "postprocess.h"
00088 #include "postprocess_internal.h"
00089 #include "libavutil/avstring.h"
00090 
00091 unsigned postproc_version(void)
00092 {
00093     return LIBPOSTPROC_VERSION_INT;
00094 }
00095 
00096 const char *postproc_configuration(void)
00097 {
00098     return FFMPEG_CONFIGURATION;
00099 }
00100 
00101 const char *postproc_license(void)
00102 {
00103 #define LICENSE_PREFIX "libpostproc license: "
00104     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00105 }
00106 
00107 #if HAVE_ALTIVEC_H
00108 #include <altivec.h>
00109 #endif
00110 
/* Size in bytes of the scratch buffer that pp_get_mode_by_name_and_quality()
 * copies the mode string into. */
00111 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array that collects unrecognized options. */
00112 #define OPTIONS_ARRAY_SIZE 10
/* Number of rows/columns the per-block loops in this file iterate over. */
00113 #define BLOCK_SIZE 8
00114 #define TEMP_STRIDE 8
00115 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
00116 
00117 #if ARCH_X86
/* 64-bit patterns: wNN repeats the 16-bit value 0x00NN four times,
 * bNN repeats the byte 0xNN eight times.
 * NOTE(review): presumably operands for the MMX code in the included
 * templates -- DECLARE_ASM_CONST is defined outside this file, confirm. */
00118 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00119 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00120 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00121 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00122 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00123 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00124 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00125 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00126 #endif
00127 
/* Threshold constant for the deringing filter (not referenced in this file
 * outside the included templates). */
00128 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00129 
00130 
/*
 * Table of the filters that can be selected by name. Each row holds the
 * short name, the long name, three integers and the mode bit mask of one
 * filter; pp_get_mode_by_name_and_quality() compares a filter token against
 * both names and clears/sets the mask in lumMode and chromMode.
 * NOTE(review): the three integers presumably initialize chromDefault,
 * minLumQuality and minChromQuality in that order -- confirm against the
 * struct PPFilter declaration, which is not part of this file.
 * The table is terminated by a row whose names are NULL.
 */
00131 static struct PPFilter filters[]=
00132 {
00133     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
00134     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
00135 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
00136     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
00137     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
00138     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
00139     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
00140     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
00141     {"dr", "dering",                1, 5, 6, DERING},
00142     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
00143     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00144     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00145     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00146     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
00147     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
00148     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
00149     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
00150     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
00151     {NULL, NULL,0,0,0,0} //End Marker
00152 };
00153 
/*
 * Alias table, stored as consecutive (alias, expansion) string pairs and
 * terminated by a single NULL. When a filter token equals an alias,
 * pp_get_mode_by_name_and_quality() splices the expansion into its working
 * buffer so the expanded filters are parsed on the following iterations.
 */
00154 static const char *replaceTable[]=
00155 {
00156     "default",      "hb:a,vb:a,dr:a",
00157     "de",           "hb:a,vb:a,dr:a",
00158     "fast",         "h1:a,v1:a,dr:a",
00159     "fa",           "h1:a,v1:a,dr:a",
00160     "ac",           "ha:a:128:7,va:a,dr:a",
00161     NULL //End Marker
00162 };
00163 
00164 
00165 #if ARCH_X86
/**
 * Execute the x86 "prefetchnta" instruction with p as its memory operand.
 *
 * @param p address passed to the instruction in a register
 */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}
00172 
/**
 * Execute the x86 "prefetcht0" instruction with p as its memory operand.
 *
 * @param p address passed to the instruction in a register
 */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}
00179 
/**
 * Execute the x86 "prefetcht1" instruction with p as its memory operand.
 *
 * @param p address passed to the instruction in a register
 */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}
00186 
/**
 * Execute the x86 "prefetcht2" instruction with p as its memory operand.
 *
 * @param p address passed to the instruction in a register
 */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
00193 #endif
00194 
00195 /* The horizontal functions exist only in C because the MMX
00196  * code is faster with vertical filters and transposing. */
00197 
00201 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00202 {
00203     int numEq= 0;
00204     int y;
00205     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00206     const int dcThreshold= dcOffset*2 + 1;
00207 
00208     for(y=0; y<BLOCK_SIZE; y++){
00209         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00210         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00211         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00212         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00213         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00214         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00215         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00216         src+= stride;
00217     }
00218     return numEq > c->ppMode.flatnessThreshold;
00219 }
00220 
00224 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00225 {
00226     int numEq= 0;
00227     int y;
00228     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00229     const int dcThreshold= dcOffset*2 + 1;
00230 
00231     src+= stride*4; // src points to begin of the 8x8 Block
00232     for(y=0; y<BLOCK_SIZE-1; y++){
00233         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00234         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00235         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00236         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00237         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00238         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00239         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00240         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00241         src+= stride;
00242     }
00243     return numEq > c->ppMode.flatnessThreshold;
00244 }
00245 
/**
 * Cheap horizontal range check over 8 rows.
 *
 * Instead of scanning every pixel, one pair of columns is sampled per row
 * (the pair pattern repeats every 4 rows). The check fails as soon as a
 * sampled pair differs by more than 2*QP in either direction.
 *
 * @param src    first pixel of the first row
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    /* columns compared on row r are lhs[r&3] and rhs[r&3] */
    static const int lhs[4]= {0, 2, 4, 6};
    static const int rhs[4]= {5, 7, 1, 3};
    const uint8_t *row= src;
    int r;

    for(r=0; r<8; r++, row+= stride){
        const int k= r & 3;

        // unsigned compare rejects differences below -2*QP and above 2*QP
        if((unsigned)(row[lhs[k]] - row[rhs[k]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
00268 
00269 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00270 {
00271 #if 1
00272 #if 1
00273     int x;
00274     src+= stride*4;
00275     for(x=0; x<BLOCK_SIZE; x+=4){
00276         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
00277         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00278         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00279         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00280     }
00281 #else
00282     int x;
00283     src+= stride*3;
00284     for(x=0; x<BLOCK_SIZE; x++){
00285         if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
00286     }
00287 #endif
00288     return 1;
00289 #else
00290     int x;
00291     src+= stride*4;
00292     for(x=0; x<BLOCK_SIZE; x++){
00293         int min=255;
00294         int max=0;
00295         int y;
00296         for(y=0; y<8; y++){
00297             int v= src[x + y*stride];
00298             if(v>max) max=v;
00299             if(v<min) min=v;
00300         }
00301         if(max-min > 2*QP) return 0;
00302     }
00303     return 1;
00304 #endif
00305 }
00306 
00307 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00308 {
00309     if( isHorizDC_C(src, stride, c) ){
00310         if( isHorizMinMaxOk_C(src, stride, c->QP) )
00311             return 1;
00312         else
00313             return 0;
00314     }else{
00315         return 2;
00316     }
00317 }
00318 
00319 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00320 {
00321     if( isVertDC_C(src, stride, c) ){
00322         if( isVertMinMaxOk_C(src, stride, c->QP) )
00323             return 1;
00324         else
00325             return 0;
00326     }else{
00327         return 2;
00328     }
00329 }
00330 
00331 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00332 {
00333     int y;
00334     for(y=0; y<BLOCK_SIZE; y++){
00335         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00336 
00337         if(FFABS(middleEnergy) < 8*c->QP){
00338             const int q=(dst[3] - dst[4])/2;
00339             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00340             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00341 
00342             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00343             d= FFMAX(d, 0);
00344 
00345             d= (5*d + 32) >> 6;
00346             d*= FFSIGN(-middleEnergy);
00347 
00348             if(q>0)
00349             {
00350                 d= d<0 ? 0 : d;
00351                 d= d>q ? q : d;
00352             }
00353             else
00354             {
00355                 d= d>0 ? 0 : d;
00356                 d= d<q ? q : d;
00357             }
00358 
00359             dst[3]-= d;
00360             dst[4]+= d;
00361         }
00362         dst+= stride;
00363     }
00364 }
00365 
00370 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00371 {
00372     int y;
00373     for(y=0; y<BLOCK_SIZE; y++){
00374         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00375         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00376 
00377         int sums[10];
00378         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00379         sums[1] = sums[0] - first  + dst[3];
00380         sums[2] = sums[1] - first  + dst[4];
00381         sums[3] = sums[2] - first  + dst[5];
00382         sums[4] = sums[3] - first  + dst[6];
00383         sums[5] = sums[4] - dst[0] + dst[7];
00384         sums[6] = sums[5] - dst[1] + last;
00385         sums[7] = sums[6] - dst[2] + last;
00386         sums[8] = sums[7] - dst[3] + last;
00387         sums[9] = sums[8] - dst[4] + last;
00388 
00389         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00390         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00391         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00392         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00393         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00394         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00395         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00396         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00397 
00398         dst+= stride;
00399     }
00400 }
00401 
00410 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00411 {
00412     int y;
00413     static uint64_t *lut= NULL;
00414     if(lut==NULL)
00415     {
00416         int i;
00417         lut = av_malloc(256*8);
00418         for(i=0; i<256; i++)
00419         {
00420             int v= i < 128 ? 2*i : 2*(i-256);
00421 /*
00422 //Simulate 112242211 9-Tap filter
00423             uint64_t a= (v/16)  & 0xFF;
00424             uint64_t b= (v/8)   & 0xFF;
00425             uint64_t c= (v/4)   & 0xFF;
00426             uint64_t d= (3*v/8) & 0xFF;
00427 */
00428 //Simulate piecewise linear interpolation
00429             uint64_t a= (v/16)   & 0xFF;
00430             uint64_t b= (v*3/16) & 0xFF;
00431             uint64_t c= (v*5/16) & 0xFF;
00432             uint64_t d= (7*v/16) & 0xFF;
00433             uint64_t A= (0x100 - a)&0xFF;
00434             uint64_t B= (0x100 - b)&0xFF;
00435             uint64_t C= (0x100 - c)&0xFF;
00436             uint64_t D= (0x100 - c)&0xFF;
00437 
00438             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00439                        (D<<24) | (C<<16) | (B<<8)  | (A);
00440             //lut[i] = (v<<32) | (v<<24);
00441         }
00442     }
00443 
00444     for(y=0; y<BLOCK_SIZE; y++){
00445         int a= src[1] - src[2];
00446         int b= src[3] - src[4];
00447         int c= src[5] - src[6];
00448 
00449         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00450 
00451         if(d < QP){
00452             int v = d * FFSIGN(-b);
00453 
00454             src[1] +=v/8;
00455             src[2] +=v/4;
00456             src[3] +=3*v/8;
00457             src[4] -=3*v/8;
00458             src[5] -=v/4;
00459             src[6] -=v/8;
00460         }
00461         src+=stride;
00462     }
00463 }
00464 
00468 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00469     int y;
00470     const int QP= c->QP;
00471     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00472     const int dcThreshold= dcOffset*2 + 1;
00473 //START_TIMER
00474     src+= step*4; // src points to begin of the 8x8 Block
00475     for(y=0; y<8; y++){
00476         int numEq= 0;
00477 
00478         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00479         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00480         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00481         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00482         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00483         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00484         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00485         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00486         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00487         if(numEq > c->ppMode.flatnessThreshold){
00488             int min, max, x;
00489 
00490             if(src[0] > src[step]){
00491                 max= src[0];
00492                 min= src[step];
00493             }else{
00494                 max= src[step];
00495                 min= src[0];
00496             }
00497             for(x=2; x<8; x+=2){
00498                 if(src[x*step] > src[(x+1)*step]){
00499                         if(src[x    *step] > max) max= src[ x   *step];
00500                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
00501                 }else{
00502                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
00503                         if(src[ x   *step] < min) min= src[ x   *step];
00504                 }
00505             }
00506             if(max-min < 2*QP){
00507                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00508                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00509 
00510                 int sums[10];
00511                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00512                 sums[1] = sums[0] - first       + src[3*step];
00513                 sums[2] = sums[1] - first       + src[4*step];
00514                 sums[3] = sums[2] - first       + src[5*step];
00515                 sums[4] = sums[3] - first       + src[6*step];
00516                 sums[5] = sums[4] - src[0*step] + src[7*step];
00517                 sums[6] = sums[5] - src[1*step] + last;
00518                 sums[7] = sums[6] - src[2*step] + last;
00519                 sums[8] = sums[7] - src[3*step] + last;
00520                 sums[9] = sums[8] - src[4*step] + last;
00521 
00522                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00523                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00524                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00525                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00526                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00527                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00528                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00529                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00530             }
00531         }else{
00532             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00533 
00534             if(FFABS(middleEnergy) < 8*QP){
00535                 const int q=(src[3*step] - src[4*step])/2;
00536                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00537                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00538 
00539                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00540                 d= FFMAX(d, 0);
00541 
00542                 d= (5*d + 32) >> 6;
00543                 d*= FFSIGN(-middleEnergy);
00544 
00545                 if(q>0){
00546                     d= d<0 ? 0 : d;
00547                     d= d>q ? q : d;
00548                 }else{
00549                     d= d>0 ? 0 : d;
00550                     d= d<q ? q : d;
00551                 }
00552 
00553                 src[3*step]-= d;
00554                 src[4*step]+= d;
00555             }
00556         }
00557 
00558         src += stride;
00559     }
00560 /*if(step==16){
00561     STOP_TIMER("step16")
00562 }else{
00563     STOP_TIMER("stepX")
00564 }*/
00565 }
00566 
/*
 * Variant selection: postprocess_template.c is included once per enabled
 * CPU variant, each time with a different RENAME() suffix and a different
 * set of HAVE_* feature macros. With CONFIG_RUNTIME_CPUDETECT every variant
 * available for the architecture is compiled.
 */
00567 //Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one
00568 //Plain C versions
00569 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
00570 #define COMPILE_C
00571 #endif
00572 
00573 #if HAVE_ALTIVEC
00574 #define COMPILE_ALTIVEC
00575 #endif //HAVE_ALTIVEC
00576 
00577 #if ARCH_X86
00578 
00579 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00580 #define COMPILE_MMX
00581 #endif
00582 
00583 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
00584 #define COMPILE_MMX2
00585 #endif
00586 
00587 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00588 #define COMPILE_3DNOW
00589 #endif
00590 #endif /* ARCH_X86 */
00591 
/* Start from "no extension available"; each inclusion below switches on
 * exactly the feature macros of the variant being compiled. */
00592 #undef HAVE_MMX
00593 #define HAVE_MMX 0
00594 #undef HAVE_MMX2
00595 #define HAVE_MMX2 0
00596 #undef HAVE_AMD3DNOW
00597 #define HAVE_AMD3DNOW 0
00598 #undef HAVE_ALTIVEC
00599 #define HAVE_ALTIVEC 0
00600 
/* RENAME() appends the variant suffix to an identifier.
 * NOTE(review): presumably the template uses it to name its functions
 * (postProcess_C, postProcess_MMX, ...) -- confirm in the template. */
00601 #ifdef COMPILE_C
00602 #define RENAME(a) a ## _C
00603 #include "postprocess_template.c"
00604 #endif
00605 
00606 #ifdef COMPILE_ALTIVEC
00607 #undef RENAME
00608 #undef HAVE_ALTIVEC
00609 #define HAVE_ALTIVEC 1
00610 #define RENAME(a) a ## _altivec
00611 #include "postprocess_altivec_template.c"
00612 #include "postprocess_template.c"
00613 #endif
00614 
00615 //MMX versions
00616 #ifdef COMPILE_MMX
00617 #undef RENAME
00618 #undef HAVE_MMX
00619 #define HAVE_MMX 1
00620 #define RENAME(a) a ## _MMX
00621 #include "postprocess_template.c"
00622 #endif
00623 
00624 //MMX2 versions
00625 #ifdef COMPILE_MMX2
00626 #undef RENAME
00627 #undef HAVE_MMX
00628 #undef HAVE_MMX2
00629 #define HAVE_MMX 1
00630 #define HAVE_MMX2 1
00631 #define RENAME(a) a ## _MMX2
00632 #include "postprocess_template.c"
00633 #endif
00634 
00635 //3DNOW versions
00636 #ifdef COMPILE_3DNOW
00637 #undef RENAME
00638 #undef HAVE_MMX
00639 #undef HAVE_MMX2
00640 #undef HAVE_AMD3DNOW
00641 #define HAVE_MMX 1
00642 #define HAVE_MMX2 0
00643 #define HAVE_AMD3DNOW 1
00644 #define RENAME(a) a ## _3DNow
00645 #include "postprocess_template.c"
00646 #endif
00647 
00648 // Note: from here on the HAVE_xyz macros hold whatever the last inclusion above set, not the configured values, so do not rely on them.
00649 
/*
 * Dispatch one plane to the postProcess_* variant generated from the
 * template inclusions in this file.
 *
 * The mode pointed to by vm is copied into the context on every call.
 * With CONFIG_RUNTIME_CPUDETECT the variant is picked from c->cpuCaps;
 * otherwise the choice is made at compile time from the HAVE_* macros.
 * All arguments except vm are forwarded unchanged, with vc passed on as
 * a PPContext pointer.
 */
00650 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00651         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00652 {
00653     PPContext *c= (PPContext *)vc;
00654     PPMode *ppMode= (PPMode *)vm;
00655     c->ppMode= *ppMode; //FIXME
00656 
00657     // Using ifs here as they are faster than function pointers although the
00658     // difference would not be measurable here but it is much better because
00659     // someone might exchange the CPU without restarting MPlayer ;)
00660 #if CONFIG_RUNTIME_CPUDETECT
00661 #if ARCH_X86
00662     // ordered per speed fastest first
00663     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00664         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00665     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00666         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00667     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00668         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00669     else
00670         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00671 #else
00672 #if HAVE_ALTIVEC
00673     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00674             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00675     else
00676 #endif
00677             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00678 #endif
00679 #else //CONFIG_RUNTIME_CPUDETECT
      // Compile-time dispatch: the HAVE_* values tested here are the ones left
      // behind by the last template inclusion earlier in this file.
00680 #if   HAVE_MMX2
00681             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00682 #elif HAVE_AMD3DNOW
00683             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00684 #elif HAVE_MMX
00685             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00686 #elif HAVE_ALTIVEC
00687             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00688 #else
00689             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00690 #endif
00691 #endif //!CONFIG_RUNTIME_CPUDETECT
00692 }
00693 
00694 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00695 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
00696 
/**
 * Help text for the -pp command line option: lists every filter with its
 * short and long name, the options it accepts, and the syntax of a mode
 * string.
 *
 * The names printed here must be spelled exactly as in filters[] and
 * replaceTable[], because pp_get_mode_by_name_and_quality() compares user
 * input against those tables with the case-sensitive strcmp().
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00746 
00747 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00748 {
00749     char temp[GET_MODE_BUFFER_SIZE];
00750     char *p= temp;
00751     static const char filterDelimiters[] = ",/";
00752     static const char optionDelimiters[] = ":";
00753     struct PPMode *ppMode;
00754     char *filterToken;
00755 
00756     ppMode= av_malloc(sizeof(PPMode));
00757 
00758     ppMode->lumMode= 0;
00759     ppMode->chromMode= 0;
00760     ppMode->maxTmpNoise[0]= 700;
00761     ppMode->maxTmpNoise[1]= 1500;
00762     ppMode->maxTmpNoise[2]= 3000;
00763     ppMode->maxAllowedY= 234;
00764     ppMode->minAllowedY= 16;
00765     ppMode->baseDcDiff= 256/8;
00766     ppMode->flatnessThreshold= 56-16-1;
00767     ppMode->maxClippedThreshold= 0.01;
00768     ppMode->error=0;
00769 
00770     memset(temp, 0, GET_MODE_BUFFER_SIZE);
00771     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00772 
00773     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00774 
00775     for(;;){
00776         char *filterName;
00777         int q= 1000000; //PP_QUALITY_MAX;
00778         int chrom=-1;
00779         int luma=-1;
00780         char *option;
00781         char *options[OPTIONS_ARRAY_SIZE];
00782         int i;
00783         int filterNameOk=0;
00784         int numOfUnknownOptions=0;
00785         int enable=1; //does the user want us to enabled or disabled the filter
00786 
00787         filterToken= strtok(p, filterDelimiters);
00788         if(filterToken == NULL) break;
00789         p+= strlen(filterToken) + 1; // p points to next filterToken
00790         filterName= strtok(filterToken, optionDelimiters);
00791         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00792 
00793         if(*filterName == '-'){
00794             enable=0;
00795             filterName++;
00796         }
00797 
00798         for(;;){ //for all options
00799             option= strtok(NULL, optionDelimiters);
00800             if(option == NULL) break;
00801 
00802             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00803             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00804             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00805             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00806             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00807             else{
00808                 options[numOfUnknownOptions] = option;
00809                 numOfUnknownOptions++;
00810             }
00811             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00812         }
00813         options[numOfUnknownOptions] = NULL;
00814 
00815         /* replace stuff from the replace Table */
00816         for(i=0; replaceTable[2*i]!=NULL; i++){
00817             if(!strcmp(replaceTable[2*i], filterName)){
00818                 int newlen= strlen(replaceTable[2*i + 1]);
00819                 int plen;
00820                 int spaceLeft;
00821 
00822                 if(p==NULL) p= temp, *p=0;      //last filter
00823                 else p--, *p=',';               //not last filter
00824 
00825                 plen= strlen(p);
00826                 spaceLeft= p - temp + plen;
00827                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
00828                     ppMode->error++;
00829                     break;
00830                 }
00831                 memmove(p + newlen, p, plen+1);
00832                 memcpy(p, replaceTable[2*i + 1], newlen);
00833                 filterNameOk=1;
00834             }
00835         }
00836 
00837         for(i=0; filters[i].shortName!=NULL; i++){
00838             if(   !strcmp(filters[i].longName, filterName)
00839                || !strcmp(filters[i].shortName, filterName)){
00840                 ppMode->lumMode &= ~filters[i].mask;
00841                 ppMode->chromMode &= ~filters[i].mask;
00842 
00843                 filterNameOk=1;
00844                 if(!enable) break; // user wants to disable it
00845 
00846                 if(q >= filters[i].minLumQuality && luma)
00847                     ppMode->lumMode|= filters[i].mask;
00848                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00849                     if(q >= filters[i].minChromQuality)
00850                             ppMode->chromMode|= filters[i].mask;
00851 
00852                 if(filters[i].mask == LEVEL_FIX){
00853                     int o;
00854                     ppMode->minAllowedY= 16;
00855                     ppMode->maxAllowedY= 234;
00856                     for(o=0; options[o]!=NULL; o++){
00857                         if(  !strcmp(options[o],"fullyrange")
00858                            ||!strcmp(options[o],"f")){
00859                             ppMode->minAllowedY= 0;
00860                             ppMode->maxAllowedY= 255;
00861                             numOfUnknownOptions--;
00862                         }
00863                     }
00864                 }
00865                 else if(filters[i].mask == TEMP_NOISE_FILTER)
00866                 {
00867                     int o;
00868                     int numOfNoises=0;
00869 
00870                     for(o=0; options[o]!=NULL; o++){
00871                         char *tail;
00872                         ppMode->maxTmpNoise[numOfNoises]=
00873                             strtol(options[o], &tail, 0);
00874                         if(tail!=options[o]){
00875                             numOfNoises++;
00876                             numOfUnknownOptions--;
00877                             if(numOfNoises >= 3) break;
00878                         }
00879                     }
00880                 }
00881                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
00882                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00883                     int o;
00884 
00885                     for(o=0; options[o]!=NULL && o<2; o++){
00886                         char *tail;
00887                         int val= strtol(options[o], &tail, 0);
00888                         if(tail==options[o]) break;
00889 
00890                         numOfUnknownOptions--;
00891                         if(o==0) ppMode->baseDcDiff= val;
00892                         else ppMode->flatnessThreshold= val;
00893                     }
00894                 }
00895                 else if(filters[i].mask == FORCE_QUANT){
00896                     int o;
00897                     ppMode->forcedQuant= 15;
00898 
00899                     for(o=0; options[o]!=NULL && o<1; o++){
00900                         char *tail;
00901                         int val= strtol(options[o], &tail, 0);
00902                         if(tail==options[o]) break;
00903 
00904                         numOfUnknownOptions--;
00905                         ppMode->forcedQuant= val;
00906                     }
00907                 }
00908             }
00909         }
00910         if(!filterNameOk) ppMode->error++;
00911         ppMode->error += numOfUnknownOptions;
00912     }
00913 
00914     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00915     if(ppMode->error){
00916         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00917         av_free(ppMode);
00918         return NULL;
00919     }
00920     return ppMode;
00921 }
00922 
00923 void pp_free_mode(pp_mode *mode){
00924     av_free(mode);
00925 }
00926 
/**
 * Replace the buffer stored in *p with a new zero-filled buffer.
 *
 * The previous buffer is freed before the new one is allocated, so its
 * contents are not carried over. After the call *p holds whatever
 * av_mallocz() returned.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment accepted for the callers' benefit but not used here
 * @param size      number of bytes requested from av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    (void)alignment;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
00931 
00932 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00933     int mbWidth = (width+15)>>4;
00934     int mbHeight= (height+15)>>4;
00935     int i;
00936 
00937     c->stride= stride;
00938     c->qpStride= qpStride;
00939 
00940     reallocAlign((void **)&c->tempDst, 8, stride*24);
00941     reallocAlign((void **)&c->tempSrc, 8, stride*24);
00942     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00943     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00944     for(i=0; i<256; i++)
00945             c->yHistogram[i]= width*height/64*15/256;
00946 
00947     for(i=0; i<3; i++){
00948         //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
00949         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00950         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
00951     }
00952 
00953     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00954     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00955     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00956     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00957 }
00958 
/**
 * Name callback for the logging class of postprocessing contexts.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    (void)ptr;
    return "postproc";
}
00962 
00963 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00964 
00965 pp_context *pp_get_context(int width, int height, int cpuCaps){
00966     PPContext *c= av_malloc(sizeof(PPContext));
00967     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
00968     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
00969 
00970     memset(c, 0, sizeof(PPContext));
00971     c->av_class = &av_codec_context_class;
00972     c->cpuCaps= cpuCaps;
00973     if(cpuCaps&PP_FORMAT){
00974         c->hChromaSubSample= cpuCaps&0x3;
00975         c->vChromaSubSample= (cpuCaps>>4)&0x3;
00976     }else{
00977         c->hChromaSubSample= 1;
00978         c->vChromaSubSample= 1;
00979     }
00980 
00981     reallocBuffers(c, width, height, stride, qpStride);
00982 
00983     c->frameNum=-1;
00984 
00985     return c;
00986 }
00987 
00988 void pp_free_context(void *vc){
00989     PPContext *c = (PPContext*)vc;
00990     int i;
00991 
00992     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00993     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00994 
00995     av_free(c->tempBlocks);
00996     av_free(c->yHistogram);
00997     av_free(c->tempDst);
00998     av_free(c->tempSrc);
00999     av_free(c->deintTemp);
01000     av_free(c->stdQPTable);
01001     av_free(c->nonBQPTable);
01002     av_free(c->forcedQPTable);
01003 
01004     memset(c, 0, sizeof(PPContext));
01005 
01006     av_free(c);
01007 }
01008 
01009 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
01010                      uint8_t * dst[3], const int dstStride[3],
01011                      int width, int height,
01012                      const QP_STORE_T *QP_store,  int QPStride,
01013                      pp_mode *vm,  void *vc, int pict_type)
01014 {
01015     int mbWidth = (width+15)>>4;
01016     int mbHeight= (height+15)>>4;
01017     PPMode *mode = (PPMode*)vm;
01018     PPContext *c = (PPContext*)vc;
01019     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
01020     int absQPStride = FFABS(QPStride);
01021 
01022     // c->stride and c->QPStride are always positive
01023     if(c->stride < minStride || c->qpStride < absQPStride)
01024         reallocBuffers(c, width, height,
01025                        FFMAX(minStride, c->stride),
01026                        FFMAX(c->qpStride, absQPStride));
01027 
01028     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
01029         int i;
01030         QP_store= c->forcedQPTable;
01031         absQPStride = QPStride = 0;
01032         if(mode->lumMode & FORCE_QUANT)
01033             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01034         else
01035             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01036     }
01037 
01038     if(pict_type & PP_PICT_TYPE_QP2){
01039         int i;
01040         const int count= mbHeight * absQPStride;
01041         for(i=0; i<(count>>2); i++){
01042             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01043         }
01044         for(i<<=2; i<count; i++){
01045             c->stdQPTable[i] = QP_store[i]>>1;
01046         }
01047         QP_store= c->stdQPTable;
01048         QPStride= absQPStride;
01049     }
01050 
01051     if(0){
01052         int x,y;
01053         for(y=0; y<mbHeight; y++){
01054             for(x=0; x<mbWidth; x++){
01055                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01056             }
01057             av_log(c, AV_LOG_INFO, "\n");
01058         }
01059         av_log(c, AV_LOG_INFO, "\n");
01060     }
01061 
01062     if((pict_type&7)!=3){
01063         if (QPStride >= 0){
01064             int i;
01065             const int count= mbHeight * QPStride;
01066             for(i=0; i<(count>>2); i++){
01067                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01068             }
01069             for(i<<=2; i<count; i++){
01070                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01071             }
01072         } else {
01073             int i,j;
01074             for(i=0; i<mbHeight; i++) {
01075                 for(j=0; j<absQPStride; j++) {
01076                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01077                 }
01078             }
01079         }
01080     }
01081 
01082     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01083            mode->lumMode, mode->chromMode);
01084 
01085     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01086                 width, height, QP_store, QPStride, 0, mode, c);
01087 
01088     width  = (width )>>c->hChromaSubSample;
01089     height = (height)>>c->vChromaSubSample;
01090 
01091     if(mode->chromMode){
01092         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01093                     width, height, QP_store, QPStride, 1, mode, c);
01094         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01095                     width, height, QP_store, QPStride, 2, mode, c);
01096     }
01097     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01098         linecpy(dst[1], src[1], height, srcStride[1]);
01099         linecpy(dst[2], src[2], height, srcStride[2]);
01100     }else{
01101         int y;
01102         for(y=0; y<height; y++){
01103             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01104             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01105         }
01106     }
01107 }
01108