Libav 0.7.1
libpostproc/postprocess.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
00003  *
00004  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
00005  *
00006  * This file is part of Libav.
00007  *
00008  * Libav is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * Libav is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with Libav; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00028 /*
00029                         C       MMX     MMX2    3DNow   AltiVec
00030 isVertDC                Ec      Ec                      Ec
00031 isVertMinMaxOk          Ec      Ec                      Ec
00032 doVertLowPass           E               e       e       Ec
00033 doVertDefFilter         Ec      Ec      e       e       Ec
00034 isHorizDC               Ec      Ec                      Ec
00035 isHorizMinMaxOk         a       E                       Ec
00036 doHorizLowPass          E               e       e       Ec
00037 doHorizDefFilter        Ec      Ec      e       e       Ec
00038 do_a_deblock            Ec      E       Ec      E
00039 deRing                  E               e       e*      Ecp
00040 Vertical RKAlgo1        E               a       a
00041 Horizontal RKAlgo1                      a       a
00042 Vertical X1#            a               E       E
00043 Horizontal X1#          a               E       E
00044 LinIpolDeinterlace      e               E       E*
00045 CubicIpolDeinterlace    a               e       e*
00046 LinBlendDeinterlace     e               E       E*
00047 MedianDeinterlace#      E       Ec      Ec
00048 TempDeNoiser#           E               e       e       Ec
00049 
00050 * I do not have a 3DNow! CPU, so that column is untested; nobody has reported it broken, so it presumably works
00051 # more or less selfinvented filters so the exactness is not too meaningful
00052 E = Exact implementation
00053 e = almost exact implementation (slightly different rounding,...)
00054 a = alternative / approximate impl
00055 c = checked against the other implementations (-vo md5)
00056 p = partially optimized, still some work to do
00057 */
00058 
00059 /*
00060 TODO:
00061 reduce the time wasted on the mem transfer
00062 unroll stuff if instructions depend too much on the prior one
00063 move YScale thing to the end instead of fixing QP
00064 write a faster and higher quality deblocking filter :)
00065 make the mainloop more flexible (variable number of blocks at once);
00066         the if/else stuff per block is slowing things down
00067 compare the quality & speed of all filters
00068 split this huge file
00069 optimize c versions
00070 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
00071 ...
00072 */
00073 
00074 //Changelog: use git log
00075 
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082 //#undef HAVE_MMX2
00083 //#define HAVE_AMD3DNOW
00084 //#undef HAVE_MMX
00085 //#undef ARCH_X86
00086 //#define DEBUG_BRIGHTNESS
00087 #include "postprocess.h"
00088 #include "postprocess_internal.h"
00089 #include "libavutil/avstring.h"
00090 
00091 unsigned postproc_version(void)
00092 {
00093     return LIBPOSTPROC_VERSION_INT;
00094 }
00095 
00096 const char *postproc_configuration(void)
00097 {
00098     return LIBAV_CONFIGURATION;
00099 }
00100 
00101 const char *postproc_license(void)
00102 {
00103 #define LICENSE_PREFIX "libpostproc license: "
00104     return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00105 }
00106 
00107 #if HAVE_ALTIVEC_H
00108 #include <altivec.h>
00109 #endif
00110 
/* Size in bytes of the scratch buffer pp_get_mode_by_name_and_quality()
 * copies the mode string into. */
00111 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter options[] array used while parsing a mode string. */
00112 #define OPTIONS_ARRAY_SIZE 10
/* Edge length, in pixels, of the blocks the filters in this file iterate over. */
00113 #define BLOCK_SIZE 8
/* NOTE(review): not referenced in this part of the file; presumably used by
 * the included template code - confirm. */
00114 #define TEMP_STRIDE 8
00115 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
00116 
00117 #if ARCH_X86
/* 64-bit constants: wNN repeats the 16-bit word 0x00NN four times,
 * bNN repeats the byte 0xNN eight times. */
00118 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00119 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00120 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00121 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00122 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00123 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00124 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00125 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00126 #endif
00127 
/* NOTE(review): not referenced in this part of the file; presumably the
 * threshold used by the dering filter in the template code - confirm. */
00128 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00129 
00130 
/* Table of all known filters, terminated by an entry whose names are NULL.
 * pp_get_mode_by_name_and_quality() matches a filter token against both names
 * of each entry with strcmp(), so the names here are exactly what users type.
 * NOTE(review): the initializer order is presumably
 *   short name, long name, chromDefault, minLumQuality, minChromQuality, mask
 * - confirm against struct PPFilter in postprocess_internal.h. */
00131 static struct PPFilter filters[]=
00132 {
00133     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
00134     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
00135 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
00136     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
00137     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
00138     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
00139     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
00140     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
00141     {"dr", "dering",                1, 5, 6, DERING},
00142     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
00143     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00144     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00145     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00146     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
00147     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
00148     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
00149     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
00150     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
00151     {NULL, NULL,0,0,0,0} //End Marker
00152 };
00153 
/* Alias table, stored as consecutive (alias, expansion) string pairs and
 * terminated by a single NULL. The mode parser compares a filter name with
 * replaceTable[2*i] and, on a match, splices replaceTable[2*i + 1] into the
 * mode string in its place. */
00154 static const char *replaceTable[]=
00155 {
00156     "default",      "hb:a,vb:a,dr:a",
00157     "de",           "hb:a,vb:a,dr:a",
00158     "fast",         "h1:a,v1:a,dr:a",
00159     "fa",           "h1:a,v1:a,dr:a",
00160     "ac",           "ha:a:128:7,va:a,dr:a",
00161     NULL //End Marker
00162 };
00163 
00164 
00165 #if ARCH_X86
/* Thin wrappers that each emit one x86 prefetch instruction for the address
 * p; p is passed in a register and used as the memory operand. They differ
 * only in the instruction variant (nta, t0, t1, t2) that is emitted. */

/* Emit "prefetchnta" for address p. */
00166 static inline void prefetchnta(void *p)
00167 {
00168     __asm__ volatile(   "prefetchnta (%0)\n\t"
00169         : : "r" (p)
00170     );
00171 }
00172 
/* Emit "prefetcht0" for address p. */
00173 static inline void prefetcht0(void *p)
00174 {
00175     __asm__ volatile(   "prefetcht0 (%0)\n\t"
00176         : : "r" (p)
00177     );
00178 }
00179 
/* Emit "prefetcht1" for address p. */
00180 static inline void prefetcht1(void *p)
00181 {
00182     __asm__ volatile(   "prefetcht1 (%0)\n\t"
00183         : : "r" (p)
00184     );
00185 }
00186 
/* Emit "prefetcht2" for address p. */
00187 static inline void prefetcht2(void *p)
00188 {
00189     __asm__ volatile(   "prefetcht2 (%0)\n\t"
00190         : : "r" (p)
00191     );
00192 }
00193 #endif
00194 
00195 /* The horizontal functions exist only in C because the MMX
00196  * code is faster with vertical filters and transposing. */
00197 
00201 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00202 {
00203     int numEq= 0;
00204     int y;
00205     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00206     const int dcThreshold= dcOffset*2 + 1;
00207 
00208     for(y=0; y<BLOCK_SIZE; y++){
00209         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00210         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00211         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00212         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00213         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00214         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00215         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00216         src+= stride;
00217     }
00218     return numEq > c->ppMode.flatnessThreshold;
00219 }
00220 
00224 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00225 {
00226     int numEq= 0;
00227     int y;
00228     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00229     const int dcThreshold= dcOffset*2 + 1;
00230 
00231     src+= stride*4; // src points to begin of the 8x8 Block
00232     for(y=0; y<BLOCK_SIZE-1; y++){
00233         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00234         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00235         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00236         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00237         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00238         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00239         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00240         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00241         src+= stride;
00242     }
00243     return numEq > c->ppMode.flatnessThreshold;
00244 }
00245 
/**
 * Cheap horizontal range check on 8 rows of pixels.
 *
 * On every row one pair of columns is sampled (the pair rotates with the row
 * number, repeating every 4 rows) and the check fails as soon as the
 * difference of a sampled pair lies outside [-2*QP, 2*QP].
 *
 * @param src    first pixel of the first row
 * @param stride distance between two rows, in bytes
 * @param QP     quantizer controlling the allowed difference
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    /* columns {minuend, subtrahend} sampled on rows 0..3; rows 4..7 repeat them */
    static const uint8_t columnPair[4][2] = {
        { 0, 5 }, { 2, 7 }, { 4, 1 }, { 6, 3 },
    };
    int row;

    for (row = 0; row < 8; row++, src += stride) {
        const int lhs = src[columnPair[row & 3][0]];
        const int rhs = src[columnPair[row & 3][1]];

        /* the unsigned cast folds the two-sided range test into one compare */
        if ((unsigned)(lhs - rhs + 2*QP) > 4*QP)
            return 0;
    }

    return 1;
}
00268 
00269 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00270 {
00271 #if 1
00272 #if 1
00273     int x;
00274     src+= stride*4;
00275     for(x=0; x<BLOCK_SIZE; x+=4){
00276         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
00277         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00278         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00279         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00280     }
00281 #else
00282     int x;
00283     src+= stride*3;
00284     for(x=0; x<BLOCK_SIZE; x++){
00285         if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
00286     }
00287 #endif
00288     return 1;
00289 #else
00290     int x;
00291     src+= stride*4;
00292     for(x=0; x<BLOCK_SIZE; x++){
00293         int min=255;
00294         int max=0;
00295         int y;
00296         for(y=0; y<8; y++){
00297             int v= src[x + y*stride];
00298             if(v>max) max=v;
00299             if(v<min) min=v;
00300         }
00301         if(max-min > 2*QP) return 0;
00302     }
00303     return 1;
00304 #endif
00305 }
00306 
00307 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00308 {
00309     if( isHorizDC_C(src, stride, c) ){
00310         if( isHorizMinMaxOk_C(src, stride, c->QP) )
00311             return 1;
00312         else
00313             return 0;
00314     }else{
00315         return 2;
00316     }
00317 }
00318 
00319 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00320 {
00321     if( isVertDC_C(src, stride, c) ){
00322         if( isVertMinMaxOk_C(src, stride, c->QP) )
00323             return 1;
00324         else
00325             return 0;
00326     }else{
00327         return 2;
00328     }
00329 }
00330 
00331 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00332 {
00333     int y;
00334     for(y=0; y<BLOCK_SIZE; y++){
00335         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00336 
00337         if(FFABS(middleEnergy) < 8*c->QP){
00338             const int q=(dst[3] - dst[4])/2;
00339             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00340             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00341 
00342             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00343             d= FFMAX(d, 0);
00344 
00345             d= (5*d + 32) >> 6;
00346             d*= FFSIGN(-middleEnergy);
00347 
00348             if(q>0)
00349             {
00350                 d= d<0 ? 0 : d;
00351                 d= d>q ? q : d;
00352             }
00353             else
00354             {
00355                 d= d>0 ? 0 : d;
00356                 d= d<q ? q : d;
00357             }
00358 
00359             dst[3]-= d;
00360             dst[4]+= d;
00361         }
00362         dst+= stride;
00363     }
00364 }
00365 
00370 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00371 {
00372     int y;
00373     for(y=0; y<BLOCK_SIZE; y++){
00374         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00375         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00376 
00377         int sums[10];
00378         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00379         sums[1] = sums[0] - first  + dst[3];
00380         sums[2] = sums[1] - first  + dst[4];
00381         sums[3] = sums[2] - first  + dst[5];
00382         sums[4] = sums[3] - first  + dst[6];
00383         sums[5] = sums[4] - dst[0] + dst[7];
00384         sums[6] = sums[5] - dst[1] + last;
00385         sums[7] = sums[6] - dst[2] + last;
00386         sums[8] = sums[7] - dst[3] + last;
00387         sums[9] = sums[8] - dst[4] + last;
00388 
00389         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00390         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00391         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00392         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00393         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00394         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00395         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00396         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00397 
00398         dst+= stride;
00399     }
00400 }
00401 
00410 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00411 {
00412     int y;
00413     static uint64_t *lut= NULL;
00414     if(lut==NULL)
00415     {
00416         int i;
00417         lut = av_malloc(256*8);
00418         for(i=0; i<256; i++)
00419         {
00420             int v= i < 128 ? 2*i : 2*(i-256);
00421 /*
00422 //Simulate 112242211 9-Tap filter
00423             uint64_t a= (v/16)  & 0xFF;
00424             uint64_t b= (v/8)   & 0xFF;
00425             uint64_t c= (v/4)   & 0xFF;
00426             uint64_t d= (3*v/8) & 0xFF;
00427 */
00428 //Simulate piecewise linear interpolation
00429             uint64_t a= (v/16)   & 0xFF;
00430             uint64_t b= (v*3/16) & 0xFF;
00431             uint64_t c= (v*5/16) & 0xFF;
00432             uint64_t d= (7*v/16) & 0xFF;
00433             uint64_t A= (0x100 - a)&0xFF;
00434             uint64_t B= (0x100 - b)&0xFF;
00435             uint64_t C= (0x100 - c)&0xFF;
00436             uint64_t D= (0x100 - c)&0xFF;
00437 
00438             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00439                        (D<<24) | (C<<16) | (B<<8)  | (A);
00440             //lut[i] = (v<<32) | (v<<24);
00441         }
00442     }
00443 
00444     for(y=0; y<BLOCK_SIZE; y++){
00445         int a= src[1] - src[2];
00446         int b= src[3] - src[4];
00447         int c= src[5] - src[6];
00448 
00449         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00450 
00451         if(d < QP){
00452             int v = d * FFSIGN(-b);
00453 
00454             src[1] +=v/8;
00455             src[2] +=v/4;
00456             src[3] +=3*v/8;
00457             src[4] -=3*v/8;
00458             src[5] -=v/4;
00459             src[6] -=v/8;
00460         }
00461         src+=stride;
00462     }
00463 }
00464 
00468 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00469     int y;
00470     const int QP= c->QP;
00471     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00472     const int dcThreshold= dcOffset*2 + 1;
00473 //START_TIMER
00474     src+= step*4; // src points to begin of the 8x8 Block
00475     for(y=0; y<8; y++){
00476         int numEq= 0;
00477 
00478         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00479         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00480         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00481         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00482         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00483         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00484         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00485         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00486         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00487         if(numEq > c->ppMode.flatnessThreshold){
00488             int min, max, x;
00489 
00490             if(src[0] > src[step]){
00491                 max= src[0];
00492                 min= src[step];
00493             }else{
00494                 max= src[step];
00495                 min= src[0];
00496             }
00497             for(x=2; x<8; x+=2){
00498                 if(src[x*step] > src[(x+1)*step]){
00499                         if(src[x    *step] > max) max= src[ x   *step];
00500                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
00501                 }else{
00502                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
00503                         if(src[ x   *step] < min) min= src[ x   *step];
00504                 }
00505             }
00506             if(max-min < 2*QP){
00507                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00508                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00509 
00510                 int sums[10];
00511                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00512                 sums[1] = sums[0] - first       + src[3*step];
00513                 sums[2] = sums[1] - first       + src[4*step];
00514                 sums[3] = sums[2] - first       + src[5*step];
00515                 sums[4] = sums[3] - first       + src[6*step];
00516                 sums[5] = sums[4] - src[0*step] + src[7*step];
00517                 sums[6] = sums[5] - src[1*step] + last;
00518                 sums[7] = sums[6] - src[2*step] + last;
00519                 sums[8] = sums[7] - src[3*step] + last;
00520                 sums[9] = sums[8] - src[4*step] + last;
00521 
00522                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00523                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00524                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00525                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00526                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00527                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00528                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00529                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00530             }
00531         }else{
00532             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00533 
00534             if(FFABS(middleEnergy) < 8*QP){
00535                 const int q=(src[3*step] - src[4*step])/2;
00536                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00537                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00538 
00539                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00540                 d= FFMAX(d, 0);
00541 
00542                 d= (5*d + 32) >> 6;
00543                 d*= FFSIGN(-middleEnergy);
00544 
00545                 if(q>0){
00546                     d= d<0 ? 0 : d;
00547                     d= d>q ? q : d;
00548                 }else{
00549                     d= d>0 ? 0 : d;
00550                     d= d<q ? q : d;
00551                 }
00552 
00553                 src[3*step]-= d;
00554                 src[4*step]+= d;
00555             }
00556         }
00557 
00558         src += stride;
00559     }
00560 /*if(step==16){
00561     STOP_TIMER("step16")
00562 }else{
00563     STOP_TIMER("stepX")
00564 }*/
00565 }
00566 
/* postprocess_template.c is included once per CPU variant below. Before each
 * inclusion RENAME() is redefined to paste a variant suffix (_C, _altivec,
 * _MMX, _MMX2, _3DNow) onto an identifier, and the HAVE_* macros are set to
 * describe that variant; postProcess() later calls postProcess_C,
 * postProcess_MMX, etc. The COMPILE_* macros select which variants get built. */
00567 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
00568 //Plain C versions
/* Built when neither MMX nor AltiVec is available at build time, and always
 * when runtime CPU detection is enabled. */
00569 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
00570 #define COMPILE_C
00571 #endif
00572 
00573 #if HAVE_ALTIVEC
00574 #define COMPILE_ALTIVEC
00575 #endif //HAVE_ALTIVEC
00576 
00577 #if ARCH_X86
00578 
/* Without runtime CPU detection exactly one x86 SIMD variant is selected:
 * MMX2 wins over 3DNow!, and 3DNow! wins over plain MMX. With runtime
 * detection all three are built. */
00579 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00580 #define COMPILE_MMX
00581 #endif
00582 
00583 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
00584 #define COMPILE_MMX2
00585 #endif
00586 
00587 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00588 #define COMPILE_3DNOW
00589 #endif
00590 #endif /* ARCH_X86 */
00591 
/* Reset every feature macro to 0; each section below re-enables only what its
 * own variant needs before including the template. */
00592 #undef HAVE_MMX
00593 #define HAVE_MMX 0
00594 #undef HAVE_MMX2
00595 #define HAVE_MMX2 0
00596 #undef HAVE_AMD3DNOW
00597 #define HAVE_AMD3DNOW 0
00598 #undef HAVE_ALTIVEC
00599 #define HAVE_ALTIVEC 0
00600 
/* C variant: all HAVE_* macros are 0 here. */
00601 #ifdef COMPILE_C
00602 #define RENAME(a) a ## _C
00603 #include "postprocess_template.c"
00604 #endif
00605 
/* AltiVec variant: also pulls in the AltiVec-specific template first. */
00606 #ifdef COMPILE_ALTIVEC
00607 #undef RENAME
00608 #undef HAVE_ALTIVEC
00609 #define HAVE_ALTIVEC 1
00610 #define RENAME(a) a ## _altivec
00611 #include "postprocess_altivec_template.c"
00612 #include "postprocess_template.c"
00613 #endif
00614 
00615 //MMX versions
00616 #ifdef COMPILE_MMX
00617 #undef RENAME
00618 #undef HAVE_MMX
00619 #define HAVE_MMX 1
00620 #define RENAME(a) a ## _MMX
00621 #include "postprocess_template.c"
00622 #endif
00623 
00624 //MMX2 versions
00625 #ifdef COMPILE_MMX2
00626 #undef RENAME
00627 #undef HAVE_MMX
00628 #undef HAVE_MMX2
00629 #define HAVE_MMX 1
00630 #define HAVE_MMX2 1
00631 #define RENAME(a) a ## _MMX2
00632 #include "postprocess_template.c"
00633 #endif
00634 
00635 //3DNOW versions
00636 #ifdef COMPILE_3DNOW
00637 #undef RENAME
00638 #undef HAVE_MMX
00639 #undef HAVE_MMX2
00640 #undef HAVE_AMD3DNOW
00641 #define HAVE_MMX 1
00642 #define HAVE_MMX2 0
00643 #define HAVE_AMD3DNOW 1
00644 #define RENAME(a) a ## _3DNow
00645 #include "postprocess_template.c"
00646 #endif
00647 
00648 // note: from here on the HAVE_xyz macros hold whatever the sections above last set them to, not the values from config.h, so do not rely on them for anything new.
00649 
/**
 * Dispatch one plane to the postProcess_<variant>() implementation.
 *
 * The mode *vm is first copied into the context's ppMode. With
 * CONFIG_RUNTIME_CPUDETECT the variant is chosen at run time from c->cpuCaps;
 * otherwise it is fixed at compile time by the HAVE_* macros as they stand at
 * this point of the file. All remaining arguments are forwarded unchanged.
 *
 * @param vm pointer that is cast to PPMode
 * @param vc pointer that is cast to PPContext
 */
00650 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00651         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00652 {
00653     PPContext *c= (PPContext *)vc;
00654     PPMode *ppMode= (PPMode *)vm;
00655     c->ppMode= *ppMode; //FIXME
00656 
00657     // Using ifs here as they are faster than function pointers although the
00658     // difference would not be measurable here but it is much better because
00659     // someone might exchange the CPU without restarting MPlayer ;)
00660 #if CONFIG_RUNTIME_CPUDETECT
00661 #if ARCH_X86
00662     // ordered per speed fastest first
00663     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00664         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00665     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00666         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00667     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00668         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00669     else
00670         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00671 #else
00672 #if HAVE_ALTIVEC
00673     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00674             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00675     else
00676 #endif
00677             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00678 #endif
00679 #else //CONFIG_RUNTIME_CPUDETECT
      // compile-time selection: exactly one call is compiled in
00680 #if   HAVE_MMX2
00681             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00682 #elif HAVE_AMD3DNOW
00683             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00684 #elif HAVE_MMX
00685             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00686 #elif HAVE_ALTIVEC
00687             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00688 #else
00689             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00690 #endif
00691 #endif //!CONFIG_RUNTIME_CPUDETECT
00692 }
00693 
00694 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00695 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
00696 
00697 /* -pp Command line Help
00698 */
/* Help text for the postprocessing mode string.
 * The filter names listed here must be spelled exactly as in the filters[]
 * table, because the mode parser matches them with a case-sensitive strcmp():
 * the long names are "ahdeblock", "avdeblock" and "forcequant". */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00742 
00743 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00744 {
00745     char temp[GET_MODE_BUFFER_SIZE];
00746     char *p= temp;
00747     static const char filterDelimiters[] = ",/";
00748     static const char optionDelimiters[] = ":";
00749     struct PPMode *ppMode;
00750     char *filterToken;
00751 
00752     ppMode= av_malloc(sizeof(PPMode));
00753 
00754     ppMode->lumMode= 0;
00755     ppMode->chromMode= 0;
00756     ppMode->maxTmpNoise[0]= 700;
00757     ppMode->maxTmpNoise[1]= 1500;
00758     ppMode->maxTmpNoise[2]= 3000;
00759     ppMode->maxAllowedY= 234;
00760     ppMode->minAllowedY= 16;
00761     ppMode->baseDcDiff= 256/8;
00762     ppMode->flatnessThreshold= 56-16-1;
00763     ppMode->maxClippedThreshold= 0.01;
00764     ppMode->error=0;
00765 
00766     memset(temp, 0, GET_MODE_BUFFER_SIZE);
00767     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00768 
00769     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00770 
00771     for(;;){
00772         char *filterName;
00773         int q= 1000000; //PP_QUALITY_MAX;
00774         int chrom=-1;
00775         int luma=-1;
00776         char *option;
00777         char *options[OPTIONS_ARRAY_SIZE];
00778         int i;
00779         int filterNameOk=0;
00780         int numOfUnknownOptions=0;
00781         int enable=1; //does the user want us to enabled or disabled the filter
00782 
00783         filterToken= strtok(p, filterDelimiters);
00784         if(filterToken == NULL) break;
00785         p+= strlen(filterToken) + 1; // p points to next filterToken
00786         filterName= strtok(filterToken, optionDelimiters);
00787         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00788 
00789         if(*filterName == '-'){
00790             enable=0;
00791             filterName++;
00792         }
00793 
00794         for(;;){ //for all options
00795             option= strtok(NULL, optionDelimiters);
00796             if(option == NULL) break;
00797 
00798             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00799             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00800             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00801             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00802             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00803             else{
00804                 options[numOfUnknownOptions] = option;
00805                 numOfUnknownOptions++;
00806             }
00807             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00808         }
00809         options[numOfUnknownOptions] = NULL;
00810 
00811         /* replace stuff from the replace Table */
00812         for(i=0; replaceTable[2*i]!=NULL; i++){
00813             if(!strcmp(replaceTable[2*i], filterName)){
00814                 int newlen= strlen(replaceTable[2*i + 1]);
00815                 int plen;
00816                 int spaceLeft;
00817 
00818                 if(p==NULL) p= temp, *p=0;      //last filter
00819                 else p--, *p=',';               //not last filter
00820 
00821                 plen= strlen(p);
00822                 spaceLeft= p - temp + plen;
00823                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
00824                     ppMode->error++;
00825                     break;
00826                 }
00827                 memmove(p + newlen, p, plen+1);
00828                 memcpy(p, replaceTable[2*i + 1], newlen);
00829                 filterNameOk=1;
00830             }
00831         }
00832 
00833         for(i=0; filters[i].shortName!=NULL; i++){
00834             if(   !strcmp(filters[i].longName, filterName)
00835                || !strcmp(filters[i].shortName, filterName)){
00836                 ppMode->lumMode &= ~filters[i].mask;
00837                 ppMode->chromMode &= ~filters[i].mask;
00838 
00839                 filterNameOk=1;
00840                 if(!enable) break; // user wants to disable it
00841 
00842                 if(q >= filters[i].minLumQuality && luma)
00843                     ppMode->lumMode|= filters[i].mask;
00844                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00845                     if(q >= filters[i].minChromQuality)
00846                             ppMode->chromMode|= filters[i].mask;
00847 
00848                 if(filters[i].mask == LEVEL_FIX){
00849                     int o;
00850                     ppMode->minAllowedY= 16;
00851                     ppMode->maxAllowedY= 234;
00852                     for(o=0; options[o]!=NULL; o++){
00853                         if(  !strcmp(options[o],"fullyrange")
00854                            ||!strcmp(options[o],"f")){
00855                             ppMode->minAllowedY= 0;
00856                             ppMode->maxAllowedY= 255;
00857                             numOfUnknownOptions--;
00858                         }
00859                     }
00860                 }
00861                 else if(filters[i].mask == TEMP_NOISE_FILTER)
00862                 {
00863                     int o;
00864                     int numOfNoises=0;
00865 
00866                     for(o=0; options[o]!=NULL; o++){
00867                         char *tail;
00868                         ppMode->maxTmpNoise[numOfNoises]=
00869                             strtol(options[o], &tail, 0);
00870                         if(tail!=options[o]){
00871                             numOfNoises++;
00872                             numOfUnknownOptions--;
00873                             if(numOfNoises >= 3) break;
00874                         }
00875                     }
00876                 }
00877                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
00878                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00879                     int o;
00880 
00881                     for(o=0; options[o]!=NULL && o<2; o++){
00882                         char *tail;
00883                         int val= strtol(options[o], &tail, 0);
00884                         if(tail==options[o]) break;
00885 
00886                         numOfUnknownOptions--;
00887                         if(o==0) ppMode->baseDcDiff= val;
00888                         else ppMode->flatnessThreshold= val;
00889                     }
00890                 }
00891                 else if(filters[i].mask == FORCE_QUANT){
00892                     int o;
00893                     ppMode->forcedQuant= 15;
00894 
00895                     for(o=0; options[o]!=NULL && o<1; o++){
00896                         char *tail;
00897                         int val= strtol(options[o], &tail, 0);
00898                         if(tail==options[o]) break;
00899 
00900                         numOfUnknownOptions--;
00901                         ppMode->forcedQuant= val;
00902                     }
00903                 }
00904             }
00905         }
00906         if(!filterNameOk) ppMode->error++;
00907         ppMode->error += numOfUnknownOptions;
00908     }
00909 
00910     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00911     if(ppMode->error){
00912         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00913         av_free(ppMode);
00914         return NULL;
00915     }
00916     return ppMode;
00917 }
00918 
00919 void pp_free_mode(pp_mode *mode){
00920     av_free(mode);
00921 }
00922 
/**
 * Replace the buffer referenced by *p with a newly allocated one obtained
 * from av_mallocz().
 *
 * The previous buffer is released before the new one is allocated, so its
 * contents are not carried over. The result of av_mallocz() is stored
 * unchecked; callers have to cope with *p being NULL afterwards.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
00927 
00928 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00929     int mbWidth = (width+15)>>4;
00930     int mbHeight= (height+15)>>4;
00931     int i;
00932 
00933     c->stride= stride;
00934     c->qpStride= qpStride;
00935 
00936     reallocAlign((void **)&c->tempDst, 8, stride*24);
00937     reallocAlign((void **)&c->tempSrc, 8, stride*24);
00938     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00939     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00940     for(i=0; i<256; i++)
00941             c->yHistogram[i]= width*height/64*15/256;
00942 
00943     for(i=0; i<3; i++){
00944         //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
00945         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00946         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
00947     }
00948 
00949     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00950     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00951     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00952     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00953 }
00954 
/**
 * Name callback installed in the AVClass descriptor of this file: every
 * context reports the same fixed name.
 *
 * @param ptr context pointer, ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    (void)ptr;
    return name;
}
00958 
00959 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00960 
00961 pp_context *pp_get_context(int width, int height, int cpuCaps){
00962     PPContext *c= av_malloc(sizeof(PPContext));
00963     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
00964     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
00965 
00966     memset(c, 0, sizeof(PPContext));
00967     c->av_class = &av_codec_context_class;
00968     c->cpuCaps= cpuCaps;
00969     if(cpuCaps&PP_FORMAT){
00970         c->hChromaSubSample= cpuCaps&0x3;
00971         c->vChromaSubSample= (cpuCaps>>4)&0x3;
00972     }else{
00973         c->hChromaSubSample= 1;
00974         c->vChromaSubSample= 1;
00975     }
00976 
00977     reallocBuffers(c, width, height, stride, qpStride);
00978 
00979     c->frameNum=-1;
00980 
00981     return c;
00982 }
00983 
00984 void pp_free_context(void *vc){
00985     PPContext *c = (PPContext*)vc;
00986     int i;
00987 
00988     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00989     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00990 
00991     av_free(c->tempBlocks);
00992     av_free(c->yHistogram);
00993     av_free(c->tempDst);
00994     av_free(c->tempSrc);
00995     av_free(c->deintTemp);
00996     av_free(c->stdQPTable);
00997     av_free(c->nonBQPTable);
00998     av_free(c->forcedQPTable);
00999 
01000     memset(c, 0, sizeof(PPContext));
01001 
01002     av_free(c);
01003 }
01004 
01005 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
01006                      uint8_t * dst[3], const int dstStride[3],
01007                      int width, int height,
01008                      const QP_STORE_T *QP_store,  int QPStride,
01009                      pp_mode *vm,  void *vc, int pict_type)
01010 {
01011     int mbWidth = (width+15)>>4;
01012     int mbHeight= (height+15)>>4;
01013     PPMode *mode = (PPMode*)vm;
01014     PPContext *c = (PPContext*)vc;
01015     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
01016     int absQPStride = FFABS(QPStride);
01017 
01018     // c->stride and c->QPStride are always positive
01019     if(c->stride < minStride || c->qpStride < absQPStride)
01020         reallocBuffers(c, width, height,
01021                        FFMAX(minStride, c->stride),
01022                        FFMAX(c->qpStride, absQPStride));
01023 
01024     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
01025         int i;
01026         QP_store= c->forcedQPTable;
01027         absQPStride = QPStride = 0;
01028         if(mode->lumMode & FORCE_QUANT)
01029             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01030         else
01031             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01032     }
01033 
01034     if(pict_type & PP_PICT_TYPE_QP2){
01035         int i;
01036         const int count= mbHeight * absQPStride;
01037         for(i=0; i<(count>>2); i++){
01038             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01039         }
01040         for(i<<=2; i<count; i++){
01041             c->stdQPTable[i] = QP_store[i]>>1;
01042         }
01043         QP_store= c->stdQPTable;
01044         QPStride= absQPStride;
01045     }
01046 
01047     if(0){
01048         int x,y;
01049         for(y=0; y<mbHeight; y++){
01050             for(x=0; x<mbWidth; x++){
01051                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01052             }
01053             av_log(c, AV_LOG_INFO, "\n");
01054         }
01055         av_log(c, AV_LOG_INFO, "\n");
01056     }
01057 
01058     if((pict_type&7)!=3){
01059         if (QPStride >= 0){
01060             int i;
01061             const int count= mbHeight * QPStride;
01062             for(i=0; i<(count>>2); i++){
01063                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01064             }
01065             for(i<<=2; i<count; i++){
01066                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01067             }
01068         } else {
01069             int i,j;
01070             for(i=0; i<mbHeight; i++) {
01071                 for(j=0; j<absQPStride; j++) {
01072                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01073                 }
01074             }
01075         }
01076     }
01077 
01078     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01079            mode->lumMode, mode->chromMode);
01080 
01081     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01082                 width, height, QP_store, QPStride, 0, mode, c);
01083 
01084     width  = (width )>>c->hChromaSubSample;
01085     height = (height)>>c->vChromaSubSample;
01086 
01087     if(mode->chromMode){
01088         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01089                     width, height, QP_store, QPStride, 1, mode, c);
01090         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01091                     width, height, QP_store, QPStride, 2, mode, c);
01092     }
01093     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01094         linecpy(dst[1], src[1], height, srcStride[1]);
01095         linecpy(dst[2], src[2], height, srcStride[2]);
01096     }else{
01097         int y;
01098         for(y=0; y<height; y++){
01099             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01100             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01101         }
01102     }
01103 }