Libav 0.7.1
libavcodec/h264_loopfilter.c
Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... loop filter
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "libavutil/intreadwrite.h"
00029 #include "internal.h"
00030 #include "dsputil.h"
00031 #include "avcodec.h"
00032 #include "mpegvideo.h"
00033 #include "h264.h"
00034 #include "mathops.h"
00035 #include "rectangle.h"
00036 
00037 //#undef NDEBUG
00038 #include <assert.h>
00039 
00040 /* Deblocking filter (p153) */
/*
 * Alpha thresholds used by the edge filters in this file.
 *
 * The table holds 52*3 entries laid out as three runs of 52:
 *   - entries   0..51 : all 0
 *   - entries  52..103: the 52 threshold values proper (0 up to 255)
 *   - entries 104..155: all 255 (the last proper value repeated)
 * The filters index it with  qp - qp_bd_offset + h->slice_alpha_c0_offset.
 * NOTE(review): the padding on both sides presumably lets that sum be used
 * without clamping, with slice_alpha_c0_offset already biased by 52 where it
 * is set -- confirm against the slice header parser.
 * A looked-up value of 0 makes the edge filters return without filtering.
 */
00041 static const uint8_t alpha_table[52*3] = {
00042      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00043      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00044      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00045      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00046      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00047      0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
00048      7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
00049     25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
00050     80, 90,101,113,127,144,162,182,203,226,
00051    255,255,
00052    255,255,255,255,255,255,255,255,255,255,255,255,255,
00053    255,255,255,255,255,255,255,255,255,255,255,255,255,
00054    255,255,255,255,255,255,255,255,255,255,255,255,255,
00055    255,255,255,255,255,255,255,255,255,255,255,255,255,
00056 };
/*
 * Beta thresholds used by the edge filters in this file.
 *
 * Same three-run layout as alpha_table: 52 zeros, the 52 threshold values
 * proper (0 up to 18), then 52 copies of the last value (18).
 * The filters index it with  qp - qp_bd_offset + h->slice_beta_offset.
 * NOTE(review): as with alpha_table, slice_beta_offset is presumably biased
 * by 52 where it is set -- confirm.
 * A looked-up value of 0 makes the edge filters return without filtering.
 */
00057 static const uint8_t beta_table[52*3] = {
00058      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00059      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00060      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00061      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00062      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00063      0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
00064      3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
00065      8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
00066     13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
00067     18, 18,
00068     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
00069     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
00070     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
00071     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
00072 };
/*
 * tc0 clipping values used by the non-intra edge filters in this file.
 *
 * Rows are selected with the same index as alpha_table (index_a) and follow
 * the same three-run layout: 52 all-zero rows, the 52 rows proper, then 52
 * copies of the last row. Within a row the column is the boundary strength
 * bS (0..3).
 *
 * Column 0 is written as -1; stored in a uint8_t that is 255. The filters
 * copy the entry into an int8_t tc[] slot, where it reads back as -1 on the
 * usual two's-complement targets. The chroma variants add 1 before storing,
 * which turns that entry into 0.
 * NOTE(review): a negative tc presumably tells the luma DSP routine to skip
 * that 4-sample segment -- confirm against the h264dsp implementation.
 */
00073 static const uint8_t tc0_table[52*3][4] = {
    /* 52 padding rows: all zero */
00074     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
00075     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
00076     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
00077     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
00078     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
00079     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
00080     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
00081     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
00082     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    /* 52 rows proper */
00083     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
00084     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
00085     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
00086     {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
00087     {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
00088     {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
00089     {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
00090     {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
00091     {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    /* 52 padding rows: last row repeated */
00092     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
00093     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
00094     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
00095     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
00096     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
00097     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
00098     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
00099     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
00100     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
00101 };
00102 
00103 static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) {
00104     const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
00105     const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
00106     const int alpha = alpha_table[index_a];
00107     const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
00108     if (alpha ==0 || beta == 0) return;
00109 
00110     if( bS[0] < 4 ) {
00111         int8_t tc[4];
00112         tc[0] = tc0_table[index_a][bS[0]];
00113         tc[1] = tc0_table[index_a][bS[1]];
00114         tc[2] = tc0_table[index_a][bS[2]];
00115         tc[3] = tc0_table[index_a][bS[3]];
00116         h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
00117     } else {
00118         h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
00119     }
00120 }
00121 static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
00122     const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
00123     const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
00124     const int alpha = alpha_table[index_a];
00125     const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
00126     if (alpha ==0 || beta == 0) return;
00127 
00128     if( bS[0] < 4 ) {
00129         int8_t tc[4];
00130         tc[0] = tc0_table[index_a][bS[0]]+1;
00131         tc[1] = tc0_table[index_a][bS[1]]+1;
00132         tc[2] = tc0_table[index_a][bS[2]]+1;
00133         tc[3] = tc0_table[index_a][bS[3]]+1;
00134         h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
00135     } else {
00136         h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
00137     }
00138 }
00139 
00140 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) {
00141     const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
00142     int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
00143     int alpha = alpha_table[index_a];
00144     int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
00145     if (alpha ==0 || beta == 0) return;
00146 
00147     if( bS[0] < 4 ) {
00148         int8_t tc[4];
00149         tc[0] = tc0_table[index_a][bS[0*bsi]];
00150         tc[1] = tc0_table[index_a][bS[1*bsi]];
00151         tc[2] = tc0_table[index_a][bS[2*bsi]];
00152         tc[3] = tc0_table[index_a][bS[3*bsi]];
00153         h->h264dsp.h264_h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc);
00154     } else {
00155         h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta);
00156     }
00157 }
00158 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[7], int bsi, int qp ) {
00159     const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
00160     int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
00161     int alpha = alpha_table[index_a];
00162     int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
00163     if (alpha ==0 || beta == 0) return;
00164 
00165     if( bS[0] < 4 ) {
00166         int8_t tc[4];
00167         tc[0] = tc0_table[index_a][bS[0*bsi]] + 1;
00168         tc[1] = tc0_table[index_a][bS[1*bsi]] + 1;
00169         tc[2] = tc0_table[index_a][bS[2*bsi]] + 1;
00170         tc[3] = tc0_table[index_a][bS[3*bsi]] + 1;
00171         h->h264dsp.h264_h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc);
00172     } else {
00173         h->h264dsp.h264_h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta);
00174     }
00175 }
00176 
00177 static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
00178     const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
00179     const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
00180     const int alpha = alpha_table[index_a];
00181     const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
00182     if (alpha ==0 || beta == 0) return;
00183 
00184     if( bS[0] < 4 ) {
00185         int8_t tc[4];
00186         tc[0] = tc0_table[index_a][bS[0]];
00187         tc[1] = tc0_table[index_a][bS[1]];
00188         tc[2] = tc0_table[index_a][bS[2]];
00189         tc[3] = tc0_table[index_a][bS[3]];
00190         h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
00191     } else {
00192         h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
00193     }
00194 }
00195 
00196 static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) {
00197     const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
00198     const unsigned int index_a = qp - qp_bd_offset + h->slice_alpha_c0_offset;
00199     const int alpha = alpha_table[index_a];
00200     const int beta  = beta_table[qp - qp_bd_offset + h->slice_beta_offset];
00201     if (alpha ==0 || beta == 0) return;
00202 
00203     if( bS[0] < 4 ) {
00204         int8_t tc[4];
00205         tc[0] = tc0_table[index_a][bS[0]]+1;
00206         tc[1] = tc0_table[index_a][bS[1]]+1;
00207         tc[2] = tc0_table[index_a][bS[2]]+1;
00208         tc[3] = tc0_table[index_a][bS[3]]+1;
00209         h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
00210     } else {
00211         h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
00212     }
00213 }
00214 
/**
 * Deblock one macroblock, taking a shortcut path where possible.
 *
 * Hands the work to ff_h264_filter_mb() when there is no top macroblock type,
 * when no h264_loop_filter_strength DSP routine is installed, when
 * pps.chroma_qp_diff is set, or for CHROMA444. Otherwise the macroblock is
 * filtered here with one chroma QP (index 0) shared by Cb and Cr, and the
 * function asserts that the frame is not MBAFF.
 *
 * @param h          decoder context
 * @param mb_x       macroblock column (only forwarded to ff_h264_filter_mb)
 * @param mb_y       macroblock row (only forwarded to ff_h264_filter_mb)
 * @param img_y      luma plane pointer for this macroblock
 * @param img_cb     Cb plane pointer for this macroblock
 * @param img_cr     Cr plane pointer for this macroblock
 * @param linesize   luma line stride
 * @param uvlinesize chroma line stride
 *
 * NOTE(review): horizontal offsets here (4*edge, 2*edge) are not scaled by
 * h->pixel_shift, whereas filter_mb_dir() shifts its dir == 0 offsets. If
 * this path can be reached with pixel_shift != 0 the offsets look too small
 * -- confirm against the caller before relying on it for high bit depth.
 */
00215 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
00216     MpegEncContext * const s = &h->s;
00217     int mb_xy;
00218     int mb_type, left_type;
00219     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
    /* chroma is skipped only when built with CONFIG_GRAY and the gray flag is set */
00220     int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
00221 
00222     mb_xy = h->mb_xy;
00223 
    /* cases this shortcut does not handle: defer to the generic filter */
00224     if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff || CHROMA444) {
00225         ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
00226         return;
00227     }
00228     assert(!FRAME_MBAFF);
00229     left_type= h->left_type[0];
00230 
00231     mb_type = s->current_picture.mb_type[mb_xy];
    /* qp: this MB; qp0: left neighbour (mb_xy-1); qp1: top neighbour */
00232     qp = s->current_picture.qscale_table[mb_xy];
00233     qp0 = s->current_picture.qscale_table[mb_xy-1];
00234     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
00235     qpc = get_chroma_qp( h, 0, qp );
00236     qpc0 = get_chroma_qp( h, 0, qp0 );
00237     qpc1 = get_chroma_qp( h, 0, qp1 );
    /* the MB-boundary edges use the rounded average of both sides' QP */
00238     qp0 = (qp + qp0 + 1) >> 1;
00239     qp1 = (qp + qp1 + 1) >> 1;
00240     qpc0 = (qpc + qpc0 + 1) >> 1;
00241     qpc1 = (qpc + qpc1 + 1) >> 1;
    /* With every QP at or below this threshold the alpha_table index used by
     * the edge filters is at most 15+52 = 67, where alpha_table is still 0,
     * so each filter call would return immediately: skip the whole MB. */
00242     qp_thresh = 15+52 - h->slice_alpha_c0_offset;
00243     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
00244        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
00245         return;
00246 
00247     if( IS_INTRA(mb_type) ) {
        /* Intra MB: fixed strengths. The left MB edge always uses 4, inner
         * edges use 3, and the top MB edge uses 3 in field pictures, else 4. */
00248         int16_t bS4[4] = {4,4,4,4};
00249         int16_t bS3[4] = {3,3,3,3};
00250         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
00251         if(left_type)
00252             filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
        /* with the 8x8 transform only edges 0 and 2 are filtered */
00253         if( IS_8x8DCT(mb_type) ) {
00254             filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
00255             filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
00256             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
00257         } else {
00258             filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h);
00259             filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
00260             filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h);
00261             filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
00262             filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h);
00263             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
00264             filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
00265         }
        /* chroma: only edges 0 and 2, at half the luma offset */
00266         if(chroma){
00267             if(left_type){
00268                 filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
00269                 filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
00270             }
00271             filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
00272             filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
00273             filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
00274             filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
00275             filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
00276             filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
00277         }
00278         return;
00279     } else {
        /* Inter MB: bS[dir][edge][i], dir 0 = edges filtered with the *edgev
         * helpers, dir 1 = edges filtered with the *edgeh helpers. */
00280         LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]);
00281         int edges;
00282         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
            /* all of edges 0 and 2 get strength 2 in both directions */
00283             edges = 4;
00284             AV_WN64A(bS[0][0], 0x0002000200020002ULL);
00285             AV_WN64A(bS[0][2], 0x0002000200020002ULL);
00286             AV_WN64A(bS[1][0], 0x0002000200020002ULL);
00287             AV_WN64A(bS[1][2], 0x0002000200020002ULL);
00288         } else {
            /* branch-free equivalents of the expressions in the trailing
             * comments; the DSP routine fills in bS from the caches */
00289             int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0;
00290             int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
00291             int step =  1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
00292             edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
00293             h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
00294                                               h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
00295         }
        /* an intra neighbour overrides the strengths of the shared MB edge */
00296         if( IS_INTRA(left_type) )
00297             AV_WN64A(bS[0][0], 0x0004000400040004ULL);
00298         if( IS_INTRA(h->top_type) )
00299             AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL);
00300 
        /* FILTER(hv,dir,edge): if any of the four strengths of that edge is
         * non-zero, filter the luma edge and, on even edges when chroma is
         * enabled, both chroma planes. Edge 0 uses the neighbour-averaged QP
         * (qp0/qp1, qpc0/qpc1); inner edges use this MB's own QP. */
00301 #define FILTER(hv,dir,edge)\
00302         if(AV_RN64A(bS[dir][edge])) {                                   \
00303             filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\
00304             if(chroma && !(edge&1)) {\
00305                 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
00306                 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
00307             }\
00308         }
00309         if(left_type)
00310             FILTER(v,0,0);
00311         if( edges == 1 ) {
00312             FILTER(h,1,0);
00313         } else if( IS_8x8DCT(mb_type) ) {
00314             FILTER(v,0,2);
00315             FILTER(h,1,0);
00316             FILTER(h,1,2);
00317         } else {
00318             FILTER(v,0,1);
00319             FILTER(v,0,2);
00320             FILTER(v,0,3);
00321             FILTER(h,1,0);
00322             FILTER(h,1,1);
00323             FILTER(h,1,2);
00324             FILTER(h,1,3);
00325         }
00326 #undef FILTER
00327     }
00328 }
00329 
00330 static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
00331     int v;
00332 
00333     v= h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx];
00334     if(!v && h->ref_cache[0][b_idx]!=-1)
00335         v= h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U |
00336            FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit;
00337 
00338     if(h->list_count==2){
00339         if(!v)
00340             v = h->ref_cache[1][b_idx] != h->ref_cache[1][bn_idx] |
00341                 h->mv_cache[1][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U |
00342                 FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit;
00343 
00344         if(v){
00345             if(h->ref_cache[0][b_idx] != h->ref_cache[1][bn_idx] |
00346                h->ref_cache[1][b_idx] != h->ref_cache[0][bn_idx])
00347                 return 1;
00348             return
00349                 h->mv_cache[0][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U |
00350                 FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit |
00351                 h->mv_cache[1][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U |
00352                 FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit;
00353         }
00354     }
00355 
00356     return v;
00357 }
00358 
00359 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int chroma, int chroma444, int dir) {
00360     MpegEncContext * const s = &h->s;
00361     int edge;
00362     int chroma_qp_avg[2];
00363     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
00364     const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type;
00365 
00366     // how often to recheck mv-based bS when iterating between edges
00367     static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1},
00368                                               {0,3,1,1,3,3,3,3}};
00369     const int mask_edge = mask_edge_tab[dir][(mb_type>>3)&7];
00370     const int edges = mask_edge== 3 && !(h->cbp&15) ? 1 : 4;
00371 
00372     // how often to recheck mv-based bS when iterating along each edge
00373     const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
00374 
00375     if(mbm_type && !first_vertical_edge_done){
00376 
00377         if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0)
00378             && IS_INTERLACED(mbm_type&~mb_type)
00379             ) {
00380             // This is a special case in the norm where the filtering must
00381             // be done twice (one each of the field) even if we are in a
00382             // frame macroblock.
00383             //
00384             unsigned int tmp_linesize   = 2 *   linesize;
00385             unsigned int tmp_uvlinesize = 2 * uvlinesize;
00386             int mbn_xy = mb_xy - 2 * s->mb_stride;
00387             int j;
00388 
00389             for(j=0; j<2; j++, mbn_xy += s->mb_stride){
00390                 DECLARE_ALIGNED(8, int16_t, bS)[4];
00391                 int qp;
00392                 if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) {
00393                     AV_WN64A(bS, 0x0003000300030003ULL);
00394                 } else {
00395                     if(!CABAC && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])){
00396                         bS[0]= 1+((h->cbp_table[mbn_xy] & 0x4000)||h->non_zero_count_cache[scan8[0]+0]);
00397                         bS[1]= 1+((h->cbp_table[mbn_xy] & 0x4000)||h->non_zero_count_cache[scan8[0]+1]);
00398                         bS[2]= 1+((h->cbp_table[mbn_xy] & 0x8000)||h->non_zero_count_cache[scan8[0]+2]);
00399                         bS[3]= 1+((h->cbp_table[mbn_xy] & 0x8000)||h->non_zero_count_cache[scan8[0]+3]);
00400                     }else{
00401                     const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 3*4;
00402                     int i;
00403                     for( i = 0; i < 4; i++ ) {
00404                         bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]);
00405                     }
00406                     }
00407                 }
00408                 // Do not use s->qscale as luma quantizer because it has not the same
00409                 // value in IPCM macroblocks.
00410                 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
00411                 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
00412                 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
00413                 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h );
00414                 chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
00415                 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
00416                 if (chroma) {
00417                     if (chroma444) {
00418                         filter_mb_edgeh (&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
00419                         filter_mb_edgeh (&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
00420                     } else {
00421                         filter_mb_edgech(&img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[0], h);
00422                         filter_mb_edgech(&img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp_avg[1], h);
00423                     }
00424                 }
00425             }
00426         }else{
00427             DECLARE_ALIGNED(8, int16_t, bS)[4];
00428             int qp;
00429 
00430             if( IS_INTRA(mb_type|mbm_type)) {
00431                 AV_WN64A(bS, 0x0003000300030003ULL);
00432                 if (   (!IS_INTERLACED(mb_type|mbm_type))
00433                     || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
00434                 )
00435                     AV_WN64A(bS, 0x0004000400040004ULL);
00436             } else {
00437                 int i;
00438                 int mv_done;
00439 
00440                 if( dir && FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) {
00441                     AV_WN64A(bS, 0x0001000100010001ULL);
00442                     mv_done = 1;
00443                 }
00444                 else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
00445                     int b_idx= 8 + 4;
00446                     int bn_idx= b_idx - (dir ? 8:1);
00447 
00448                     bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, 8 + 4, bn_idx, mvy_limit);
00449                     mv_done = 1;
00450                 }
00451                 else
00452                     mv_done = 0;
00453 
00454                 for( i = 0; i < 4; i++ ) {
00455                     int x = dir == 0 ? 0 : i;
00456                     int y = dir == 0 ? i    : 0;
00457                     int b_idx= 8 + 4 + x + 8*y;
00458                     int bn_idx= b_idx - (dir ? 8:1);
00459 
00460                     if( h->non_zero_count_cache[b_idx] |
00461                         h->non_zero_count_cache[bn_idx] ) {
00462                         bS[i] = 2;
00463                     }
00464                     else if(!mv_done)
00465                     {
00466                         bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit);
00467                     }
00468                 }
00469             }
00470 
00471             /* Filter edge */
00472             // Do not use s->qscale as luma quantizer because it has not the same
00473             // value in IPCM macroblocks.
00474             if(bS[0]+bS[1]+bS[2]+bS[3]){
00475                 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbm_xy] + 1 ) >> 1;
00476                 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]);
00477                 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
00478                 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
00479                 chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
00480                 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1;
00481                 if( dir == 0 ) {
00482                     filter_mb_edgev( &img_y[0], linesize, bS, qp, h );
00483                     if (chroma) {
00484                         if (chroma444) {
00485                             filter_mb_edgev ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
00486                             filter_mb_edgev ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
00487                         } else {
00488                             filter_mb_edgecv( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
00489                             filter_mb_edgecv( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
00490                         }
00491                     }
00492                 } else {
00493                     filter_mb_edgeh( &img_y[0], linesize, bS, qp, h );
00494                     if (chroma) {
00495                         if (chroma444) {
00496                             filter_mb_edgeh ( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
00497                             filter_mb_edgeh ( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
00498                         } else {
00499                             filter_mb_edgech( &img_cb[0], uvlinesize, bS, chroma_qp_avg[0], h);
00500                             filter_mb_edgech( &img_cr[0], uvlinesize, bS, chroma_qp_avg[1], h);
00501                         }
00502                     }
00503                 }
00504             }
00505         }
00506     }
00507 
00508     /* Calculate bS */
00509     for( edge = 1; edge < edges; edge++ ) {
00510         DECLARE_ALIGNED(8, int16_t, bS)[4];
00511         int qp;
00512 
00513         if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type)
00514             continue;
00515 
00516         if( IS_INTRA(mb_type)) {
00517             AV_WN64A(bS, 0x0003000300030003ULL);
00518         } else {
00519             int i;
00520             int mv_done;
00521 
00522             if( edge & mask_edge ) {
00523                 AV_ZERO64(bS);
00524                 mv_done = 1;
00525             }
00526             else if( mask_par0 ) {
00527                 int b_idx= 8 + 4 + edge * (dir ? 8:1);
00528                 int bn_idx= b_idx - (dir ? 8:1);
00529 
00530                 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, b_idx, bn_idx, mvy_limit);
00531                 mv_done = 1;
00532             }
00533             else
00534                 mv_done = 0;
00535 
00536             for( i = 0; i < 4; i++ ) {
00537                 int x = dir == 0 ? edge : i;
00538                 int y = dir == 0 ? i    : edge;
00539                 int b_idx= 8 + 4 + x + 8*y;
00540                 int bn_idx= b_idx - (dir ? 8:1);
00541 
00542                 if( h->non_zero_count_cache[b_idx] |
00543                     h->non_zero_count_cache[bn_idx] ) {
00544                     bS[i] = 2;
00545                 }
00546                 else if(!mv_done)
00547                 {
00548                     bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit);
00549                 }
00550             }
00551 
00552             if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
00553                 continue;
00554         }
00555 
00556         /* Filter edge */
00557         // Do not use s->qscale as luma quantizer because it has not the same
00558         // value in IPCM macroblocks.
00559         qp = s->current_picture.qscale_table[mb_xy];
00560         //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]);
00561         tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
00562         //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
00563         if( dir == 0 ) {
00564             filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, h );
00565             if (chroma) {
00566                 if (chroma444) {
00567                     filter_mb_edgev ( &img_cb[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
00568                     filter_mb_edgev ( &img_cr[4*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
00569                 } else if( (edge&1) == 0 ) {
00570                     filter_mb_edgecv( &img_cb[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
00571                     filter_mb_edgecv( &img_cr[2*edge << h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
00572                 }
00573             }
00574         } else {
00575             filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
00576             if (chroma) {
00577                 if (chroma444) {
00578                     filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
00579                     filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
00580                 } else if( (edge&1) == 0 ) {
00581                     filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h);
00582                     filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h);
00583                 }
00584             }
00585         }
00586     }
00587 }
00588 
00589 void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
00590     MpegEncContext * const s = &h->s;
00591     const int mb_xy= mb_x + mb_y*s->mb_stride;
00592     const int mb_type = s->current_picture.mb_type[mb_xy];
00593     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
00594     int first_vertical_edge_done = 0;
00595     av_unused int dir;
00596     int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
00597 
00598     if (FRAME_MBAFF
00599             // and current and left pair do not have the same interlaced type
00600             && IS_INTERLACED(mb_type^h->left_type[0])
00601             // and left mb is in available to us
00602             && h->left_type[0]) {
00603         /* First vertical edge is different in MBAFF frames
00604          * There are 8 different bS to compute and 2 different Qp
00605          */
00606         DECLARE_ALIGNED(8, int16_t, bS)[8];
00607         int qp[2];
00608         int bqp[2];
00609         int rqp[2];
00610         int mb_qp, mbn0_qp, mbn1_qp;
00611         int i;
00612         first_vertical_edge_done = 1;
00613 
00614         if( IS_INTRA(mb_type) ) {
00615             AV_WN64A(&bS[0], 0x0004000400040004ULL);
00616             AV_WN64A(&bS[4], 0x0004000400040004ULL);
00617         } else {
00618             static const uint8_t offset[2][2][8]={
00619                 {
00620                     {3+4*0, 3+4*0, 3+4*0, 3+4*0, 3+4*1, 3+4*1, 3+4*1, 3+4*1},
00621                     {3+4*2, 3+4*2, 3+4*2, 3+4*2, 3+4*3, 3+4*3, 3+4*3, 3+4*3},
00622                 },{
00623                     {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
00624                     {3+4*0, 3+4*1, 3+4*2, 3+4*3, 3+4*0, 3+4*1, 3+4*2, 3+4*3},
00625                 }
00626             };
00627             const uint8_t *off= offset[MB_FIELD][mb_y&1];
00628             for( i = 0; i < 8; i++ ) {
00629                 int j= MB_FIELD ? i>>2 : i&1;
00630                 int mbn_xy = h->left_mb_xy[j];
00631                 int mbn_type= h->left_type[j];
00632 
00633                 if( IS_INTRA( mbn_type ) )
00634                     bS[i] = 4;
00635                 else{
00636                     bS[i] = 1 + !!(h->non_zero_count_cache[12+8*(i>>1)] |
00637                          ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ?
00638                             (h->cbp_table[mbn_xy] & (((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2) << 12))
00639                                                                        :
00640                             h->non_zero_count[mbn_xy][ off[i] ]));
00641                 }
00642             }
00643         }
00644 
00645         mb_qp = s->current_picture.qscale_table[mb_xy];
00646         mbn0_qp = s->current_picture.qscale_table[h->left_mb_xy[0]];
00647         mbn1_qp = s->current_picture.qscale_table[h->left_mb_xy[1]];
00648         qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
00649         bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
00650                    get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
00651         rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
00652                    get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
00653         qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
00654         bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
00655                    get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
00656         rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
00657                    get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
00658 
00659         /* Filter edge */
00660         tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
00661         { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
00662         if(MB_FIELD){
00663             filter_mb_mbaff_edgev ( h, img_y                ,   linesize, bS  , 1, qp [0] );
00664             filter_mb_mbaff_edgev ( h, img_y  + 8*  linesize,   linesize, bS+4, 1, qp [1] );
00665             if (chroma){
00666                 if (CHROMA444) {
00667                     filter_mb_mbaff_edgev ( h, img_cb,                uvlinesize, bS  , 1, bqp[0] );
00668                     filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
00669                     filter_mb_mbaff_edgev ( h, img_cr,                uvlinesize, bS  , 1, rqp[0] );
00670                     filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
00671                 }else{
00672                     filter_mb_mbaff_edgecv( h, img_cb,                uvlinesize, bS  , 1, bqp[0] );
00673                     filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] );
00674                     filter_mb_mbaff_edgecv( h, img_cr,                uvlinesize, bS  , 1, rqp[0] );
00675                     filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] );
00676                 }
00677             }
00678         }else{
00679             filter_mb_mbaff_edgev ( h, img_y              , 2*  linesize, bS  , 2, qp [0] );
00680             filter_mb_mbaff_edgev ( h, img_y  +   linesize, 2*  linesize, bS+1, 2, qp [1] );
00681             if (chroma){
00682                 if (CHROMA444) {
00683                     filter_mb_mbaff_edgev ( h, img_cb,              2*uvlinesize, bS  , 2, bqp[0] );
00684                     filter_mb_mbaff_edgev ( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
00685                     filter_mb_mbaff_edgev ( h, img_cr,              2*uvlinesize, bS  , 2, rqp[0] );
00686                     filter_mb_mbaff_edgev ( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
00687                 }else{
00688                     filter_mb_mbaff_edgecv( h, img_cb,              2*uvlinesize, bS  , 2, bqp[0] );
00689                     filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] );
00690                     filter_mb_mbaff_edgecv( h, img_cr,              2*uvlinesize, bS  , 2, rqp[0] );
00691                     filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] );
00692                 }
00693             }
00694         }
00695     }
00696 
00697 #if CONFIG_SMALL
00698     for( dir = 0; dir < 2; dir++ )
00699         filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, chroma, CHROMA444, dir);
00700 #else
00701     filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, chroma, CHROMA444, 0);
00702     filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, chroma, CHROMA444, 1);
00703 #endif
00704 }