libavcodec/h264.c
Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "libavutil/imgutils.h"
00029 #include "internal.h"
00030 #include "cabac.h"
00031 #include "cabac_functions.h"
00032 #include "dsputil.h"
00033 #include "avcodec.h"
00034 #include "mpegvideo.h"
00035 #include "h264.h"
00036 #include "h264data.h"
00037 #include "h264_mvpred.h"
00038 #include "golomb.h"
00039 #include "mathops.h"
00040 #include "rectangle.h"
00041 #include "thread.h"
00042 #include "vdpau_internal.h"
00043 #include "libavutil/avassert.h"
00044 
00045 //#undef NDEBUG
00046 #include <assert.h>
00047 
/* qp % 6 lookup table, indexed by a quantizer value in 0..QP_MAX_NUM. */
static const uint8_t rem6[QP_MAX_NUM+1]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
00051 
/* qp / 6 lookup table, indexed by a quantizer value in 0..QP_MAX_NUM. */
static const uint8_t div6[QP_MAX_NUM+1]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
};
00055 
/* Pixel formats offered for full-range (JPEG) 4:2:0 H.264 streams, in
 * decreasing order of hwaccel preference; PIX_FMT_NONE terminates the list. */
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_VDA_VLD,
    PIX_FMT_YUVJ420P,
    PIX_FMT_NONE
};
00063 
00068 int ff_h264_check_intra4x4_pred_mode(H264Context *h){
00069     MpegEncContext * const s = &h->s;
00070     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
00071     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
00072     int i;
00073 
00074     if(!(h->top_samples_available&0x8000)){
00075         for(i=0; i<4; i++){
00076             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
00077             if(status<0){
00078                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00079                 return -1;
00080             } else if(status){
00081                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
00082             }
00083         }
00084     }
00085 
00086     if((h->left_samples_available&0x8888)!=0x8888){
00087         static const int mask[4]={0x8000,0x2000,0x80,0x20};
00088         for(i=0; i<4; i++){
00089             if(!(h->left_samples_available&mask[i])){
00090                 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
00091                 if(status<0){
00092                     av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00093                     return -1;
00094                 } else if(status){
00095                     h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
00096                 }
00097             }
00098         }
00099     }
00100 
00101     return 0;
00102 } //FIXME cleanup like ff_h264_check_intra_pred_mode
00103 
/**
 * Check an intra 16x16 / intra chroma prediction mode against the
 * availability of the neighbouring samples.
 *
 * @param mode      requested prediction mode (valid range 0..6)
 * @param is_chroma nonzero when checking a chroma prediction mode
 * @return the (possibly remapped) mode, or -1 if it cannot be used
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma){
    MpegEncContext * const s = &h->s;
    /* Remap tables indexed by mode: -1 = the required neighbour is missing,
     * otherwise the substitute mode to use instead. */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    /* comparing against unsigned 6U also rejects negative modes */
    if(mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        if(is_chroma && (h->left_samples_available&0x8080)){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            /* only one of the two left fields is available: pick the special
             * half-left prediction variant matching which field is present */
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}
00139 
/**
 * Decode a NAL unit: parse the one-byte NAL header and remove the
 * 00 00 03 emulation-prevention escapes from the payload.
 *
 * Side effects: sets h->nal_ref_idc and h->nal_unit_type from the header.
 *
 * @param src        input buffer, starting at the NAL header byte
 * @param dst_length output: length of the unescaped payload in bytes
 * @param consumed   output: input bytes consumed, including the header byte
 * @param length     number of input bytes available
 * @return pointer to the unescaped payload — src itself when no escapes are
 *         present, otherwise an internal rbsp buffer; NULL on allocation
 *         failure
 */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;                //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;

/* Scan for the first 00 00 0x pattern (escape or next start code).
 * The fast paths test 4/8 aligned bytes at once for any zero byte,
 * then rewind by RS to re-examine the word bytewise. */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    /* copy the escape-free prefix verbatim, then unescape the rest */
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    /* copy any remaining tail bytes that could not contain an escape */
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}
00225 
00230 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
00231     int v= *src;
00232     int r;
00233 
00234     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
00235 
00236     for(r=1; r<9; r++){
00237         if(v&1) return r;
00238         v>>=1;
00239     }
00240     return 0;
00241 }
00242 
00243 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
00244                                  int y_offset, int list){
00245     int raw_my= h->mv_cache[list][ scan8[n] ][1];
00246     int filter_height= (raw_my&3) ? 2 : 0;
00247     int full_my= (raw_my>>2) + y_offset;
00248     int top = full_my - filter_height, bottom = full_my + height + filter_height;
00249 
00250     return FFMAX(abs(top), bottom);
00251 }
00252 
00253 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
00254                                int y_offset, int list0, int list1, int *nrefs){
00255     MpegEncContext * const s = &h->s;
00256     int my;
00257 
00258     y_offset += 16*(s->mb_y >> MB_FIELD);
00259 
00260     if(list0){
00261         int ref_n = h->ref_cache[0][ scan8[n] ];
00262         Picture *ref= &h->ref_list[0][ref_n];
00263 
00264         // Error resilience puts the current picture in the ref list.
00265         // Don't try to wait on these as it will cause a deadlock.
00266         // Fields can wait on each other, though.
00267         if (ref->f.thread_opaque != s->current_picture.f.thread_opaque ||
00268            (ref->f.reference & 3) != s->picture_structure) {
00269             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
00270             if (refs[0][ref_n] < 0) nrefs[0] += 1;
00271             refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
00272         }
00273     }
00274 
00275     if(list1){
00276         int ref_n = h->ref_cache[1][ scan8[n] ];
00277         Picture *ref= &h->ref_list[1][ref_n];
00278 
00279         if (ref->f.thread_opaque != s->current_picture.f.thread_opaque ||
00280            (ref->f.reference & 3) != s->picture_structure) {
00281             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
00282             if (refs[1][ref_n] < 0) nrefs[1] += 1;
00283             refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
00284         }
00285     }
00286 }
00287 
/**
 * Wait until all reference frame rows needed by the current macroblock's
 * motion vectors have been decoded (frame-threading synchronization).
 *
 * Collects, per list and reference index, the lowest source row any
 * partition of this MB reads, then blocks on the decoding progress of
 * each referenced picture up to that row.
 */
static void await_references(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    /* lowest row needed per [list][ref index]; -1 = reference unused */
    int refs[2][48];
    int nrefs[2] = {0};
    int ref, list;

    memset(refs, -1, sizeof(refs));

    /* walk the partition tree and record the lowest row per reference */
    if(IS_16X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    }else if(IS_16X8(mb_type)){
        get_lowest_part_y(h, refs, 0, 8, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else if(IS_8X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int y_offset= (i&2)<<2;

            if(IS_SUB_8X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_8X4(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 4, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_4X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_y_offset= y_offset + 2*(j&2);
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                }
            }
        }
    }

    /* wait for each referenced picture to reach the recorded row,
     * translating rows between frame and field coordinates as needed */
    for(list=h->list_count-1; list>=0; list--){
        for(ref=0; ref<48 && nrefs[list]; ref++){
            int row = refs[list][ref];
            if(row >= 0){
                Picture *ref_pic = &h->ref_list[list][ref];
                int ref_field = ref_pic->f.reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height = 16*s->mb_height >> ref_field_picture;

                row <<= MB_MBAFF;
                nrefs[list]--;

                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
                }else if(FIELD_PICTURE){
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
                }else{
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
                }
            }
        }
    }
}
00377 
/* NOTE(review): the two forward transforms below are compiled out (#if 0)
 * and kept for reference only; they are not built into the decoder. */
#if 0

static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif

#undef xStride
#undef stride

#if 0
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
00443 
/**
 * Motion-compensate one prediction direction of one partition.
 *
 * Computes quarter-pel luma and fractional chroma source positions from the
 * cached MV, falls back to emulated_edge_mc when the filter footprint leaves
 * the picture, and runs the qpel/chroma MC functions into dest_*.
 *
 * @param square     nonzero when one qpix_op call covers the partition
 * @param delta      offset to the second half when !square
 * @param list       reference list the MV/reference belong to
 * @param chroma_idc 1 = yuv420, 2 = yuv422, 3 = yuv444
 */
static av_always_inline void
mc_dir_part(H264Context *h, Picture *pic, int n, int square,
            int height, int delta, int list,
            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
            int src_x_offset, int src_y_offset,
            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
            int pixel_shift, int chroma_idc)
{
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);
    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
    uint8_t * src_y = pic->f.data[0] + offset;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;
    int ysh;

    /* sub-pel positions need the 6-tap filter margin around the block */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(chroma_idc == 3 /* yuv444 */){
        /* chroma planes share the luma geometry, so reuse the qpel path */
        src_cb = pic->f.data[1] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
        }

        src_cr = pic->f.data[2] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        }
        return;
    }

    /* vertical chroma shift: 3 for 4:2:0, 2 for 4:2:2 */
    ysh = 3 - (chroma_idc == 2 /* yuv422 */);
    if(chroma_idc == 1 /* yuv420 */ && MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }

    src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;
    src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);
}
00541 
00542 static av_always_inline void
00543 mc_part_std(H264Context *h, int n, int square, int height, int delta,
00544             uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00545             int x_offset, int y_offset,
00546             qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00547             qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00548             int list0, int list1, int pixel_shift, int chroma_idc)
00549 {
00550     MpegEncContext * const s = &h->s;
00551     qpel_mc_func *qpix_op=  qpix_put;
00552     h264_chroma_mc_func chroma_op= chroma_put;
00553 
00554     dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00555     if (chroma_idc == 3 /* yuv444 */) {
00556         dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00557         dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00558     } else if (chroma_idc == 2 /* yuv422 */) {
00559         dest_cb += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
00560         dest_cr += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
00561     } else /* yuv420 */ {
00562         dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
00563         dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
00564     }
00565     x_offset += 8*s->mb_x;
00566     y_offset += 8*(s->mb_y >> MB_FIELD);
00567 
00568     if(list0){
00569         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
00570         mc_dir_part(h, ref, n, square, height, delta, 0,
00571                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
00572                            qpix_op, chroma_op, pixel_shift, chroma_idc);
00573 
00574         qpix_op=  qpix_avg;
00575         chroma_op= chroma_avg;
00576     }
00577 
00578     if(list1){
00579         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
00580         mc_dir_part(h, ref, n, square, height, delta, 1,
00581                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
00582                            qpix_op, chroma_op, pixel_shift, chroma_idc);
00583     }
00584 }
00585 
/**
 * Weighted motion compensation for one partition.
 *
 * Bi-directional prediction renders list 1 into the obmc scratchpad and
 * blends both predictions with implicit (use_weight == 2) or explicit
 * weights; uni-directional prediction applies the explicit weight in place.
 *
 * @param chroma_idc 1 = yuv420, 2 = yuv422, 3 = yuv444
 */
static av_always_inline void
mc_part_weighted(H264Context *h, int n, int square, int height, int delta,
                 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                 int x_offset, int y_offset,
                 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                 int list0, int list1, int pixel_shift, int chroma_idc){
    MpegEncContext * const s = &h->s;
    int chroma_height;

    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if (chroma_idc == 3 /* yuv444 */) {
        /* chroma planes share the luma geometry; use the luma weight ops */
        chroma_height = height;
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op = luma_weight_op;
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    } else if (chroma_idc == 2 /* yuv422 */) {
        chroma_height = height;
        dest_cb += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
    } else /* yuv420 */ {
        chroma_height = height >> 1;
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* list 0 goes straight to the destination, list 1 to scratch */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);

        if(h->use_weight == 2){
            /* implicit weights: pair sums to 64, log2 denom is 5 */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize,
                              height,        5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
        }else{
            /* explicit weights/offsets signalled in the slice header */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        /* uni-directional: predict in place, then apply explicit weight */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma_idc);

        luma_weight_op(dest_y, h->mb_linesize, height, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}
00672 
00673 static av_always_inline void
00674 mc_part(H264Context *h, int n, int square, int height, int delta,
00675         uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00676         int x_offset, int y_offset,
00677         qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00678         qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00679         h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00680         int list0, int list1, int pixel_shift, int chroma_idc)
00681 {
00682     if((h->use_weight==2 && list0 && list1
00683         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
00684        || h->use_weight==1)
00685         mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
00686                          x_offset, y_offset, qpix_put, chroma_put,
00687                          weight_op[0], weight_op[1], weight_avg[0],
00688                          weight_avg[1], list0, list1, pixel_shift, chroma_idc);
00689     else
00690         mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
00691                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
00692                     chroma_avg, list0, list1, pixel_shift, chroma_idc);
00693 }
00694 
00695 static av_always_inline void
00696 prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma_idc)
00697 {
00698     /* fetch pixels for estimated mv 4 macroblocks ahead
00699      * optimized for 64byte cache lines */
00700     MpegEncContext * const s = &h->s;
00701     const int refn = h->ref_cache[list][scan8[0]];
00702     if(refn >= 0){
00703         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
00704         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
00705         uint8_t **src = h->ref_list[list][refn].f.data;
00706         int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
00707         s->dsp.prefetch(src[0]+off, s->linesize, 4);
00708         if (chroma_idc == 3 /* yuv444 */) {
00709             s->dsp.prefetch(src[1]+off, s->linesize, 4);
00710             s->dsp.prefetch(src[2]+off, s->linesize, 4);
00711         }else{
00712             off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
00713             s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
00714         }
00715     }
00716 }
00717 
/**
 * Perform inter (motion-compensated) prediction for one macroblock.
 *
 * Dispatches on the macroblock partition type (16x16, 16x8, 8x16 or 8x8;
 * 8x8 is further split per sub_mb_type down to 4x4) and invokes mc_part()
 * once per partition.  mc_part() itself decides between weighted and
 * standard prediction.
 *
 * @param qpix_put/chroma_put   luma/chroma MC functions for unidirectional
 *                              prediction, indexed by block size
 * @param qpix_avg/chroma_avg   luma/chroma MC functions for bidirectional
 *                              averaging, indexed by block size
 * @param weight_op/weight_avg  weighted-prediction function tables
 * @param pixel_shift log2 of bytes per sample (0: 8-bit, 1: 16-bit samples)
 * @param chroma_idc  1 = 4:2:0, 2 = 4:2:2, 3 = 4:4:4
 */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                      int pixel_shift, int chroma_idc)
{
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    /* with frame threading, the reference rows must be decoded first */
    if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h);
    prefetch_motion(h, 0, pixel_shift, chroma_idc);

    if(IS_16X16(mb_type)){
        /* single 16x16 partition */
        mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
    }else if(IS_16X8(mb_type)){
        /* two 16x8 partitions, top then bottom (y_offset 0 and 4) */
        mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
        mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma_idc);
    }else if(IS_8X16(mb_type)){
        /* two 8x16 partitions, left then right (x_offset 0 and 4) */
        mc_part(h, 0, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
        mc_part(h, 4, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma_idc);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        /* four 8x8 partitions; each may be split further by sub_mb_type */
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
                mc_part(h, n+2, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[2], &weight_avg[2],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
                mc_part(h, n+1, 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[2], &weight_avg[2],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                        pixel_shift, chroma_idc);
                }
            }
        }
    }

    /* prefetch list 1 for the next macroblock */
    prefetch_motion(h, 1, pixel_shift, chroma_idc);
}
00819 
/* Thin wrapper: hl_motion() specialized for 4:2:0 chroma (chroma_idc = 1),
 * letting the compiler fold the constant through the always-inline callee. */
static av_always_inline void
hl_motion_420(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
              qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
              qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
              h264_weight_func *weight_op, h264_biweight_func *weight_avg,
              int pixel_shift)
{
    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
              qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 1);
}
00830 
/* Thin wrapper: hl_motion() specialized for 4:2:2 chroma (chroma_idc = 2). */
static av_always_inline void
hl_motion_422(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
              qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
              qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
              h264_weight_func *weight_op, h264_biweight_func *weight_avg,
              int pixel_shift)
{
    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
              qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 2);
}
00841 
00842 static void free_tables(H264Context *h, int free_rbsp){
00843     int i;
00844     H264Context *hx;
00845 
00846     av_freep(&h->intra4x4_pred_mode);
00847     av_freep(&h->chroma_pred_mode_table);
00848     av_freep(&h->cbp_table);
00849     av_freep(&h->mvd_table[0]);
00850     av_freep(&h->mvd_table[1]);
00851     av_freep(&h->direct_table);
00852     av_freep(&h->non_zero_count);
00853     av_freep(&h->slice_table_base);
00854     h->slice_table= NULL;
00855     av_freep(&h->list_counts);
00856 
00857     av_freep(&h->mb2b_xy);
00858     av_freep(&h->mb2br_xy);
00859 
00860     for(i = 0; i < MAX_THREADS; i++) {
00861         hx = h->thread_context[i];
00862         if(!hx) continue;
00863         av_freep(&hx->top_borders[1]);
00864         av_freep(&hx->top_borders[0]);
00865         av_freep(&hx->s.obmc_scratchpad);
00866         if (free_rbsp){
00867             av_freep(&hx->rbsp_buffer[1]);
00868             av_freep(&hx->rbsp_buffer[0]);
00869             hx->rbsp_buffer_size[0] = 0;
00870             hx->rbsp_buffer_size[1] = 0;
00871         }
00872         if (i) av_freep(&h->thread_context[i]);
00873     }
00874 }
00875 
/* Build the 8x8 dequantization tables, one per scaling list and QP value.
 * Lists whose PPS scaling matrices are identical share a single buffer. */
static void init_dequant8_coeff_table(H264Context *h){
    int i,j,q,x;
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

    for(i=0; i<6; i++ ){
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        /* alias an earlier list's buffer if its scaling matrix matches */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        /* qp = 6*div6[q] + rem6[q]; the base coefficients repeat every 6 QPs,
         * doubling once per period via the shift */
        for(q=0; q<max_qp+1; q++){
            int shift = div6[q];
            int idx = rem6[q];
            /* (x>>3)|((x&7)<<3) transposes the 8x8 position */
            for(x=0; x<64; x++)
                h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
00901 
/* Build the 4x4 dequantization tables, one per scaling list and QP value.
 * Mirrors init_dequant8_coeff_table(): identical matrices share a buffer. */
static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        /* alias an earlier list's buffer if its scaling matrix matches */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;

        /* qp = 6*div6[q] + rem6[q]; extra +2 shift relative to the 8x8 case */
        for(q=0; q<max_qp+1; q++){
            int shift = div6[q] + 2;
            int idx = rem6[q];
            /* (x>>2)|((x<<2)&0xF) transposes the 4x4 position */
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}
00926 
00927 static void init_dequant_tables(H264Context *h){
00928     int i,x;
00929     init_dequant4_coeff_table(h);
00930     if(h->pps.transform_8x8_mode)
00931         init_dequant8_coeff_table(h);
00932     if(h->sps.transform_bypass){
00933         for(i=0; i<6; i++)
00934             for(x=0; x<16; x++)
00935                 h->dequant4_coeff[i][0][x] = 1<<6;
00936         if(h->pps.transform_8x8_mode)
00937             for(i=0; i<6; i++)
00938                 for(x=0; x<64; x++)
00939                     h->dequant8_coeff[i][0][x] = 1<<6;
00940     }
00941 }
00942 
00943 
/**
 * Allocate the per-frame decoder tables sized from the current sequence
 * geometry (mb_stride/mb_height) and thread count.
 *
 * On any allocation failure everything allocated so far is released via
 * free_tables().
 *
 * @return 0 on success, -1 on allocation failure
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* one extra row so edge macroblocks can be addressed safely */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    /* two MB rows per slice thread for the row-local tables */
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks "no slice"; slice_table points past the guard row/column */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    /* precompute MB index -> 4x4 block index maps */
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            /* without FMO only two rows of the reduced map are needed */
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    /* allocated lazily in ff_h264_frame_start() once linesize is known */
    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h, 1);
    return -1;
}
00987 
/**
 * Share the master context's tables with slice-thread context number i.
 *
 * Frame-global tables are aliased directly; the row-local tables
 * (intra4x4_pred_mode, mvd_table) get a per-thread offset of two MB rows.
 * The clone does NOT own any of these pointers and must not free them.
 */
static void clone_tables(H264Context *dst, H264Context *src, int i){
    MpegEncContext * const s = &src->s;
    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
    dst->non_zero_count           = src->non_zero_count;
    dst->slice_table              = src->slice_table;
    dst->cbp_table                = src->cbp_table;
    dst->mb2b_xy                  = src->mb2b_xy;
    dst->mb2br_xy                 = src->mb2br_xy;
    dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
    dst->mvd_table[0]             = src->mvd_table[0] + i*8*2*s->mb_stride;
    dst->mvd_table[1]             = src->mvd_table[1] + i*8*2*s->mb_stride;
    dst->direct_table             = src->direct_table;
    dst->list_counts              = src->list_counts;

    /* allocated per thread later, once linesize is known */
    dst->s.obmc_scratchpad = NULL;
    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, src->sps.chroma_format_idc);
}
01008 
/**
 * Allocate per-context scratch buffers and preset the ref cache.
 *
 * @return 0 on success, -1 on allocation failure (caller's free_tables()
 *         path releases anything partially allocated)
 */
static int context_init(H264Context *h){
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)

    /* cache slots to the right of these scan8 positions lie outside the
     * macroblock and are permanently unavailable */
    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

    return 0;
fail:
    return -1; // free_tables will clean up for us
}
01024 
01025 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
01026 
/* One-time initialization shared by all decoder entry points: geometry,
 * DSP/prediction function tables, and flat default scaling matrices. */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    /* 8-bit, 4:2:0 defaults; re-initialized if the SPS says otherwise */
    ff_h264dsp_init(&h->h264dsp, 8, 1);
    ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);

    h->dequant_coeff_pps= -1;
    s->unrestricted_mv=1;

    dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early

    /* flat (all 16) scaling matrices until SPS/PPS provide real ones */
    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
01045 
01046 int ff_h264_decode_extradata(H264Context *h)
01047 {
01048     AVCodecContext *avctx = h->s.avctx;
01049 
01050     if(avctx->extradata[0] == 1){
01051         int i, cnt, nalsize;
01052         unsigned char *p = avctx->extradata;
01053 
01054         h->is_avc = 1;
01055 
01056         if(avctx->extradata_size < 7) {
01057             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
01058             return -1;
01059         }
01060         /* sps and pps in the avcC always have length coded with 2 bytes,
01061            so put a fake nal_length_size = 2 while parsing them */
01062         h->nal_length_size = 2;
01063         // Decode sps from avcC
01064         cnt = *(p+5) & 0x1f; // Number of sps
01065         p += 6;
01066         for (i = 0; i < cnt; i++) {
01067             nalsize = AV_RB16(p) + 2;
01068             if (p - avctx->extradata + nalsize > avctx->extradata_size)
01069                 return -1;
01070             if(decode_nal_units(h, p, nalsize) < 0) {
01071                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
01072                 return -1;
01073             }
01074             p += nalsize;
01075         }
01076         // Decode pps from avcC
01077         cnt = *(p++); // Number of pps
01078         for (i = 0; i < cnt; i++) {
01079             nalsize = AV_RB16(p) + 2;
01080             if (p - avctx->extradata + nalsize > avctx->extradata_size)
01081                 return -1;
01082             if (decode_nal_units(h, p, nalsize) < 0) {
01083                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
01084                 return -1;
01085             }
01086             p += nalsize;
01087         }
01088         // Now store right nal length size, that will be use to parse all other nals
01089         h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
01090     } else {
01091         h->is_avc = 0;
01092         if(decode_nal_units(h, avctx->extradata, avctx->extradata_size) < 0)
01093             return -1;
01094     }
01095     return 0;
01096 }
01097 
/**
 * AVCodec init callback: set up the H.264 decoder context, VLC tables and
 * defaults, then parse any extradata (avcC or Annex B SPS/PPS).
 *
 * @return 0 on success, -1 if the extradata fails to decode
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;
    int i;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
//    s->decode_mb= ff_h263_decode_mb;
    s->quarter_sample = 1;
    /* body of the if below (mis-indented in the original) */
    if(!avctx->has_b_frames)
    s->low_delay= 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    /* assume 8-bit until an SPS says otherwise */
    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
        h->last_pocs[i] = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->x264_build = -1;
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        /* H.264 timestamps tick at field rate: two ticks per frame */
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h))
        return -1;

    /* honor the reorder delay the SPS VUI advertises, if larger */
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}
01149 
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
/* Rebase an array of Picture pointers from one MpegEncContext's storage to
 * another's; used when synchronizing frame-threaded decoder contexts. */
static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
{
    int i;

    for (i=0; i<count; i++){
        /* each source pointer must be NULL, point into the old context
         * itself, or point into the old context's picture array */
        assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
                IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
                !from[i]));
        to[i] = REBASE_PICTURE(from[i], new_base, old_base);
    }
}
01162 
/**
 * Synchronize an array of dynamically allocated parameter sets (SPS/PPS)
 * between two decoder contexts.
 *
 * Destination slots are freed when the source slot is empty, allocated
 * when the source slot is populated, and then overwritten with the source
 * contents.
 *
 * Note: on allocation failure the destination slot is silently left empty
 * instead of crashing; the original code would memcpy into NULL here.
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (to[i] && !from[i]) {
            av_freep(&to[i]);
        } else if (from[i] && !to[i]) {
            to[i] = av_malloc(size);
            /* OOM: skip this slot rather than dereference NULL below */
            if (!to[i])
                continue;
        }

        if (from[i])
            memcpy(to[i], from[i], size);
    }
}
01174 
01175 static int decode_init_thread_copy(AVCodecContext *avctx){
01176     H264Context *h= avctx->priv_data;
01177 
01178     if (!avctx->internal->is_copy)
01179         return 0;
01180     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
01181     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
01182 
01183     return 0;
01184 }
01185 
/* Copy the contiguous struct fields from start_field up to (excluding)
 * end_field; relies on the declaration order inside H264Context. */
#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
/**
 * Frame-threading callback: synchronize decoder state from the source
 * context (the thread that just finished setup) into the destination.
 *
 * On first use the destination is fully initialized (tables, scratchpads);
 * afterwards only the evolving state (SPS/PPS, dequant tables, POC and
 * reference bookkeeping) is copied across.
 *
 * @return 0 on success, a negative error code on failure
 */
static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
    H264Context *h= dst->priv_data, *h1= src->priv_data;
    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if(dst == src || !s1->context_initialized) return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if(err) return err;

    //FIXME handle width/height changing
    if(!inited){
        /* first sync: drop any parameter sets the raw struct copy below
         * would otherwise leak or double-own */
        for(i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for(i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        /* the struct copy above aliased the source's RBSP buffers; this
         * context must allocate its own */
        for(i=0; i<2; i++){
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        // frame_start may not be called for the next thread (if it's decoding a bottom field)
        // so this has to be allocated here
        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
    }

    //extradata/NAL handling
    h->is_avc          = h1->is_avc;

    //SPS/PPS
    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
    h->sps             = h1->sps;
    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
    h->pps             = h1->pps;

    //Dequantization matrices
    //FIXME these are big - can they be only copied when PPS changes?
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    /* the coeff pointers alias the buffers; rebase them onto this
     * context's own buffers at the same relative offsets */
    for(i=0; i<6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for(i=0; i<6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    //POC timing
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    //reference lists
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list,  intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    /* raw-copied Picture pointers still point into the source context */
    copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
    copy_picture_range(h->long_ref,    h1->long_ref,    32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

    h->last_slice_type = h1->last_slice_type;

    if(!s->current_picture_ptr) return 0;

    if(!s->dropable) {
        err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb     = h->poc_msb;
        h->prev_poc_lsb     = h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num       = h->frame_num;
    h->outputed_poc         = h->next_outputed_poc;

    return err;
}
01278 
/**
 * Begin decoding a new frame: acquire a picture from the MPV layer,
 * precompute per-block destination offsets, allocate deblocking/weighting
 * scratchpads (now that linesize is known) and reset per-frame state.
 *
 * @return 0 on success, -1 if MPV_frame_start() fails
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->f.key_frame = 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    /* luma offsets: scan8 gives the 4x4 block's (x,y) inside the MB;
     * the second set (48+) uses doubled row stride for field/MBAFF access */
    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    /* chroma offsets, cb and cr sharing the same table values */
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < s->slice_context_count; i++)
        if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

    /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.f.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->f.reference = 0;

    /* field POCs are filled in per slice; INT_MAX marks "not yet set" */
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
01338 
01347 static void decode_postinit(H264Context *h, int setup_finished){
01348     MpegEncContext * const s = &h->s;
01349     Picture *out = s->current_picture_ptr;
01350     Picture *cur = s->current_picture_ptr;
01351     int i, pics, out_of_order, out_idx;
01352     int invalid = 0, cnt = 0;
01353 
01354     s->current_picture_ptr->f.qscale_type = FF_QSCALE_TYPE_H264;
01355     s->current_picture_ptr->f.pict_type   = s->pict_type;
01356 
01357     if (h->next_output_pic) return;
01358 
01359     if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
01360         //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
01361         //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
01362         //to find this yet, so we assume the worst for now.
01363         //if (setup_finished)
01364         //    ff_thread_finish_setup(s->avctx);
01365         return;
01366     }
01367 
01368     cur->f.interlaced_frame = 0;
01369     cur->f.repeat_pict      = 0;
01370 
01371     /* Signal interlacing information externally. */
01372     /* Prioritize picture timing SEI information over used decoding process if it exists. */
01373 
01374     if(h->sps.pic_struct_present_flag){
01375         switch (h->sei_pic_struct)
01376         {
01377         case SEI_PIC_STRUCT_FRAME:
01378             break;
01379         case SEI_PIC_STRUCT_TOP_FIELD:
01380         case SEI_PIC_STRUCT_BOTTOM_FIELD:
01381             cur->f.interlaced_frame = 1;
01382             break;
01383         case SEI_PIC_STRUCT_TOP_BOTTOM:
01384         case SEI_PIC_STRUCT_BOTTOM_TOP:
01385             if (FIELD_OR_MBAFF_PICTURE)
01386                 cur->f.interlaced_frame = 1;
01387             else
01388                 // try to flag soft telecine progressive
01389                 cur->f.interlaced_frame = h->prev_interlaced_frame;
01390             break;
01391         case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
01392         case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
01393             // Signal the possibility of telecined film externally (pic_struct 5,6)
01394             // From these hints, let the applications decide if they apply deinterlacing.
01395             cur->f.repeat_pict = 1;
01396             break;
01397         case SEI_PIC_STRUCT_FRAME_DOUBLING:
01398             // Force progressive here, as doubling interlaced frame is a bad idea.
01399             cur->f.repeat_pict = 2;
01400             break;
01401         case SEI_PIC_STRUCT_FRAME_TRIPLING:
01402             cur->f.repeat_pict = 4;
01403             break;
01404         }
01405 
01406         if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
01407             cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
01408     }else{
01409         /* Derive interlacing flag from used decoding process. */
01410         cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE;
01411     }
01412     h->prev_interlaced_frame = cur->f.interlaced_frame;
01413 
01414     if (cur->field_poc[0] != cur->field_poc[1]){
01415         /* Derive top_field_first from field pocs. */
01416         cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
01417     }else{
01418         if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
01419             /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
01420             if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
01421               || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
01422                 cur->f.top_field_first = 1;
01423             else
01424                 cur->f.top_field_first = 0;
01425         }else{
01426             /* Most likely progressive */
01427             cur->f.top_field_first = 0;
01428         }
01429     }
01430 
01431     //FIXME do something with unavailable reference frames
01432 
01433     /* Sort B-frames into display order */
01434 
01435     if(h->sps.bitstream_restriction_flag
01436        && s->avctx->has_b_frames < h->sps.num_reorder_frames){
01437         s->avctx->has_b_frames = h->sps.num_reorder_frames;
01438         s->low_delay = 0;
01439     }
01440 
01441     if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
01442        && !h->sps.bitstream_restriction_flag){
01443         s->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
01444         s->low_delay= 0;
01445     }
01446 
01447     pics = 0;
01448     while(h->delayed_pic[pics]) pics++;
01449 
01450     assert(pics <= MAX_DELAYED_PIC_COUNT);
01451 
01452     h->delayed_pic[pics++] = cur;
01453     if (cur->f.reference == 0)
01454         cur->f.reference = DELAYED_PIC_REF;
01455 
01456     /* Frame reordering. This code takes pictures from coding order and sorts
01457      * them by their incremental POC value into display order. It supports POC
01458      * gaps, MMCO reset codes and random resets.
01459      * A "display group" can start either with a IDR frame (f.key_frame = 1),
01460      * and/or can be closed down with a MMCO reset code. In sequences where
01461      * there is no delay, we can't detect that (since the frame was already
01462      * output to the user), so we also set h->mmco_reset to detect the MMCO
01463      * reset code.
01464      * FIXME: if we detect insufficient delays (as per s->avctx->has_b_frames),
01465      * we increase the delay between input and output. All frames affected by
01466      * the lag (e.g. those that should have been output before another frame
01467      * that we already returned to the user) will be dropped. This is a bug
01468      * that we will fix later. */
01469     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
01470         cnt     += out->poc < h->last_pocs[i];
01471         invalid += out->poc == INT_MIN;
01472     }
01473     if (!h->mmco_reset && !cur->f.key_frame && cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) {
01474         h->mmco_reset = 2;
01475         if (pics > 1)
01476             h->delayed_pic[pics - 2]->mmco_reset = 2;
01477     }
01478     if (h->mmco_reset || cur->f.key_frame) {
01479         for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
01480             h->last_pocs[i] = INT_MIN;
01481         cnt     = 0;
01482         invalid = MAX_DELAYED_PIC_COUNT;
01483     }
01484     out = h->delayed_pic[0];
01485     out_idx = 0;
01486     for (i = 1; i < MAX_DELAYED_PIC_COUNT && h->delayed_pic[i] &&
01487          !h->delayed_pic[i-1]->mmco_reset && !h->delayed_pic[i]->f.key_frame; i++)
01488     {
01489         if(h->delayed_pic[i]->poc < out->poc){
01490             out = h->delayed_pic[i];
01491             out_idx = i;
01492         }
01493     }
01494     if (s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->f.key_frame || h->mmco_reset))
01495         h->next_outputed_poc = INT_MIN;
01496     out_of_order = !out->f.key_frame && !h->mmco_reset && (out->poc < h->next_outputed_poc);
01497 
01498     if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
01499         { }
01500     else if (out_of_order && pics-1 == s->avctx->has_b_frames &&
01501              s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) {
01502         if (invalid + cnt < MAX_DELAYED_PIC_COUNT) {
01503             s->avctx->has_b_frames = FFMAX(s->avctx->has_b_frames, cnt);
01504         }
01505         s->low_delay = 0;
01506     } else if (s->low_delay &&
01507                ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2) ||
01508                 cur->f.pict_type == AV_PICTURE_TYPE_B)) {
01509         s->low_delay = 0;
01510         s->avctx->has_b_frames++;
01511     }
01512 
01513     if(pics > s->avctx->has_b_frames){
01514         out->f.reference &= ~DELAYED_PIC_REF;
01515         out->owner2 = s; // for frame threading, the owner must be the second field's thread
01516                          // or else the first thread can release the picture and reuse it unsafely
01517         for(i=out_idx; h->delayed_pic[i]; i++)
01518             h->delayed_pic[i] = h->delayed_pic[i+1];
01519     }
01520     memmove(h->last_pocs, &h->last_pocs[1], sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1));
01521     h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc;
01522     if(!out_of_order && pics > s->avctx->has_b_frames){
01523         h->next_output_pic = out;
01524         if (out->mmco_reset) {
01525             if (out_idx > 0) {
01526                 h->next_outputed_poc = out->poc;
01527                 h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset;
01528             } else {
01529                 h->next_outputed_poc = INT_MIN;
01530             }
01531         } else {
01532             if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) {
01533                 h->next_outputed_poc = INT_MIN;
01534             } else {
01535                 h->next_outputed_poc = out->poc;
01536             }
01537         }
01538         h->mmco_reset = 0;
01539     }else{
01540         av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
01541     }
01542 
01543     if (setup_finished)
01544         ff_thread_finish_setup(s->avctx);
01545 }
01546 
/**
 * Back up the bottom line(s) of the just-decoded macroblock (luma and,
 * unless gray-only decoding is in effect, chroma) into
 * h->top_borders[][s->mb_x], so the macroblock row below can still read its
 * undeblocked top-neighbour pixels later.
 *
 * Entry layout: luma at offset 0, then Cb, then Cr; the chroma offsets and
 * copy widths depend on pixel_shift (8 vs. >8 bit samples) and on the
 * chroma format (4:2:0 / 4:2:2 / 4:4:4).
 */
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
                                              uint8_t *src_cb, uint8_t *src_cr,
                                              int linesize, int uvlinesize, int simple)
{
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift; /* nonzero for 16-bit samples: doubles byte offsets */
    int chroma444 = CHROMA444;
    int chroma422 = CHROMA422;

    /* Step one line up so the 15/16-line offsets below address the
     * macroblock's last line(s). */
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            /* Bottom macroblock of an MBAFF pair. */
            if(!MB_MBAFF){
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if (pixel_shift)
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(chroma444){
                        if (pixel_shift){
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                        } else {
                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                        }
                    } else if(chroma422) {
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cr + 15*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb +  15*uvlinesize);
                            AV_COPY64(top_border+24, src_cr +  15*uvlinesize);
                        }
                    } else {
                        /* 4:2:0: chroma blocks are only 8 lines high. */
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                        }
                    }
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];
    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    AV_COPY128(top_border, src_y + 16*linesize);
    if (pixel_shift)
        AV_COPY128(top_border+16, src_y+16*linesize+16);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            /* 4:4:4: full-height, full-width chroma (uvlinesize == linesize here —
             * NOTE(review): confirmed only by the use of linesize below). */
            if (pixel_shift){
                AV_COPY128(top_border+32, src_cb + 16*linesize);
                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
                AV_COPY128(top_border+64, src_cr + 16*linesize);
                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
            } else {
                AV_COPY128(top_border+16, src_cb + 16*linesize);
                AV_COPY128(top_border+32, src_cr + 16*linesize);
            }
        } else if(chroma422) {
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+16*uvlinesize);
                AV_COPY128(top_border+48, src_cr+16*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+16*uvlinesize);
                AV_COPY64(top_border+24, src_cr+16*uvlinesize);
            }
        } else {
            /* 4:2:0: chroma blocks are only 8 lines high. */
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
            }
        }
    }
}
01642 
/**
 * Exchange (xchg != 0) or copy (xchg == 0) the saved border lines in
 * h->top_borders with the pixel rows directly above the current macroblock.
 * This lets intra prediction see the undeblocked top-neighbour samples; the
 * caller runs this with xchg=1 before prediction and xchg=0 afterwards to
 * restore the picture (see hl_decode_mb_internal()).
 */
static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                  uint8_t *src_cb, uint8_t *src_cr,
                                  int linesize, int uvlinesize,
                                  int xchg, int chroma444,
                                  int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1; /* border entry of the macroblock to the left */
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    if(h->deblocking_filter == 2) {
        /* Filtering across slice boundaries is disabled: only treat the
         * neighbours as deblocked when they are in the same slice. */
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (s->mb_x > 0);
        deblock_top     = (s->mb_y > !!MB_FIELD);
    }

    /* Point one line up and one sample left of the macroblock origin. */
    src_y  -=   linesize + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

/* Swap or copy 8 samples between border buffer a and picture row b;
 * always a plain copy when the xchg argument is 0. */
#define XCHG(a,b,xchg)\
    if (pixel_shift) {\
        if (xchg) {\
            AV_SWAP64(b+0,a+0);\
            AV_SWAP64(b+8,a+8);\
        } else {\
            AV_COPY128(b,a); \
        }\
    } else \
if (xchg) AV_SWAP64(b,a);\
else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_topleft){
            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
        }
    }
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            if(deblock_topleft){
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
            if(s->mb_x+1 < s->mb_width){
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
            }
        } else {
            if(deblock_top){
                if(deblock_topleft){
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
            }
        }
    }
}
01727 
01728 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
01729     if (high_bit_depth) {
01730         return AV_RN32A(((int32_t*)mb) + index);
01731     } else
01732         return AV_RN16A(mb + index);
01733 }
01734 
01735 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
01736     if (high_bit_depth) {
01737         AV_WN32A(((int32_t*)mb) + index, value);
01738     } else
01739         AV_WN16A(mb + index, value);
01740 }
01741 
/**
 * Run intra prediction for one luma-sized plane of the current macroblock
 * and, for intra 4x4/8x8 blocks, add their residual immediately.
 *
 * @param p plane index: 0 = luma (uses s->qscale); p > 0 addresses the
 *          additional 256-coefficient planes stored after it in h->mb and
 *          uses h->chroma_qp[p-1].
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
    block_offset += 16*p; /* each plane owns 16 entries of block_offset */
    if(IS_INTRA4x4(mb_type)){
        if(simple || !s->encoding){
            if(IS_8x8DCT(mb_type)){
                if(transform_bypass){
                    /* Lossless: "residual" is added to the prediction as-is. */
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels8;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
                }
                for(i=0; i<16; i+=4){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        /* CAVLC 4:4:4 intra lossless with vertical/horizontal
                         * prediction: fused predict+add function. */
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                    (h->topright_samples_available<<i)&0x4000, linesize);
                        if(nnz){
                            /* Single nonzero coefficient that is the DC: use
                             * the cheaper DC-only add. */
                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            else
                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }
            }else{
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels4;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
                }
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(s->mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                /* Top-right neighbour unavailable: replicate
                                 * the rightmost available top sample. */
                                if (pixel_shift) {
                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                    topright= (uint8_t*) &tr_high;
                                } else {
                                    tr= ptr[3 - linesize]*0x01010101u;
                                    topright= (uint8_t*) &tr;
                                }
                            }else
                                topright= ptr + (4 << pixel_shift) - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        if(nnz){
                            if(is_h264){
                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                else
                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            } else if (CONFIG_SVQ3_DECODER)
                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                        }
                    }
                }
            }
        }
    }else{
        /* Intra 16x16: predict the whole plane, then dequant/transform the
         * separately coded luma DC (residual add happens later in
         * hl_decode_mb_idct_luma()). */
        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
        if(is_h264){
            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                if(!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                else{
                    /* Lossless: scatter the DC values back to the DC slot of
                     * each 4x4 block in raster-to-scan mapping. */
                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                    for(i = 0; i < 16; i++)
                        dctcoef_set(h->mb+(p*256 << pixel_shift), pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                }
            }
        } else if (CONFIG_SVQ3_DECODER)
            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
    }
}
01844 
/**
 * Add the inverse-transformed luma residual for plane p of a macroblock
 * that is not intra 4x4 (intra 4x4 residuals are added together with the
 * prediction in hl_decode_mb_predict_luma()).
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    block_offset += 16*p; /* each plane owns 16 entries of block_offset */
    if(!IS_INTRA4x4(mb_type)){
        if(is_h264){
            if(IS_INTRA16x16(mb_type)){
                if(transform_bypass){
                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                        /* Lossless V/H prediction is fused with the residual add. */
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
                    }else{
                        for(i=0; i<16; i++){
                            /* Add when there are AC coefficients or a DC value. */
                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                }
            }else if(h->cbp&15){ /* any luma residual coded at all? */
                if(transform_bypass){
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1; /* 8x8 transform covers 4 scan positions */
                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                    for(i=0; i<16; i+=di){
                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    if(IS_8x8DCT(mb_type)){
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }else{
                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }
                }
            }
        } else if (CONFIG_SVQ3_DECODER) {
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }
}
01894 
/**
 * Decode the current macroblock into the current picture (4:2:0 / 4:2:2
 * path; 4:4:4 has its own variant). Computes destination pointers
 * (including field-macroblock and MBAFF addressing), then handles IPCM
 * macroblocks, intra prediction, inter motion compensation, and the
 * residual add for luma and chroma.
 *
 * @param simple      compile-time flag: nonzero selects the simplified path
 *                    that omits field/MBAFF, IPCM and transform-bypass
 *                    handling
 * @param pixel_shift 1 when samples are stored as 16-bit values (>8 bit
 *                    depth); doubles all byte offsets
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift)
{
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i, j;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
    const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    const int block_h = 16 >> s->chroma_y_shift; /* chroma rows: 8 for 4:2:0, 16 for 4:2:2 */
    const int chroma422 = CHROMA422;

    dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
    dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* Field macroblock: lines of one field are interleaved in the frame
         * buffer, so step two picture lines at a time and use the
         * field-specific block offsets. */
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1){ //FIXME move out of this function?
            /* Bottom macroblock of a pair: move origin up to the field's first line. */
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize * (block_h - 1);
            dest_cr-= s->uvlinesize * (block_h - 1);
        }
        if(FRAME_MBAFF) {
            int list;
            /* Bias cached ref indices by pair parity, (16+ref)^(mb_y&1) —
             * NOTE(review): the consumer of this remapping is not visible in
             * this function; presumably keeps field refs distinct. */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* IPCM macroblock: raw samples were parsed into h->mb; copy them
         * out verbatim. */
        if (pixel_shift) {
            /* High bit depth: samples are packed at bit_depth bits each,
             * read them back with a bit reader. */
            const int bit_depth = h->sps.bit_depth_luma;
            int j;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);

            for (i = 0; i < 16; i++) {
                uint16_t *tmp_y  = (uint16_t*)(dest_y  + i*linesize);
                for (j = 0; j < 16; j++)
                    tmp_y[j] = get_bits(&gb, bit_depth);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* Monochrome stream: fill chroma with the mid-range value. */
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cb[j] = 1 << (bit_depth - 1);
                        }
                    }
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cr[j] = 1 << (bit_depth - 1);
                        }
                    }
                } else {
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cb[j] = get_bits(&gb, bit_depth);
                    }
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cr[j] = get_bits(&gb, bit_depth);
                    }
                }
            }
        } else {
            for (i=0; i<16; i++) {
                memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* Monochrome stream: fill chroma with mid-grey (128). */
                    for (i = 0; i < block_h; i++) {
                        memset(dest_cb + i*uvlinesize, 128, 8);
                        memset(dest_cr + i*uvlinesize, 128, 8);
                    }
                } else {
                    for (i = 0; i < block_h; i++) {
                        memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4,  8);
                        memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4,  8);
                    }
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* Swap in the undeblocked top border for prediction, predict
             * chroma and luma, then swap the border back. */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);

            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
        }else if(is_h264){
            /* Inter macroblock: motion compensation for the chosen chroma format. */
            if (chroma422) {
                hl_motion_422(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
                              h->h264dsp.biweight_h264_pixels_tab,
                              pixel_shift);
            } else {
                hl_motion_420(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
                              h->h264dsp.biweight_h264_pixels_tab,
                              pixel_shift);
            }
        }

        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

        /* Chroma residual (Cb/Cr planes), skipped when no chroma CBP bits set. */
        if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    /* Lossless V/H chroma prediction fused with residual add. */
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
                        }
                        if (chroma422) {
                            /* 4:2:2 has four extra chroma blocks per plane. */
                            for(i=j*16+4; i<j*16+8; i++){
                                if(h->non_zero_count_cache[ scan8[i+4] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                    idct_add   (dest[j-1] + block_offset[i+4], h->mb + (i*16 << pixel_shift), uvlinesize);
                            }
                        }
                    }
                }
            }else{
                if(is_h264){
                    int qp[2];
                    if (chroma422) {
                        qp[0] = h->chroma_qp[0] + 3;
                        qp[1] = h->chroma_qp[1] + 3;
                    } else {
                        qp[0] = h->chroma_qp[0];
                        qp[1] = h->chroma_qp[1];
                    }
                    /* Dequantize the separately coded chroma DC, then add
                     * the full chroma residual. */
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][qp[0]][0]);
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][qp[1]][0]);
                    h->h264dsp.h264_idct_add8(dest, block_offset,
                                              h->mb, uvlinesize,
                                              h->non_zero_count_cache);
                } else if (CONFIG_SVQ3_DECODER) {
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                                uint8_t * const ptr= dest[j-1] + block_offset[i];
                                ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                            }
                        }
                    }
                }
            }
        }
    }
    /* Reset the coefficient buffer for the next macroblock. */
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02104 
/**
 * Decode one macroblock for 4:4:4 streams. All three planes have luma
 * dimensions here, so each plane is run through the luma predict/IDCT
 * helpers instead of dedicated chroma code.
 *
 * @param h           decoder context
 * @param simple      nonzero selects the fast path: grayscale, field/MBAFF
 *                    and PCM handling are compiled out
 * @param pixel_shift 1 when samples are stored in 16 bits (>8-bit depth),
 *                    0 for 8-bit storage
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    uint8_t  *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* with CODEC_FLAG_GRAY only plane 0 is decoded */
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++)
    {
        dest[p] = s->current_picture.f.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: double the stride and use the field block offsets */
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1) //FIXME move out of this function?
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize*15;
        if(FRAME_MBAFF) {
            int list;
            /* re-map cached reference indices to the per-field numbering */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* PCM macroblock: raw samples were stored in h->mb, copy them out */
        if (pixel_shift) {
            /* high bit depth: samples are packed at bit_depth bits each */
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* deblocking needs the pre-filter neighbours; swap borders in
             * before intra prediction and back out afterwards */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        }else{
            hl_motion(h, dest[0], dest[1], dest[2],
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      h->h264dsp.weight_h264_pixels_tab,
                      h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 3);
        }

        /* add the residual on top of prediction for every decoded plane */
        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
    }
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02200 
/**
 * Instantiate the fast-path macroblock decoders, one per sample storage
 * size: hl_decode_mb_simple_8 (8-bit, pixel_shift 0) and
 * hl_decode_mb_simple_16 (16-bit, pixel_shift 1).
 */
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
    hl_decode_mb_internal(h, 1, sh); \
}
hl_decode_mb_simple(0, 8)
hl_decode_mb_simple(1, 16)
02210 
/**
 * Generic (slow-path) macroblock decoder: hl_decode_mb_internal with
 * simple=0 and the stream's actual pixel shift.
 */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0, h->pixel_shift);
}
02217 
/**
 * Generic (slow-path) 4:4:4 macroblock decoder: simple=0 with the
 * stream's actual pixel shift.
 */
static void av_noinline hl_decode_mb_444_complex(H264Context *h){
    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
}
02221 
/**
 * Fast-path 4:4:4 macroblock decoder: simple=1, 8-bit storage
 * (pixel_shift 0 — callers route high bit depth to the complex variant).
 */
static void av_noinline hl_decode_mb_444_simple(H264Context *h){
    hl_decode_mb_444_internal(h, 1, 0);
}
02225 
02226 void ff_h264_hl_decode_mb(H264Context *h){
02227     MpegEncContext * const s = &h->s;
02228     const int mb_xy= h->mb_xy;
02229     const int mb_type = s->current_picture.f.mb_type[mb_xy];
02230     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
02231 
02232     if (CHROMA444) {
02233         if(is_complex || h->pixel_shift)
02234             hl_decode_mb_444_complex(h);
02235         else
02236             hl_decode_mb_444_simple(h);
02237     } else if (is_complex) {
02238         hl_decode_mb_complex(h);
02239     } else if (h->pixel_shift) {
02240         hl_decode_mb_simple_16(h);
02241     } else
02242         hl_decode_mb_simple_8(h);
02243 }
02244 
02245 static int pred_weight_table(H264Context *h){
02246     MpegEncContext * const s = &h->s;
02247     int list, i;
02248     int luma_def, chroma_def;
02249 
02250     h->use_weight= 0;
02251     h->use_weight_chroma= 0;
02252     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
02253     if(h->sps.chroma_format_idc)
02254         h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
02255     luma_def = 1<<h->luma_log2_weight_denom;
02256     chroma_def = 1<<h->chroma_log2_weight_denom;
02257 
02258     for(list=0; list<2; list++){
02259         h->luma_weight_flag[list]   = 0;
02260         h->chroma_weight_flag[list] = 0;
02261         for(i=0; i<h->ref_count[list]; i++){
02262             int luma_weight_flag, chroma_weight_flag;
02263 
02264             luma_weight_flag= get_bits1(&s->gb);
02265             if(luma_weight_flag){
02266                 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
02267                 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
02268                 if(   h->luma_weight[i][list][0] != luma_def
02269                    || h->luma_weight[i][list][1] != 0) {
02270                     h->use_weight= 1;
02271                     h->luma_weight_flag[list]= 1;
02272                 }
02273             }else{
02274                 h->luma_weight[i][list][0]= luma_def;
02275                 h->luma_weight[i][list][1]= 0;
02276             }
02277 
02278             if(h->sps.chroma_format_idc){
02279                 chroma_weight_flag= get_bits1(&s->gb);
02280                 if(chroma_weight_flag){
02281                     int j;
02282                     for(j=0; j<2; j++){
02283                         h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
02284                         h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
02285                         if(   h->chroma_weight[i][list][j][0] != chroma_def
02286                            || h->chroma_weight[i][list][j][1] != 0) {
02287                             h->use_weight_chroma= 1;
02288                             h->chroma_weight_flag[list]= 1;
02289                         }
02290                     }
02291                 }else{
02292                     int j;
02293                     for(j=0; j<2; j++){
02294                         h->chroma_weight[i][list][j][0]= chroma_def;
02295                         h->chroma_weight[i][list][j][1]= 0;
02296                     }
02297                 }
02298             }
02299         }
02300         if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
02301     }
02302     h->use_weight= h->use_weight || h->use_weight_chroma;
02303     return 0;
02304 }
02305 
/**
 * Initialize the implicit (POC-distance based) weight table for the
 * current slice.
 *
 * @param field -1 for frames / whole-picture weights (slots [0] and [1]
 *              are set identically); 0 or 1 to fill only that field's
 *              slot, using the field-reference index range starting at 16
 */
static void implicit_weight_table(H264Context *h, int field){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i]   = 0;
        h->chroma_weight_flag[i] = 0;
    }

    if(field < 0){
        if (s->picture_structure == PICT_FRAME) {
            cur_poc = s->current_picture_ptr->poc;
        } else {
            cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
        }
    /* shortcut: a single reference each way, equidistant in POC, means
     * every implicit weight would be the default — disable weighting */
    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
        h->use_weight= 0;
        h->use_weight_chroma= 0;
        return;
    }
        ref_start= 0;
        ref_count0= h->ref_count[0];
        ref_count1= h->ref_count[1];
    }else{
        cur_poc = s->current_picture_ptr->field_poc[field];
        /* field references live at indices 16.. in ref_list */
        ref_start= 16;
        ref_count0= 16+2*h->ref_count[0];
        ref_count1= 16+2*h->ref_count[1];
    }

    /* 2 marks "implicit" mode; denominators are fixed at 1<<5 = 32 */
    h->use_weight= 2;
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=ref_start; ref0 < ref_count0; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=ref_start; ref1 < ref_count1; ref1++){
            int w = 32; /* default: equal weighting */
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                int poc1 = h->ref_list[1][ref1].poc;
                int td = av_clip(poc1 - poc0, -128, 127);
                if(td){
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
                    int dist_scale_factor = (tb*tx + 32) >> 8;
                    if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                        w = 64 - dist_scale_factor;
                }
            }
            if(field<0){
                h->implicit_weight[ref0][ref1][0]=
                h->implicit_weight[ref0][ref1][1]= w;
            }else{
                h->implicit_weight[ref0][ref1][field]=w;
            }
        }
    }
}
02371 
02375 static void idr(H264Context *h){
02376     ff_h264_remove_all_refs(h);
02377     h->prev_frame_num= 0;
02378     h->prev_frame_num_offset= 0;
02379     h->prev_poc_msb=
02380     h->prev_poc_lsb= 0;
02381 }
02382 
02383 /* forget old pics after a seek */
02384 static void flush_dpb(AVCodecContext *avctx){
02385     H264Context *h= avctx->priv_data;
02386     int i;
02387     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
02388         if(h->delayed_pic[i])
02389             h->delayed_pic[i]->f.reference = 0;
02390         h->delayed_pic[i]= NULL;
02391     }
02392     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
02393         h->last_pocs[i] = INT_MIN;
02394     h->outputed_poc=h->next_outputed_poc= INT_MIN;
02395     h->prev_interlaced_frame = 1;
02396     idr(h);
02397     if(h->s.current_picture_ptr)
02398         h->s.current_picture_ptr->f.reference = 0;
02399     h->s.first_field= 0;
02400     ff_h264_reset_sei(h);
02401     ff_mpeg_flush(avctx);
02402 }
02403 
/**
 * Compute the picture order count (POC) of the current picture from the
 * slice header fields, following the three derivation modes of the spec
 * (pic_order_cnt_type 0, 1 and 2). Stores the per-field POCs in the
 * current picture and cur->poc as their minimum.
 *
 * @return 0 (always succeeds)
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    /* frame_num wrapped around since the previous picture */
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        /* extend poc_lsb to a full POC by tracking lsb wrap-arounds */
        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        /* POC derived from frame_num via the SPS reference offset cycle */
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* poc_type 2: POC follows decoding order (2*frame_num, minus one
         * for non-reference pictures) */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    /* only store the POC of field(s) actually present in this picture */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}
02480 
02481 
02485 static void init_scan_tables(H264Context *h){
02486     int i;
02487     for(i=0; i<16; i++){
02488 #define T(x) (x>>2) | ((x<<2) & 0xF)
02489         h->zigzag_scan[i] = T(zigzag_scan[i]);
02490         h-> field_scan[i] = T( field_scan[i]);
02491 #undef T
02492     }
02493     for(i=0; i<64; i++){
02494 #define T(x) (x>>3) | ((x&7)<<3)
02495         h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
02496         h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
02497         h->field_scan8x8[i]        = T(field_scan8x8[i]);
02498         h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
02499 #undef T
02500     }
02501     if(h->sps.transform_bypass){ //FIXME same ugly
02502         h->zigzag_scan_q0          = zigzag_scan;
02503         h->zigzag_scan8x8_q0       = ff_zigzag_direct;
02504         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
02505         h->field_scan_q0           = field_scan;
02506         h->field_scan8x8_q0        = field_scan8x8;
02507         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
02508     }else{
02509         h->zigzag_scan_q0          = h->zigzag_scan;
02510         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
02511         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
02512         h->field_scan_q0           = h->field_scan;
02513         h->field_scan8x8_q0        = h->field_scan8x8;
02514         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
02515     }
02516 }
02517 
/**
 * Finish decoding the current field (or frame): report completion to
 * frame-threading consumers and hardware accelerators, run reference
 * picture marking, and save the frame_num/POC prediction state for the
 * next picture.
 *
 * @param in_setup nonzero when called from frame-thread setup; progress is
 *                 then not reported here, but ref marking/state saving is
 *                 still performed
 * @return 0 on success, or the error from ref pic marking
 */
static int field_end(H264Context *h, int in_setup){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    int err = 0;
    s->mb_y= 0;

    if (!in_setup && !s->dropable)
        ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
                                  s->picture_structure == PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    /* with frame threading, ref marking is done during setup by the owner */
    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
        if(!s->dropable) {
            err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        }
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        h->outputed_poc = h->next_outputed_poc;
    }

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /*
     * FIXME: Error handling code does not seem to support interlaced
     * when slices span multiple rows
     * The ff_er_add_slice calls don't work right for bottom
     * fields; they cause massive erroneous error concealing
     * Error marking covers both fields (top and bottom).
     * This causes a mismatched s->error_count
     * and a bad error table. Further, the error count goes to
     * INT_MAX when called for bottom field, because mb_y is
     * past end by one (callers fault) and resync_mb_y != 0
     * causes problems for the first MB line, too.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;

    return err;
}
02571 
02575 static void clone_slice(H264Context *dst, H264Context *src)
02576 {
02577     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
02578     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
02579     dst->s.current_picture      = src->s.current_picture;
02580     dst->s.linesize             = src->s.linesize;
02581     dst->s.uvlinesize           = src->s.uvlinesize;
02582     dst->s.first_field          = src->s.first_field;
02583 
02584     dst->prev_poc_msb           = src->prev_poc_msb;
02585     dst->prev_poc_lsb           = src->prev_poc_lsb;
02586     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
02587     dst->prev_frame_num         = src->prev_frame_num;
02588     dst->short_ref_count        = src->short_ref_count;
02589 
02590     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
02591     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
02592     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
02593     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
02594 
02595     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
02596     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
02597 }
02598 
02606 int ff_h264_get_profile(SPS *sps)
02607 {
02608     int profile = sps->profile_idc;
02609 
02610     switch(sps->profile_idc) {
02611     case FF_PROFILE_H264_BASELINE:
02612         // constraint_set1_flag set to 1
02613         profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
02614         break;
02615     case FF_PROFILE_H264_HIGH_10:
02616     case FF_PROFILE_H264_HIGH_422:
02617     case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
02618         // constraint_set3_flag set to 1
02619         profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
02620         break;
02621     }
02622 
02623     return profile;
02624 }
02625 
02635 static int decode_slice_header(H264Context *h, H264Context *h0){
02636     MpegEncContext * const s = &h->s;
02637     MpegEncContext * const s0 = &h0->s;
02638     unsigned int first_mb_in_slice;
02639     unsigned int pps_id;
02640     int num_ref_idx_active_override_flag;
02641     unsigned int slice_type, tmp, i, j;
02642     int default_ref_list_done = 0;
02643     int last_pic_structure, last_pic_dropable;
02644 
02645     /* FIXME: 2tap qpel isn't implemented for high bit depth. */
02646     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
02647         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
02648         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
02649     }else{
02650         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
02651         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
02652     }
02653 
02654     first_mb_in_slice= get_ue_golomb(&s->gb);
02655 
02656     if(first_mb_in_slice == 0){ //FIXME better field boundary detection
02657         if(h0->current_slice && FIELD_PICTURE){
02658             field_end(h, 1);
02659         }
02660 
02661         h0->current_slice = 0;
02662         if (!s0->first_field) {
02663             if (s->current_picture_ptr && !s->dropable &&
02664                 s->current_picture_ptr->owner2 == s) {
02665                 ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
02666                                           s->picture_structure == PICT_BOTTOM_FIELD);
02667             }
02668             s->current_picture_ptr = NULL;
02669         }
02670     }
02671 
02672     slice_type= get_ue_golomb_31(&s->gb);
02673     if(slice_type > 9){
02674         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
02675         return -1;
02676     }
02677     if(slice_type > 4){
02678         slice_type -= 5;
02679         h->slice_type_fixed=1;
02680     }else
02681         h->slice_type_fixed=0;
02682 
02683     slice_type= golomb_to_pict_type[ slice_type ];
02684     if (slice_type == AV_PICTURE_TYPE_I
02685         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
02686         default_ref_list_done = 1;
02687     }
02688     h->slice_type= slice_type;
02689     h->slice_type_nos= slice_type & 3;
02690 
02691     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
02692 
02693     pps_id= get_ue_golomb(&s->gb);
02694     if(pps_id>=MAX_PPS_COUNT){
02695         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
02696         return -1;
02697     }
02698     if(!h0->pps_buffers[pps_id]) {
02699         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
02700         return -1;
02701     }
02702     h->pps= *h0->pps_buffers[pps_id];
02703 
02704     if(!h0->sps_buffers[h->pps.sps_id]) {
02705         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
02706         return -1;
02707     }
02708     h->sps = *h0->sps_buffers[h->pps.sps_id];
02709 
02710     s->avctx->profile = ff_h264_get_profile(&h->sps);
02711     s->avctx->level   = h->sps.level_idc;
02712     s->avctx->refs    = h->sps.ref_frame_count;
02713 
02714     s->mb_width= h->sps.mb_width;
02715     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
02716 
02717     h->b_stride=  s->mb_width*4;
02718 
02719     s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
02720 
02721     s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
02722     if(h->sps.frame_mbs_only_flag)
02723         s->height= 16*s->mb_height - (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
02724     else
02725         s->height= 16*s->mb_height - (2<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
02726 
02727     if (FFALIGN(s->avctx->width,  16) == s->width &&
02728         FFALIGN(s->avctx->height, 16) == s->height) {
02729         s->width  = s->avctx->width;
02730         s->height = s->avctx->height;
02731     }
02732 
02733     if (s->context_initialized
02734         && (   s->width != s->avctx->width || s->height != s->avctx->height
02735             || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
02736         if(h != h0 || (HAVE_THREADS && h->s.avctx->active_thread_type & FF_THREAD_FRAME)) {
02737             av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);
02738             return AVERROR_PATCHWELCOME;   // width / height changed during parallelized decoding
02739         }
02740         free_tables(h, 0);
02741         flush_dpb(s->avctx);
02742         MPV_common_end(s);
02743     }
02744     if (!s->context_initialized) {
02745         if (h != h0) {
02746             av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
02747             return -1;
02748         }
02749 
02750         avcodec_set_dimensions(s->avctx, s->width, s->height);
02751         s->avctx->sample_aspect_ratio= h->sps.sar;
02752         av_assert0(s->avctx->sample_aspect_ratio.den);
02753 
02754         if(h->sps.video_signal_type_present_flag){
02755             s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
02756             if(h->sps.colour_description_present_flag){
02757                 s->avctx->color_primaries = h->sps.color_primaries;
02758                 s->avctx->color_trc       = h->sps.color_trc;
02759                 s->avctx->colorspace      = h->sps.colorspace;
02760             }
02761         }
02762 
02763         if(h->sps.timing_info_present_flag){
02764             int64_t den= h->sps.time_scale;
02765             if(h->x264_build < 44U)
02766                 den *= 2;
02767             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
02768                       h->sps.num_units_in_tick, den, 1<<30);
02769         }
02770 
02771         switch (h->sps.bit_depth_luma) {
02772             case 9 :
02773                 if (CHROMA444) {
02774                     if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02775                         s->avctx->pix_fmt = PIX_FMT_GBRP9;
02776                     } else
02777                         s->avctx->pix_fmt = PIX_FMT_YUV444P9;
02778                 } else if (CHROMA422)
02779                     s->avctx->pix_fmt = PIX_FMT_YUV422P9;
02780                 else
02781                     s->avctx->pix_fmt = PIX_FMT_YUV420P9;
02782                 break;
02783             case 10 :
02784                 if (CHROMA444) {
02785                     if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02786                         s->avctx->pix_fmt = PIX_FMT_GBRP10;
02787                     } else
02788                         s->avctx->pix_fmt = PIX_FMT_YUV444P10;
02789                 } else if (CHROMA422)
02790                     s->avctx->pix_fmt = PIX_FMT_YUV422P10;
02791                 else
02792                     s->avctx->pix_fmt = PIX_FMT_YUV420P10;
02793                 break;
02794             case 8:
02795                 if (CHROMA444){
02796                     if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02797                         s->avctx->pix_fmt = PIX_FMT_GBRP;
02798                     } else
02799                         s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
02800                 } else if (CHROMA422) {
02801                     s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P;
02802                 }else{
02803                     s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
02804                                                              s->avctx->codec->pix_fmts ?
02805                                                              s->avctx->codec->pix_fmts :
02806                                                              s->avctx->color_range == AVCOL_RANGE_JPEG ?
02807                                                              hwaccel_pixfmt_list_h264_jpeg_420 :
02808                                                              ff_hwaccel_pixfmt_list_420);
02809                 }
02810                 break;
02811             default:
02812                 av_log(s->avctx, AV_LOG_ERROR,
02813                        "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
02814                 return AVERROR_INVALIDDATA;
02815         }
02816 
02817         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
02818 
02819         if (MPV_common_init(s) < 0) {
02820             av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
02821             return -1;
02822         }
02823         s->first_field = 0;
02824         h->prev_interlaced_frame = 1;
02825 
02826         init_scan_tables(h);
02827         if (ff_h264_alloc_tables(h) < 0) {
02828             av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
02829             return AVERROR(ENOMEM);
02830         }
02831 
02832         if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
02833             if (context_init(h) < 0) {
02834                 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02835                 return -1;
02836             }
02837         } else {
02838             for(i = 1; i < s->slice_context_count; i++) {
02839                 H264Context *c;
02840                 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
02841                 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
02842                 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
02843                 c->h264dsp = h->h264dsp;
02844                 c->sps = h->sps;
02845                 c->pps = h->pps;
02846                 c->pixel_shift = h->pixel_shift;
02847                 init_scan_tables(c);
02848                 clone_tables(c, h, i);
02849             }
02850 
02851             for(i = 0; i < s->slice_context_count; i++)
02852                 if (context_init(h->thread_context[i]) < 0) {
02853                     av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02854                     return -1;
02855                 }
02856         }
02857     }
02858 
02859     if(h == h0 && h->dequant_coeff_pps != pps_id){
02860         h->dequant_coeff_pps = pps_id;
02861         init_dequant_tables(h);
02862     }
02863 
02864     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
02865 
02866     h->mb_mbaff = 0;
02867     h->mb_aff_frame = 0;
02868     last_pic_structure = s0->picture_structure;
02869     last_pic_dropable  = s->dropable;
02870     s->dropable        = h->nal_ref_idc == 0;
02871     if(h->sps.frame_mbs_only_flag){
02872         s->picture_structure= PICT_FRAME;
02873     }else{
02874         if(get_bits1(&s->gb)) { //field_pic_flag
02875             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
02876         } else {
02877             s->picture_structure= PICT_FRAME;
02878             h->mb_aff_frame = h->sps.mb_aff;
02879         }
02880     }
02881     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
02882 
02883     if (h0->current_slice != 0) {
02884         if (last_pic_structure != s->picture_structure ||
02885             last_pic_dropable  != s->dropable) {
02886             av_log(h->s.avctx, AV_LOG_ERROR,
02887                    "Changing field mode (%d -> %d) between slices is not allowed\n",
02888                    last_pic_structure, s->picture_structure);
02889             s->picture_structure = last_pic_structure;
02890             s->dropable          = last_pic_dropable;
02891             return AVERROR_INVALIDDATA;
02892         }
02893     } else {
02894         /* Shorten frame num gaps so we don't have to allocate reference
02895          * frames just to throw them away */
02896         if (h->frame_num != h->prev_frame_num) {
02897             int unwrap_prev_frame_num = h->prev_frame_num;
02898             int max_frame_num         = 1 << h->sps.log2_max_frame_num;
02899 
02900             if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
02901 
02902             if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
02903                 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
02904                 if (unwrap_prev_frame_num < 0)
02905                     unwrap_prev_frame_num += max_frame_num;
02906 
02907                 h->prev_frame_num = unwrap_prev_frame_num;
02908             }
02909         }
02910 
02911         /* See if we have a decoded first field looking for a pair...
02912          * Here, we're using that to see if we should mark previously
02913          * decode frames as "finished".
02914          * We have to do that before the "dummy" in-between frame allocation,
02915          * since that can modify s->current_picture_ptr. */
02916         if (s0->first_field) {
02917             assert(s0->current_picture_ptr);
02918             assert(s0->current_picture_ptr->f.data[0]);
02919             assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
02920 
02921             /* Mark old field/frame as completed */
02922             if (!last_pic_dropable && s0->current_picture_ptr->owner2 == s0) {
02923                 ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
02924                                           last_pic_structure == PICT_BOTTOM_FIELD);
02925             }
02926 
02927             /* figure out if we have a complementary field pair */
02928             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
02929                 /* Previous field is unmatched. Don't display it, but let it
02930                  * remain for reference if marked as such. */
02931                 if (!last_pic_dropable && last_pic_structure != PICT_FRAME) {
02932                     ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
02933                                               last_pic_structure == PICT_TOP_FIELD);
02934                 }
02935             } else {
02936                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
02937                     /* This and previous field were reference, but had
02938                      * different frame_nums. Consider this field first in
02939                      * pair. Throw away previous field except for reference
02940                      * purposes. */
02941                     if (!last_pic_dropable && last_pic_structure != PICT_FRAME) {
02942                         ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
02943                                                   last_pic_structure == PICT_TOP_FIELD);
02944                     }
02945                 } else {
02946                     /* Second field in complementary pair */
02947                     if (!((last_pic_structure   == PICT_TOP_FIELD &&
02948                            s->picture_structure == PICT_BOTTOM_FIELD) ||
02949                           (last_pic_structure   == PICT_BOTTOM_FIELD &&
02950                            s->picture_structure == PICT_TOP_FIELD))) {
02951                         av_log(s->avctx, AV_LOG_ERROR,
02952                                "Invalid field mode combination %d/%d\n",
02953                                last_pic_structure, s->picture_structure);
02954                         s->picture_structure = last_pic_structure;
02955                         s->dropable          = last_pic_dropable;
02956                         return AVERROR_INVALIDDATA;
02957                     } else if (last_pic_dropable != s->dropable) {
02958                         av_log(s->avctx, AV_LOG_ERROR,
02959                                "Cannot combine reference and non-reference fields in the same frame\n");
02960                         av_log_ask_for_sample(s->avctx, NULL);
02961                         s->picture_structure = last_pic_structure;
02962                         s->dropable          = last_pic_dropable;
02963                         return AVERROR_INVALIDDATA;
02964                     }
02965 
02966                     /* Take ownership of this buffer. Note that if another thread owned
02967                      * the first field of this buffer, we're not operating on that pointer,
02968                      * so the original thread is still responsible for reporting progress
02969                      * on that first field (or if that was us, we just did that above).
02970                      * By taking ownership, we assign responsibility to ourselves to
02971                      * report progress on the second field. */
02972                     s0->current_picture_ptr->owner2 = s0;
02973                 }
02974             }
02975         }
02976 
02977         while (h->frame_num != h->prev_frame_num &&
02978                h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) {
02979             Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
02980             av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
02981             if (ff_h264_frame_start(h) < 0)
02982                 return -1;
02983             h->prev_frame_num++;
02984             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
02985             s->current_picture_ptr->frame_num= h->prev_frame_num;
02986             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
02987             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
02988             ff_generate_sliding_window_mmcos(h);
02989             if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
02990                 (s->avctx->err_recognition & AV_EF_EXPLODE))
02991                 return AVERROR_INVALIDDATA;
02992             /* Error concealment: if a ref is missing, copy the previous ref in its place.
02993              * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
02994              * about there being no actual duplicates.
02995              * FIXME: this doesn't copy padding for out-of-frame motion vectors.  Given we're
02996              * concealing a lost frame, this probably isn't noticeable by comparison, but it should
02997              * be fixed. */
02998             if (h->short_ref_count) {
02999                 if (prev) {
03000                     av_image_copy(h->short_ref[0]->f.data, h->short_ref[0]->f.linesize,
03001                                   (const uint8_t**)prev->f.data, prev->f.linesize,
03002                                   s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
03003                     h->short_ref[0]->poc = prev->poc+2;
03004                 }
03005                 h->short_ref[0]->frame_num = h->prev_frame_num;
03006             }
03007         }
03008 
03009         /* See if we have a decoded first field looking for a pair...
03010          * We're using that to see whether to continue decoding in that
03011          * frame, or to allocate a new one. */
03012         if (s0->first_field) {
03013             assert(s0->current_picture_ptr);
03014             assert(s0->current_picture_ptr->f.data[0]);
03015             assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
03016 
03017             /* figure out if we have a complementary field pair */
03018             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
03019                 /*
03020                  * Previous field is unmatched. Don't display it, but let it
03021                  * remain for reference if marked as such.
03022                  */
03023                 s0->current_picture_ptr = NULL;
03024                 s0->first_field = FIELD_PICTURE;
03025 
03026             } else {
03027                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
03028                     /* This and the previous field had different frame_nums.
03029                      * Consider this field first in pair. Throw away previous
03030                      * one except for reference purposes. */
03031                     s0->first_field         = 1;
03032                     s0->current_picture_ptr = NULL;
03033 
03034                 } else {
03035                     /* Second field in complementary pair */
03036                     s0->first_field = 0;
03037                 }
03038             }
03039 
03040         } else {
03041             /* Frame or first field in a potentially complementary pair */
03042             assert(!s0->current_picture_ptr);
03043             s0->first_field = FIELD_PICTURE;
03044         }
03045 
03046         if(!FIELD_PICTURE || s0->first_field) {
03047             if (ff_h264_frame_start(h) < 0) {
03048                 s0->first_field = 0;
03049                 return -1;
03050             }
03051         } else {
03052             ff_release_unused_pictures(s, 0);
03053         }
03054     }
03055     if(h != h0)
03056         clone_slice(h, h0);
03057 
03058     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
03059 
03060     assert(s->mb_num == s->mb_width * s->mb_height);
03061     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
03062        first_mb_in_slice                    >= s->mb_num){
03063         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
03064         return -1;
03065     }
03066     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
03067     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
03068     if (s->picture_structure == PICT_BOTTOM_FIELD)
03069         s->resync_mb_y = s->mb_y = s->mb_y + 1;
03070     assert(s->mb_y < s->mb_height);
03071 
03072     if(s->picture_structure==PICT_FRAME){
03073         h->curr_pic_num=   h->frame_num;
03074         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
03075     }else{
03076         h->curr_pic_num= 2*h->frame_num + 1;
03077         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
03078     }
03079 
03080     if(h->nal_unit_type == NAL_IDR_SLICE){
03081         get_ue_golomb(&s->gb); /* idr_pic_id */
03082     }
03083 
03084     if(h->sps.poc_type==0){
03085         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
03086 
03087         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
03088             h->delta_poc_bottom= get_se_golomb(&s->gb);
03089         }
03090     }
03091 
03092     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
03093         h->delta_poc[0]= get_se_golomb(&s->gb);
03094 
03095         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
03096             h->delta_poc[1]= get_se_golomb(&s->gb);
03097     }
03098 
03099     init_poc(h);
03100 
03101     if(h->pps.redundant_pic_cnt_present){
03102         h->redundant_pic_count= get_ue_golomb(&s->gb);
03103     }
03104 
03105     //set defaults, might be overridden a few lines later
03106     h->ref_count[0]= h->pps.ref_count[0];
03107     h->ref_count[1]= h->pps.ref_count[1];
03108 
03109     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
03110         int max_refs = s->picture_structure == PICT_FRAME ? 16 : 32;
03111 
03112         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
03113             h->direct_spatial_mv_pred= get_bits1(&s->gb);
03114         }
03115         num_ref_idx_active_override_flag= get_bits1(&s->gb);
03116 
03117         if(num_ref_idx_active_override_flag){
03118             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
03119             if(h->slice_type_nos==AV_PICTURE_TYPE_B)
03120                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
03121         }
03122 
03123         if (h->ref_count[0] > max_refs || h->ref_count[1] > max_refs) {
03124             av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
03125             h->ref_count[0] = h->ref_count[1] = 1;
03126             return AVERROR_INVALIDDATA;
03127         }
03128 
03129         if(h->slice_type_nos == AV_PICTURE_TYPE_B)
03130             h->list_count= 2;
03131         else
03132             h->list_count= 1;
03133     }else
03134         h->list_count= 0;
03135 
03136     if(!default_ref_list_done){
03137         ff_h264_fill_default_ref_list(h);
03138     }
03139 
03140     if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) {
03141         h->ref_count[1]= h->ref_count[0]= 0;
03142         return -1;
03143     }
03144 
03145     if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
03146         s->last_picture_ptr= &h->ref_list[0][0];
03147         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
03148     }
03149     if(h->slice_type_nos==AV_PICTURE_TYPE_B){
03150         s->next_picture_ptr= &h->ref_list[1][0];
03151         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
03152     }
03153 
03154     if(   (h->pps.weighted_pred          && h->slice_type_nos == AV_PICTURE_TYPE_P )
03155        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
03156         pred_weight_table(h);
03157     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
03158         implicit_weight_table(h, -1);
03159     }else {
03160         h->use_weight = 0;
03161         for (i = 0; i < 2; i++) {
03162             h->luma_weight_flag[i]   = 0;
03163             h->chroma_weight_flag[i] = 0;
03164         }
03165     }
03166 
03167     if(h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 &&
03168        (s->avctx->err_recognition & AV_EF_EXPLODE))
03169         return AVERROR_INVALIDDATA;
03170 
03171     if(FRAME_MBAFF){
03172         ff_h264_fill_mbaff_ref_list(h);
03173 
03174         if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
03175             implicit_weight_table(h, 0);
03176             implicit_weight_table(h, 1);
03177         }
03178     }
03179 
03180     if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
03181         ff_h264_direct_dist_scale_factor(h);
03182     ff_h264_direct_ref_list_init(h);
03183 
03184     if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
03185         tmp = get_ue_golomb_31(&s->gb);
03186         if(tmp > 2){
03187             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
03188             return -1;
03189         }
03190         h->cabac_init_idc= tmp;
03191     }
03192 
03193     h->last_qscale_diff = 0;
03194     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
03195     if(tmp>51+6*(h->sps.bit_depth_luma-8)){
03196         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
03197         return -1;
03198     }
03199     s->qscale= tmp;
03200     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
03201     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
03202     //FIXME qscale / qp ... stuff
03203     if(h->slice_type == AV_PICTURE_TYPE_SP){
03204         get_bits1(&s->gb); /* sp_for_switch_flag */
03205     }
03206     if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
03207         get_se_golomb(&s->gb); /* slice_qs_delta */
03208     }
03209 
03210     h->deblocking_filter = 1;
03211     h->slice_alpha_c0_offset = 52;
03212     h->slice_beta_offset = 52;
03213     if( h->pps.deblocking_filter_parameters_present ) {
03214         tmp= get_ue_golomb_31(&s->gb);
03215         if(tmp > 2){
03216             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
03217             return -1;
03218         }
03219         h->deblocking_filter= tmp;
03220         if(h->deblocking_filter < 2)
03221             h->deblocking_filter^= 1; // 1<->0
03222 
03223         if( h->deblocking_filter ) {
03224             h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
03225             h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
03226             if(   h->slice_alpha_c0_offset > 104U
03227                || h->slice_beta_offset     > 104U){
03228                 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
03229                 return -1;
03230             }
03231         }
03232     }
03233 
03234     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
03235        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
03236        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == AV_PICTURE_TYPE_B)
03237        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
03238         h->deblocking_filter= 0;
03239 
03240     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
03241         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
03242             /* Cheat slightly for speed:
03243                Do not bother to deblock across slices. */
03244             h->deblocking_filter = 2;
03245         } else {
03246             h0->max_contexts = 1;
03247             if(!h0->single_decode_warning) {
03248                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
03249                 h0->single_decode_warning = 1;
03250             }
03251             if (h != h0) {
03252                 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
03253                 return 1;
03254             }
03255         }
03256     }
03257     h->qp_thresh = 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset)
03258                  - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1])
03259                  + 6 * (h->sps.bit_depth_luma - 8);
03260 
03261 #if 0 //FMO
03262     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
03263         slice_group_change_cycle= get_bits(&s->gb, ?);
03264 #endif
03265 
03266     h0->last_slice_type = slice_type;
03267     h->slice_num = ++h0->current_slice;
03268     if(h->slice_num >= MAX_SLICES){
03269         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
03270     }
03271 
03272     for(j=0; j<2; j++){
03273         int id_list[16];
03274         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
03275         for(i=0; i<16; i++){
03276             id_list[i]= 60;
03277             if (h->ref_list[j][i].f.data[0]) {
03278                 int k;
03279                 uint8_t *base = h->ref_list[j][i].f.base[0];
03280                 for(k=0; k<h->short_ref_count; k++)
03281                     if (h->short_ref[k]->f.base[0] == base) {
03282                         id_list[i]= k;
03283                         break;
03284                     }
03285                 for(k=0; k<h->long_ref_count; k++)
03286                     if (h->long_ref[k] && h->long_ref[k]->f.base[0] == base) {
03287                         id_list[i]= h->short_ref_count + k;
03288                         break;
03289                     }
03290             }
03291         }
03292 
03293         ref2frm[0]=
03294         ref2frm[1]= -1;
03295         for(i=0; i<16; i++)
03296             ref2frm[i+2]= 4*id_list[i]
03297                           + (h->ref_list[j][i].f.reference & 3);
03298         ref2frm[18+0]=
03299         ref2frm[18+1]= -1;
03300         for(i=16; i<48; i++)
03301             ref2frm[i+4]= 4*id_list[(i-16)>>1]
03302                           + (h->ref_list[j][i].f.reference & 3);
03303     }
03304 
03305     //FIXME: fix draw_edges+PAFF+frame threads
03306     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
03307     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
03308 
03309     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
03310         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
03311                h->slice_num,
03312                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
03313                first_mb_in_slice,
03314                av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
03315                pps_id, h->frame_num,
03316                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
03317                h->ref_count[0], h->ref_count[1],
03318                s->qscale,
03319                h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
03320                h->use_weight,
03321                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
03322                h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
03323                );
03324     }
03325 
03326     return 0;
03327 }
03328 
03329 int ff_h264_get_slice_type(const H264Context *h)
03330 {
03331     switch (h->slice_type) {
03332     case AV_PICTURE_TYPE_P:  return 0;
03333     case AV_PICTURE_TYPE_B:  return 1;
03334     case AV_PICTURE_TYPE_I:  return 2;
03335     case AV_PICTURE_TYPE_SP: return 3;
03336     case AV_PICTURE_TYPE_SI: return 4;
03337     default:         return -1;
03338     }
03339 }
03340 
/* Fill the motion-vector and reference-index caches (h->mv_cache /
 * h->ref_cache) for one reference list of the current macroblock, for use
 * by the deblocking loop filter (called from fill_filter_caches()).
 * The caches use the scan8 layout: row -1 holds the top neighbour's data,
 * column -1 the left neighbour's, and rows 0..3 the current MB itself. */
static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
                                                      int left_xy[LEFT_MBS], int top_type, int left_type[LEFT_MBS], int mb_xy, int list)
{
    int b_stride = h->b_stride;
    int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
    int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        /* Top neighbour: copy its bottom row of MVs and its bottom pair of
         * 8x8 reference indices into cache row -1, remapping the raw
         * ref indices through the slice's ref2frm table.  The "+ 2" /
         * "+ 20" skips the two -1 guard entries at the start of ref2frm
         * (20 for the MBAFF variant of the table). */
        if(USES_LIST(top_type, list)){
            const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride; // bottom 4x4 MV row of the top MB
            const int b8_xy= 4*top_xy + 2;                   // bottom two 8x8 ref entries of the top MB
            int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
            AV_COPY128(mv_dst - 1*8, s->current_picture.f.motion_val[list][b_xy + 0]);
            ref_cache[0 - 1*8]=
            ref_cache[1 - 1*8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 0]];
            ref_cache[2 - 1*8]=
            ref_cache[3 - 1*8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 1]];
        }else{
            /* Top MB does not use this list: zero the MVs and mark all four
             * top ref-cache entries as LIST_NOT_USED. */
            AV_ZERO128(mv_dst - 1*8);
            AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        }

        /* Left neighbour: only filled when current and left MB have the
         * same field/frame coding (the XOR is interlaced-flag equality);
         * NOTE(review): the mixed MBAFF case is presumably handled by a
         * separate path outside this function — confirm. */
        if(!IS_INTERLACED(mb_type^left_type[LTOP])){
            if(USES_LIST(left_type[LTOP], list)){
                const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3; // rightmost 4x4 MV column of the left MB
                const int b8_xy= 4*left_xy[LTOP] + 1;          // right 8x8 column of the left MB's ref entries
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[LTOP]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY32(mv_dst - 1 +  0, s->current_picture.f.motion_val[list][b_xy + b_stride*0]);
                AV_COPY32(mv_dst - 1 +  8, s->current_picture.f.motion_val[list][b_xy + b_stride*1]);
                AV_COPY32(mv_dst - 1 + 16, s->current_picture.f.motion_val[list][b_xy + b_stride*2]);
                AV_COPY32(mv_dst - 1 + 24, s->current_picture.f.motion_val[list][b_xy + b_stride*3]);
                ref_cache[-1 +  0]=
                ref_cache[-1 +  8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2*0]];
                ref_cache[-1 + 16]=
                ref_cache[-1 + 24]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2*1]];
            }else{
                /* Left MB does not use this list: clear column -1. */
                AV_ZERO32(mv_dst - 1 + 0);
                AV_ZERO32(mv_dst - 1 + 8);
                AV_ZERO32(mv_dst - 1 +16);
                AV_ZERO32(mv_dst - 1 +24);
                ref_cache[-1 +  0]=
                ref_cache[-1 +  8]=
                ref_cache[-1 + 16]=
                ref_cache[-1 + 24]= LIST_NOT_USED;
            }
        }
    }

    /* Current MB does not use this list at all: fill its 4x4 cache area
     * with zero MVs and LIST_NOT_USED refs, then we are done. */
    if(!USES_LIST(mb_type, list)){
        fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0,0), 4);
        AV_WN32A(&ref_cache[0*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[2*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[3*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        return;
    }

    /* Current MB's reference indices: each 8x8 ref index is remapped via
     * ref2frm and replicated across its two 4x4 cache entries per row
     * (the 0x0101 multiply duplicates each byte). */
    {
        int8_t *ref = &s->current_picture.f.ref_index[list][4*mb_xy];
        int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
        uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
        uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]],ref2frm[list][ref[3]])&0x00FF00FF)*0x0101;
        AV_WN32A(&ref_cache[0*8], ref01);
        AV_WN32A(&ref_cache[1*8], ref01);
        AV_WN32A(&ref_cache[2*8], ref23);
        AV_WN32A(&ref_cache[3*8], ref23);
    }

    /* Current MB's motion vectors: copy its four 4x4 MV rows straight from
     * the picture's motion_val plane into cache rows 0..3. */
    {
        int16_t (*mv_src)[2] = &s->current_picture.f.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
        AV_COPY128(mv_dst + 8*0, mv_src + 0*b_stride);
        AV_COPY128(mv_dst + 8*1, mv_src + 1*b_stride);
        AV_COPY128(mv_dst + 8*2, mv_src + 2*b_stride);
        AV_COPY128(mv_dst + 8*3, mv_src + 3*b_stride);
    }
}
03416 
03421 static int fill_filter_caches(H264Context *h, int mb_type){
03422     MpegEncContext * const s = &h->s;
03423     const int mb_xy= h->mb_xy;
03424     int top_xy, left_xy[LEFT_MBS];
03425     int top_type, left_type[LEFT_MBS];
03426     uint8_t *nnz;
03427     uint8_t *nnz_cache;
03428 
03429     top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
03430 
03431     /* Wow, what a mess, why didn't they simplify the interlacing & intra
03432      * stuff, I can't imagine that these complex rules are worth it. */
03433 
03434     left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
03435     if(FRAME_MBAFF){
03436         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]);
03437         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
03438         if(s->mb_y&1){
03439             if (left_mb_field_flag != curr_mb_field_flag) {
03440                 left_xy[LTOP] -= s->mb_stride;
03441             }
03442         }else{
03443             if(curr_mb_field_flag){
03444                 top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1);
03445             }
03446             if (left_mb_field_flag != curr_mb_field_flag) {
03447                 left_xy[LBOT] += s->mb_stride;
03448             }
03449         }
03450     }
03451 
03452     h->top_mb_xy = top_xy;
03453     h->left_mb_xy[LTOP] = left_xy[LTOP];
03454     h->left_mb_xy[LBOT] = left_xy[LBOT];
03455     {
03456         //for sufficiently low qp, filtering wouldn't do anything
03457         //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
03458         int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
03459         int qp = s->current_picture.f.qscale_table[mb_xy];
03460         if(qp <= qp_thresh
03461            && (left_xy[LTOP] < 0 || ((qp + s->current_picture.f.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh)
03462            && (top_xy        < 0 || ((qp + s->current_picture.f.qscale_table[top_xy       ] + 1) >> 1) <= qp_thresh)) {
03463             if(!FRAME_MBAFF)
03464                 return 1;
03465             if ((left_xy[LTOP] < 0            || ((qp + s->current_picture.f.qscale_table[left_xy[LBOT]        ] + 1) >> 1) <= qp_thresh) &&
03466                 (top_xy        < s->mb_stride || ((qp + s->current_picture.f.qscale_table[top_xy - s->mb_stride] + 1) >> 1) <= qp_thresh))
03467                 return 1;
03468         }
03469     }
03470 
03471     top_type        = s->current_picture.f.mb_type[top_xy];
03472     left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]];
03473     left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]];
03474     if(h->deblocking_filter == 2){
03475         if(h->slice_table[top_xy       ] != h->slice_num) top_type= 0;
03476         if(h->slice_table[left_xy[LBOT]] != h->slice_num) left_type[LTOP]= left_type[LBOT]= 0;
03477     }else{
03478         if(h->slice_table[top_xy       ] == 0xFFFF) top_type= 0;
03479         if(h->slice_table[left_xy[LBOT]] == 0xFFFF) left_type[LTOP]= left_type[LBOT] =0;
03480     }
03481     h->top_type       = top_type;
03482     h->left_type[LTOP]= left_type[LTOP];
03483     h->left_type[LBOT]= left_type[LBOT];
03484 
03485     if(IS_INTRA(mb_type))
03486         return 0;
03487 
03488     fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 0);
03489     if(h->list_count == 2)
03490         fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 1);
03491 
03492     nnz = h->non_zero_count[mb_xy];
03493     nnz_cache = h->non_zero_count_cache;
03494     AV_COPY32(&nnz_cache[4+8*1], &nnz[ 0]);
03495     AV_COPY32(&nnz_cache[4+8*2], &nnz[ 4]);
03496     AV_COPY32(&nnz_cache[4+8*3], &nnz[ 8]);
03497     AV_COPY32(&nnz_cache[4+8*4], &nnz[12]);
03498     h->cbp= h->cbp_table[mb_xy];
03499 
03500     if(top_type){
03501         nnz = h->non_zero_count[top_xy];
03502         AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
03503     }
03504 
03505     if(left_type[LTOP]){
03506         nnz = h->non_zero_count[left_xy[LTOP]];
03507         nnz_cache[3+8*1]= nnz[3+0*4];
03508         nnz_cache[3+8*2]= nnz[3+1*4];
03509         nnz_cache[3+8*3]= nnz[3+2*4];
03510         nnz_cache[3+8*4]= nnz[3+3*4];
03511     }
03512 
03513     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
03514     if(!CABAC && h->pps.transform_8x8_mode){
03515         if(IS_8x8DCT(top_type)){
03516             nnz_cache[4+8*0]=
03517             nnz_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
03518             nnz_cache[6+8*0]=
03519             nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
03520         }
03521         if(IS_8x8DCT(left_type[LTOP])){
03522             nnz_cache[3+8*1]=
03523             nnz_cache[3+8*2]= (h->cbp_table[left_xy[LTOP]]&0x2000) >> 12; //FIXME check MBAFF
03524         }
03525         if(IS_8x8DCT(left_type[LBOT])){
03526             nnz_cache[3+8*3]=
03527             nnz_cache[3+8*4]= (h->cbp_table[left_xy[LBOT]]&0x8000) >> 12; //FIXME check MBAFF
03528         }
03529 
03530         if(IS_8x8DCT(mb_type)){
03531             nnz_cache[scan8[0   ]]= nnz_cache[scan8[1   ]]=
03532             nnz_cache[scan8[2   ]]= nnz_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
03533 
03534             nnz_cache[scan8[0+ 4]]= nnz_cache[scan8[1+ 4]]=
03535             nnz_cache[scan8[2+ 4]]= nnz_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
03536 
03537             nnz_cache[scan8[0+ 8]]= nnz_cache[scan8[1+ 8]]=
03538             nnz_cache[scan8[2+ 8]]= nnz_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
03539 
03540             nnz_cache[scan8[0+12]]= nnz_cache[scan8[1+12]]=
03541             nnz_cache[scan8[2+12]]= nnz_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
03542         }
03543     }
03544 
03545     return 0;
03546 }
03547 
/**
 * Run the in-loop deblocking filter over the macroblock columns
 * [start_x, end_x) of the current row (both rows of the pair when
 * FRAME_MBAFF is set; end_mb_y below covers that case).
 *
 * Temporarily overwrites per-MB state (slice_num, list_count, chroma_qp,
 * mb_x/mb_y, ...) while walking the row; slice_type and mb_x/mb_y/qp
 * state are restored/repositioned before returning.
 */
static void loop_filter(H264Context *h, int start_x, int end_x){
    MpegEncContext * const s = &h->s;
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    const int end_mb_y= s->mb_y + FRAME_MBAFF;
    const int old_slice_type= h->slice_type; // restored at the end of this function
    const int pixel_shift = h->pixel_shift;
    const int block_h = 16 >> s->chroma_y_shift; // chroma MB height in pixels

    if(h->deblocking_filter) {
        for(mb_x= start_x; mb_x<end_x; mb_x++){
            // with MBAFF this iterates over both MBs of the vertical pair
            for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                int mb_xy, mb_type;
                mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                h->slice_num= h->slice_table[mb_xy];
                mb_type = s->current_picture.f.mb_type[mb_xy];
                h->list_count= h->list_counts[mb_xy];

                if(FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x= mb_x;
                s->mb_y= mb_y;
                // compute the top-left pixel of this MB in each plane
                dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
                dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
                dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
                    //FIXME simplify above

                if (MB_FIELD) {
                    // field MB: double the stride so we stay in one field
                    linesize   = h->mb_linesize   = s->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if(mb_y&1){ //FIXME move out of this function?
                        // bottom field: step back to the field's first line
                        dest_y -= s->linesize*15;
                        dest_cb-= s->uvlinesize * (block_h - 1);
                        dest_cr-= s->uvlinesize * (block_h - 1);
                    }
                } else {
                    linesize   = h->mb_linesize   = s->linesize;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                }
                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
                // fill_filter_caches() returns nonzero when the MB can be
                // skipped entirely (sufficiently low qp -> filter is a no-op)
                if(fill_filter_caches(h, mb_type))
                    continue;
                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.f.qscale_table[mb_xy]);

                if (FRAME_MBAFF) {
                    ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                } else {
                    // non-MBAFF content can use the fast filter path
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                }
            }
        }
    }
    // restore decoder state that was clobbered while walking the row
    h->slice_type= old_slice_type;
    s->mb_x= end_x;
    s->mb_y= end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}
03608 
03609 static void predict_field_decoding_flag(H264Context *h){
03610     MpegEncContext * const s = &h->s;
03611     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
03612     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
03613                 ? s->current_picture.f.mb_type[mb_xy - 1]
03614                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
03615                 ? s->current_picture.f.mb_type[mb_xy - s->mb_stride]
03616                 : 0;
03617     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
03618 }
03619 
03623 static void decode_finish_row(H264Context *h){
03624     MpegEncContext * const s = &h->s;
03625     int top = 16*(s->mb_y >> FIELD_PICTURE);
03626     int height = 16 << FRAME_MBAFF;
03627     int deblock_border = (16 + 4) << FRAME_MBAFF;
03628     int pic_height = 16*s->mb_height >> FIELD_PICTURE;
03629 
03630     if (h->deblocking_filter) {
03631         if((top + height) >= pic_height)
03632             height += deblock_border;
03633 
03634         top -= deblock_border;
03635     }
03636 
03637     if (top >= pic_height || (top + height) < h->emu_edge_height)
03638         return;
03639 
03640     height = FFMIN(height, pic_height - top);
03641     if (top < h->emu_edge_height) {
03642         height = top+height;
03643         top = 0;
03644     }
03645 
03646     ff_draw_horiz_band(s, top, height);
03647 
03648     if (s->dropable) return;
03649 
03650     ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,
03651                              s->picture_structure==PICT_BOTTOM_FIELD);
03652 }
03653 
/**
 * Decode the macroblocks of one slice, using CABAC or CAVLC entropy
 * decoding as selected by the active PPS, and run the loop filter on
 * completed MB spans.
 *
 * Called directly for a single slice context or through avctx->execute()
 * when slice threading is active (hence the void* argument convention).
 *
 * @param avctx codec context
 * @param arg   pointer to this slice's H264Context pointer
 * @return 0 on a clean slice end, -1 on a decoding error
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg){
    H264Context *h = *(void**)arg;
    MpegEncContext * const s = &h->s;
    const int part_mask= s->partitioned_frame ? (ER_AC_END|ER_AC_ERROR) : 0x7F;
    int lf_x_start = s->mb_x; // first MB column not yet loop-filtered

    s->mb_skip_run= -1;

    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
                    (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));

    if( h->pps.cabac ) {
        /* realign */
        align_get_bits( &s->gb );

        /* init cabac */
        ff_init_cabac_states( &h->cabac);
        ff_init_cabac_decoder( &h->cabac,
                               s->gb.buffer + get_bits_count(&s->gb)/8,
                               (get_bits_left(&s->gb) + 7)/8);

        ff_h264_init_cabac_states(h);

        for(;;){
//START_TIMER
            int ret = ff_h264_decode_mb_cabac(h);
            int eos;
//STOP_TIMER("decode_mb_cabac")

            if(ret>=0) ff_h264_hl_decode_mb(h);

            // with MBAFF the bottom MB of the pair is decoded immediately
            if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;

                ret = ff_h264_decode_mb_cabac(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }
            eos = get_cabac_terminate( &h->cabac );

            // truncated streams (encoder bug workaround): treat overread as slice end
            if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);
                if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
                return 0;
            }
            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR&part_mask);
                return -1;
            }

            // end of MB row: filter it, then advance to the next row
            if( ++s->mb_x >= s->mb_width ) {
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y; // rows advance by MB pairs
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
            }

            if( eos || s->mb_y >= s->mb_height ) {
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);
                if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
                return 0;
            }
        }

    } else {
        /* CAVLC slice: same structure as the CABAC loop above, but the
         * end-of-slice condition is detected via bitstream exhaustion */
        for(;;){
            int ret = ff_h264_decode_mb_cavlc(h);

            if(ret>=0) ff_h264_hl_decode_mb(h);

            if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;
                ret = ff_h264_decode_mb_cavlc(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }

            if(ret<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR&part_mask);
                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
                if(s->mb_y >= s->mb_height){
                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                    // leftover bits after the last MB indicate a damaged slice
                    if (get_bits_left(&s->gb) == 0) {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, ER_MB_END&part_mask);

                        return -1;
                    }
                }
            }

            // bitstream exhausted mid-picture: slice ends here
            if (get_bits_left(&s->gb) <= 0 && s->mb_skip_run <= 0){
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                if (get_bits_left(&s->gb) == 0) {
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);
                    if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);

                    return 0;
                }else{
                    // overread: mark the slice as damaged
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR&part_mask);

                    return -1;
                }
            }
        }
    }
}
03787 
03794 static int execute_decode_slices(H264Context *h, int context_count){
03795     MpegEncContext * const s = &h->s;
03796     AVCodecContext * const avctx= s->avctx;
03797     H264Context *hx;
03798     int i;
03799 
03800     if (s->avctx->hwaccel || s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03801         return 0;
03802     if(context_count == 1) {
03803         return decode_slice(avctx, &h);
03804     } else {
03805         for(i = 1; i < context_count; i++) {
03806             hx = h->thread_context[i];
03807             hx->s.err_recognition = avctx->err_recognition;
03808             hx->s.error_count = 0;
03809         }
03810 
03811         avctx->execute(avctx, decode_slice,
03812                        h->thread_context, NULL, context_count, sizeof(void*));
03813 
03814         /* pull back stuff from slices to master context */
03815         hx = h->thread_context[context_count - 1];
03816         s->mb_x = hx->s.mb_x;
03817         s->mb_y = hx->s.mb_y;
03818         s->dropable = hx->s.dropable;
03819         s->picture_structure = hx->s.picture_structure;
03820         for(i = 1; i < context_count; i++)
03821             h->s.error_count += h->thread_context[i]->s.error_count;
03822     }
03823 
03824     return 0;
03825 }
03826 
03827 
03828 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
03829     MpegEncContext * const s = &h->s;
03830     AVCodecContext * const avctx= s->avctx;
03831     H264Context *hx; 
03832     int buf_index;
03833     int context_count;
03834     int next_avc;
03835     int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
03836     int nals_needed=0; 
03837     int nal_index;
03838 
03839     h->max_contexts = s->slice_context_count;
03840     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
03841         h->current_slice = 0;
03842         if (!s->first_field)
03843             s->current_picture_ptr= NULL;
03844         ff_h264_reset_sei(h);
03845     }
03846 
03847     for(;pass <= 1;pass++){
03848         buf_index = 0;
03849         context_count = 0;
03850         next_avc = h->is_avc ? 0 : buf_size;
03851         nal_index = 0;
03852     for(;;){
03853         int consumed;
03854         int dst_length;
03855         int bit_length;
03856         const uint8_t *ptr;
03857         int i, nalsize = 0;
03858         int err;
03859 
03860         if(buf_index >= next_avc) {
03861             if (buf_index >= buf_size - h->nal_length_size) break;
03862             nalsize = 0;
03863             for(i = 0; i < h->nal_length_size; i++)
03864                 nalsize = (nalsize << 8) | buf[buf_index++];
03865             if(nalsize <= 0 || nalsize > buf_size - buf_index){
03866                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
03867                 break;
03868             }
03869             next_avc= buf_index + nalsize;
03870         } else {
03871             // start code prefix search
03872             for(; buf_index + 3 < next_avc; buf_index++){
03873                 // This should always succeed in the first iteration.
03874                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
03875                     break;
03876             }
03877 
03878 
03879             if (buf_index + 3 >= buf_size) {
03880                 buf_index = buf_size;
03881                 break;
03882             }
03883 
03884             buf_index+=3;
03885             if(buf_index >= next_avc) continue;
03886         }
03887 
03888         hx = h->thread_context[context_count];
03889 
03890         ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
03891         if (ptr == NULL || dst_length < 0) {
03892             buf_index = -1;
03893             goto end;
03894         }
03895         i= buf_index + consumed;
03896         if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
03897            buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
03898             s->workaround_bugs |= FF_BUG_TRUNCATED;
03899 
03900         if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
03901         while(ptr[dst_length - 1] == 0 && dst_length > 0)
03902             dst_length--;
03903         }
03904         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
03905 
03906         if(s->avctx->debug&FF_DEBUG_STARTCODE){
03907             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
03908         }
03909 
03910         if (h->is_avc && (nalsize != consumed) && nalsize){
03911             av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
03912         }
03913 
03914         buf_index += consumed;
03915         nal_index++;
03916 
03917         if(pass == 0) {
03918             // packets can sometimes contain multiple PPS/SPS
03919             // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
03920             // if so, when frame threading we can't start the next thread until we've read all of them
03921             switch (hx->nal_unit_type) {
03922                 case NAL_SPS:
03923                 case NAL_PPS:
03924                     nals_needed = nal_index;
03925                     break;
03926                 case NAL_IDR_SLICE:
03927                 case NAL_SLICE:
03928                     init_get_bits(&hx->s.gb, ptr, bit_length);
03929                     if (!get_ue_golomb(&hx->s.gb))
03930                         nals_needed = nal_index;
03931             }
03932             continue;
03933         }
03934 
03935         //FIXME do not discard SEI id
03936         if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0)
03937             continue;
03938 
03939       again:
03940         err = 0;
03941         switch(hx->nal_unit_type){
03942         case NAL_IDR_SLICE:
03943             if (h->nal_unit_type != NAL_IDR_SLICE) {
03944                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
03945                 buf_index = -1;
03946                 goto end;
03947             }
03948             idr(h); // FIXME ensure we don't lose some frames if there is reordering
03949         case NAL_SLICE:
03950             init_get_bits(&hx->s.gb, ptr, bit_length);
03951             hx->intra_gb_ptr=
03952             hx->inter_gb_ptr= &hx->s.gb;
03953             hx->s.data_partitioning = 0;
03954 
03955             if((err = decode_slice_header(hx, h)))
03956                break;
03957 
03958             s->current_picture_ptr->f.key_frame |=
03959                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
03960                     (h->sei_recovery_frame_cnt >= 0);
03961 
03962             if (h->current_slice == 1) {
03963                 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
03964                     decode_postinit(h, nal_index >= nals_needed);
03965                 }
03966 
03967                 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
03968                     return -1;
03969                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03970                     ff_vdpau_h264_picture_start(s);
03971             }
03972 
03973             if(hx->redundant_pic_count==0
03974                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03975                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03976                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03977                && avctx->skip_frame < AVDISCARD_ALL){
03978                 if(avctx->hwaccel) {
03979                     if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
03980                         return -1;
03981                 }else
03982                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
03983                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
03984                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
03985                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
03986                 }else
03987                     context_count++;
03988             }
03989             break;
03990         case NAL_DPA:
03991             init_get_bits(&hx->s.gb, ptr, bit_length);
03992             hx->intra_gb_ptr=
03993             hx->inter_gb_ptr= NULL;
03994 
03995             if ((err = decode_slice_header(hx, h)) < 0)
03996                 break;
03997 
03998             hx->s.data_partitioning = 1;
03999 
04000             break;
04001         case NAL_DPB:
04002             init_get_bits(&hx->intra_gb, ptr, bit_length);
04003             hx->intra_gb_ptr= &hx->intra_gb;
04004             break;
04005         case NAL_DPC:
04006             init_get_bits(&hx->inter_gb, ptr, bit_length);
04007             hx->inter_gb_ptr= &hx->inter_gb;
04008 
04009             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
04010                && s->context_initialized
04011                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
04012                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
04013                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
04014                && avctx->skip_frame < AVDISCARD_ALL)
04015                 context_count++;
04016             break;
04017         case NAL_SEI:
04018             init_get_bits(&s->gb, ptr, bit_length);
04019             ff_h264_decode_sei(h);
04020             break;
04021         case NAL_SPS:
04022             init_get_bits(&s->gb, ptr, bit_length);
04023             if (ff_h264_decode_seq_parameter_set(h) < 0 &&
04024                 h->is_avc && (nalsize != consumed) && nalsize) {
04025                 av_log(h->s.avctx, AV_LOG_DEBUG, "SPS decoding failure, "
04026                        "try parsing the coomplete NAL\n");
04027                 init_get_bits(&s->gb, buf + buf_index + 1 - consumed,
04028                               8 * (nalsize - 1));
04029                 ff_h264_decode_seq_parameter_set(h);
04030             }
04031 
04032             if (s->flags& CODEC_FLAG_LOW_DELAY ||
04033                 (h->sps.bitstream_restriction_flag && !h->sps.num_reorder_frames))
04034                 s->low_delay=1;
04035 
04036             if(avctx->has_b_frames < 2)
04037                 avctx->has_b_frames= !s->low_delay;
04038 
04039             if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
04040                 h->cur_chroma_format_idc != h->sps.chroma_format_idc) {
04041                 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
04042                     avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
04043                     h->cur_chroma_format_idc = h->sps.chroma_format_idc;
04044                     h->pixel_shift = h->sps.bit_depth_luma > 8;
04045 
04046                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
04047                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
04048                     s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
04049                     dsputil_init(&s->dsp, s->avctx);
04050                 } else {
04051                     av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
04052                     buf_index = -1;
04053                     goto end;
04054                 }
04055             }
04056             break;
04057         case NAL_PPS:
04058             init_get_bits(&s->gb, ptr, bit_length);
04059 
04060             ff_h264_decode_picture_parameter_set(h, bit_length);
04061 
04062             break;
04063         case NAL_AUD:
04064         case NAL_END_SEQUENCE:
04065         case NAL_END_STREAM:
04066         case NAL_FILLER_DATA:
04067         case NAL_SPS_EXT:
04068         case NAL_AUXILIARY_SLICE:
04069             break;
04070         default:
04071             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
04072         }
04073 
04074         if(context_count == h->max_contexts) {
04075             execute_decode_slices(h, context_count);
04076             context_count = 0;
04077         }
04078 
04079         if (err < 0)
04080             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
04081         else if(err == 1) {
04082             /* Slice could not be decoded in parallel mode, copy down
04083              * NAL unit stuff to context 0 and restart. Note that
04084              * rbsp_buffer is not transferred, but since we no longer
04085              * run in parallel mode this should not be an issue. */
04086             h->nal_unit_type = hx->nal_unit_type;
04087             h->nal_ref_idc   = hx->nal_ref_idc;
04088             hx = h;
04089             goto again;
04090         }
04091     }
04092     }
04093     if(context_count)
04094         execute_decode_slices(h, context_count);
04095 
04096 end:
04097     /* clean up */
04098     if (s->current_picture_ptr && s->current_picture_ptr->owner2 == s &&
04099         !s->dropable) {
04100         ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
04101                                   s->picture_structure == PICT_BOTTOM_FIELD);
04102     }
04103 
04104     return buf_index;
04105 }
04106 
04110 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
04111         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
04112         if(pos+10>buf_size) pos=buf_size; // oops ;)
04113 
04114         return pos;
04115 }
04116 
/**
 * AVCodec.decode callback: decode one packet of H.264 bitstream data.
 *
 * An empty packet (buf_size == 0) flushes the delayed-picture queue,
 * returning buffered frames in output (POC) order.
 *
 * @param avctx     codec context
 * @param data      AVFrame to receive the decoded picture
 * @param data_size set to sizeof(AVFrame) when a frame is output, else 0
 * @param avpkt     input packet
 * @return number of bytes consumed, or -1 on error
 */
static int decode_frame(AVCodecContext *avctx,
                             void *data, int *data_size,
                             AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    int buf_index = 0;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

   /* end of stream, output what is still in the buffers */
 out:
    if (buf_size == 0) {
        Picture *out;
        int i, out_idx;

        s->current_picture_ptr = NULL;

//FIXME factorize this with the output code below
        // pick the delayed picture with the smallest POC, stopping the
        // search at a keyframe/MMCO-reset boundary
        out = h->delayed_pic[0];
        out_idx = 0;
        for (i = 1; h->delayed_pic[i] && !h->delayed_pic[i]->f.key_frame && !h->delayed_pic[i]->mmco_reset; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
                out_idx = i;
            }

        // compact the queue over the removed entry
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];

        if(out){
            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;
        }

        return buf_index;
    }

    buf_index=decode_nal_units(h, buf, buf_size);
    if(buf_index < 0)
        return -1;

    // an end-of-sequence NAL without a current picture triggers a flush
    if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        buf_size = 0;
        goto out;
    }

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        if (avctx->skip_frame >= AVDISCARD_NONREF)
            return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
        return -1;
    }

    // a picture is complete either at the end of the packet (normal mode)
    // or when all MB rows were decoded (CHUNKS mode)
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){

        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);

        field_end(h, 0);

        if (!h->next_output_pic) {
            /* Wait for second field. */
            *data_size = 0;

        } else {
            *data_size = sizeof(AVFrame);
            *pict = *(AVFrame*)h->next_output_pic;
        }
    }

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
//printf("out %d\n", (int)pict->data[0]);

    return get_consumed_bytes(s, buf_index, buf_size);
}
#if 0
/* Currently compiled out (unused): computes neighbouring-macroblock
 * availability flags for the MB at (s->mb_x, s->mb_y), based on picture
 * boundaries and slice membership.
 * Index meaning per the assignments below:
 *   0 = top-left, 1 = top, 2 = top-right, 3 = left; 4 and 5 are
 *   hard-coded placeholders (see the FIXMEs). */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        // first MB row: no top neighbours exist
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
04216 
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/* Standalone self-test (built only with -DTEST): round-trips unsigned and
 * signed Exp-Golomb codes through the put/get bit APIs and times each
 * operation with the START_TIMER/STOP_TIMER macros. */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
    DSPContext dsp;
    AVCodecContext avctx;

    avctx.av_class = avcodec_get_class();
    dsputil_init(&dsp, &avctx);

    // write COUNT unsigned Exp-Golomb codes ...
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    // ... and verify they read back unchanged
    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s = show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }

    // same round-trip for signed Exp-Golomb codes, centred around 0
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s = show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

    printf("Testing RBSP\n");

    return 0;
}
#endif /* TEST */
04284 
04285 
04286 av_cold void ff_h264_free_context(H264Context *h)
04287 {
04288     int i;
04289 
04290     free_tables(h, 1); //FIXME cleanup init stuff perhaps
04291 
04292     for(i = 0; i < MAX_SPS_COUNT; i++)
04293         av_freep(h->sps_buffers + i);
04294 
04295     for(i = 0; i < MAX_PPS_COUNT; i++)
04296         av_freep(h->pps_buffers + i);
04297 }
04298 
04299 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
04300 {
04301     H264Context *h = avctx->priv_data;
04302     MpegEncContext *s = &h->s;
04303 
04304     ff_h264_free_context(h);
04305 
04306     MPV_common_end(s);
04307 
04308 //    memset(h, 0, sizeof(H264Context));
04309 
04310     return 0;
04311 }
04312 
/* Human-readable names for the FF_PROFILE_H264_* profile values reported
 * by the decoder; the list is terminated by the FF_PROFILE_UNKNOWN
 * sentinel entry. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
    { FF_PROFILE_H264_MAIN,                 "Main"                  },
    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
    { FF_PROFILE_H264_HIGH,                 "High"                  },
    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
    { FF_PROFILE_UNKNOWN },
};
04329 
04330 AVCodec ff_h264_decoder = {
04331     .name           = "h264",
04332     .type           = AVMEDIA_TYPE_VIDEO,
04333     .id             = CODEC_ID_H264,
04334     .priv_data_size = sizeof(H264Context),
04335     .init           = ff_h264_decode_init,
04336     .close          = ff_h264_decode_end,
04337     .decode         = decode_frame,
04338     .capabilities   = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
04339                       CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
04340     .flush= flush_dpb,
04341     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
04342     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
04343     .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
04344     .profiles = NULL_IF_CONFIG_SMALL(profiles),
04345 };
04346 
#if CONFIG_H264_VDPAU_DECODER
/* Registration of the VDPAU hardware-accelerated H.264 decoder variant. */
AVCodec ff_h264_vdpau_decoder = {
    .name           = "h264_vdpau",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = ff_h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush          = flush_dpb,
    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts       = (const enum PixelFormat[]) { PIX_FMT_VDPAU_H264,
                                                   PIX_FMT_NONE },
    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
};
#endif