/* libavcodec/h264.c */
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "libavutil/imgutils.h"
00029 #include "internal.h"
00030 #include "cabac.h"
00031 #include "cabac_functions.h"
00032 #include "dsputil.h"
00033 #include "avcodec.h"
00034 #include "mpegvideo.h"
00035 #include "h264.h"
00036 #include "h264data.h"
00037 #include "h264_mvpred.h"
00038 #include "golomb.h"
00039 #include "mathops.h"
00040 #include "rectangle.h"
00041 #include "thread.h"
00042 #include "vdpau_internal.h"
00043 #include "libavutil/avassert.h"
00044 
00045 //#undef NDEBUG
00046 #include <assert.h>
00047 
/* qp % 6 lookup table, indexed by quantization parameter (0..QP_MAX_NUM). */
static const uint8_t rem6[QP_MAX_NUM+1]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
00051 
/* qp / 6 lookup table, indexed by quantization parameter (0..QP_MAX_NUM). */
static const uint8_t div6[QP_MAX_NUM+1]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
};
00055 
/* Candidate pixel formats for hwaccel-capable 4:2:0 full-range (JPEG) H.264
 * decoding, in order of preference; terminated by PIX_FMT_NONE. */
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_VDA_VLD,
    PIX_FMT_YUVJ420P,
    PIX_FMT_NONE
};
00063 
00068 int ff_h264_check_intra4x4_pred_mode(H264Context *h){
00069     MpegEncContext * const s = &h->s;
00070     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
00071     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
00072     int i;
00073 
00074     if(!(h->top_samples_available&0x8000)){
00075         for(i=0; i<4; i++){
00076             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
00077             if(status<0){
00078                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00079                 return -1;
00080             } else if(status){
00081                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
00082             }
00083         }
00084     }
00085 
00086     if((h->left_samples_available&0x8888)!=0x8888){
00087         static const int mask[4]={0x8000,0x2000,0x80,0x20};
00088         for(i=0; i<4; i++){
00089             if(!(h->left_samples_available&mask[i])){
00090                 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
00091                 if(status<0){
00092                     av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00093                     return -1;
00094                 } else if(status){
00095                     h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
00096                 }
00097             }
00098         }
00099     }
00100 
00101     return 0;
00102 } //FIXME cleanup like ff_h264_check_intra_pred_mode
00103 
/**
 * Check whether the requested intra prediction mode (16x16 luma or 8x8
 * chroma) is usable given the available neighbouring samples, replacing
 * it with an equivalent variant where possible.
 *
 * @param mode      requested prediction mode, must be in [0, 6]
 * @param is_chroma non-zero when checking a chroma prediction mode
 * @return the (possibly substituted) mode, or -1 if it cannot be used
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma){
    MpegEncContext * const s = &h->s;
    /* Substitution tables indexed by mode; <0 = no valid replacement. */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    /* Unsigned compare: also rejects negative modes. */
    if(mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        if(is_chroma && (h->left_samples_available&0x8080)){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            /* Only one of the two left fields is available: pick the matching
             * partial-left DC variant (uses the post-substitution mode). */
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}
00139 
/**
 * Parse the NAL header byte and unescape the RBSP payload, i.e. remove
 * the 0x000003 emulation-prevention bytes.
 *
 * @param src        input buffer starting at the NAL header byte
 * @param dst_length returns the number of payload bytes
 * @param consumed   returns the number of input bytes consumed (incl. header)
 * @param length     number of bytes available in src
 * @return pointer to the unescaped payload — either into src itself when no
 *         escapes were found, or into h->rbsp_buffer — or NULL on
 *         allocation failure
 */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;                //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;

    /* Scan for 00 00 pairs (potential escape or start code) several bytes
     * at a time; RS is how far the fast loop may overshoot and must back up
     * after a hit. */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    /* Copy the escape-free prefix, then filter out 00 00 03 sequences. */
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}
00225 
00230 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
00231     int v= *src;
00232     int r;
00233 
00234     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
00235 
00236     for(r=1; r<9; r++){
00237         if(v&1) return r;
00238         v>>=1;
00239     }
00240     return 0;
00241 }
00242 
00243 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
00244                                  int y_offset, int list){
00245     int raw_my= h->mv_cache[list][ scan8[n] ][1];
00246     int filter_height= (raw_my&3) ? 2 : 0;
00247     int full_my= (raw_my>>2) + y_offset;
00248     int top = full_my - filter_height, bottom = full_my + height + filter_height;
00249 
00250     return FFMAX(abs(top), bottom);
00251 }
00252 
00253 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
00254                                int y_offset, int list0, int list1, int *nrefs){
00255     MpegEncContext * const s = &h->s;
00256     int my;
00257 
00258     y_offset += 16*(s->mb_y >> MB_FIELD);
00259 
00260     if(list0){
00261         int ref_n = h->ref_cache[0][ scan8[n] ];
00262         Picture *ref= &h->ref_list[0][ref_n];
00263 
00264         // Error resilience puts the current picture in the ref list.
00265         // Don't try to wait on these as it will cause a deadlock.
00266         // Fields can wait on each other, though.
00267         if (ref->f.thread_opaque != s->current_picture.f.thread_opaque ||
00268            (ref->f.reference & 3) != s->picture_structure) {
00269             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
00270             if (refs[0][ref_n] < 0) nrefs[0] += 1;
00271             refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
00272         }
00273     }
00274 
00275     if(list1){
00276         int ref_n = h->ref_cache[1][ scan8[n] ];
00277         Picture *ref= &h->ref_list[1][ref_n];
00278 
00279         if (ref->f.thread_opaque != s->current_picture.f.thread_opaque ||
00280            (ref->f.reference & 3) != s->picture_structure) {
00281             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
00282             if (refs[1][ref_n] < 0) nrefs[1] += 1;
00283             refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
00284         }
00285     }
00286 }
00287 
/**
 * Wait until all reference frames are available for MC operations.
 * Computes, per reference picture, the lowest row this macroblock reads,
 * then blocks in ff_thread_await_progress() until those rows are decoded.
 *
 * @param h the H264 context
 */
static void await_references(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    int refs[2][48];      /* per list/ref index: lowest row needed, -1 if unused */
    int nrefs[2] = {0};   /* number of distinct references recorded per list */
    int ref, list;

    memset(refs, -1, sizeof(refs));

    /* Gather the lowest needed row for every partition of this MB. */
    if(IS_16X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    }else if(IS_16X8(mb_type)){
        get_lowest_part_y(h, refs, 0, 8, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else if(IS_8X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int y_offset= (i&2)<<2;

            if(IS_SUB_8X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_8X4(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 4, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_4X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_y_offset= y_offset + 2*(j&2);
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                }
            }
        }
    }

    /* Now wait on each referenced picture until the needed row is done. */
    for(list=h->list_count-1; list>=0; list--){
        for(ref=0; ref<48 && nrefs[list]; ref++){
            int row = refs[list][ref];
            if(row >= 0){
                Picture *ref_pic = &h->ref_list[list][ref];
                int ref_field = ref_pic->f.reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height = 16*s->mb_height >> ref_field_picture;

                row <<= MB_MBAFF; /* an MBAFF MB row spans two picture rows */
                nrefs[list]--;

                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
                }else if(FIELD_PICTURE){
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
                }else{
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
                }
            }
        }
    }
}
00377 
/* NOTE(review): dead code, compiled out via #if 0 and kept for reference
 * only — it references an undefined `stride` and would not compile. */
#if 0

static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
00417 
00418 #undef xStride
00419 #undef stride
00420 
/* NOTE(review): dead code, compiled out via #if 0; 2x2 chroma DC transform
 * kept for reference only. */
#if 0
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
00443 
/**
 * Motion-compensate one partition from one reference picture for one list:
 * luma through qpix_op, chroma through chroma_op. Falls back to
 * emulated_edge_mc when the motion vector reaches outside the decoded
 * picture area.
 */
static av_always_inline void
mc_dir_part(H264Context *h, Picture *pic, int n, int square,
            int height, int delta, int list,
            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
            int src_x_offset, int src_y_offset,
            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
            int pixel_shift, int chroma_idc)
{
    MpegEncContext * const s = &h->s;
    /* mx/my are in quarter-pel units; the low 2 bits select the qpel filter. */
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);
    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
    uint8_t * src_y = pic->f.data[0] + offset;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;
    int ysh;

    /* Sub-pel positions need extra border samples for the filter taps. */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(chroma_idc == 3 /* yuv444 */){
        /* 4:4:4: chroma planes are full-size, use the luma qpel MC. */
        src_cb = pic->f.data[1] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
        }

        src_cr = pic->f.data[2] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        }
        return;
    }

    /* Vertical chroma shift: 3 for 4:2:0, 2 for 4:2:2. */
    ysh = 3 - (chroma_idc == 2 /* yuv422 */);
    if(chroma_idc == 1 /* yuv420 */ && MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }

    src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;
    src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);
}
00541 
/**
 * Standard (unweighted) MC for one partition: predict from list 0 and/or
 * list 1. When both lists are used, the second prediction is combined via
 * the avg variants of the MC functions.
 */
static av_always_inline void
mc_part_std(H264Context *h, int n, int square, int height, int delta,
            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
            int x_offset, int y_offset,
            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
            int list0, int list1, int pixel_shift, int chroma_idc)
{
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* Advance the destination pointers to this partition; the chroma step
     * depends on the subsampling (chroma_idc). */
    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if (chroma_idc == 3 /* yuv444 */) {
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    } else if (chroma_idc == 2 /* yuv422 */) {
        dest_cb += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
    } else /* yuv420 */ {
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op, pixel_shift, chroma_idc);

        /* A following list-1 prediction must be averaged into the result. */
        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op, pixel_shift, chroma_idc);
    }
}
00585 
/**
 * Weighted MC for one partition. For bi-prediction both references are
 * motion compensated (the second into scratch buffers) and combined with
 * implicit (use_weight == 2) or explicit weights; for uni-prediction the
 * explicit weight/offset is applied to the prediction in place.
 */
static av_always_inline void
mc_part_weighted(H264Context *h, int n, int square, int height, int delta,
                 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                 int x_offset, int y_offset,
                 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                 int list0, int list1, int pixel_shift, int chroma_idc){
    MpegEncContext * const s = &h->s;
    int chroma_height;

    /* Advance destination pointers to this partition; chroma step depends
     * on the subsampling. For 4:4:4 the luma weight functions are reused. */
    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if (chroma_idc == 3 /* yuv444 */) {
        chroma_height = height;
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op = luma_weight_op;
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    } else if (chroma_idc == 2 /* yuv422 */) {
        chroma_height = height;
        dest_cb += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
    } else /* yuv420 */ {
        chroma_height = height >> 1;
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* First prediction into dest, second into scratch buffers. */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);

        if(h->use_weight == 2){
            /* Implicit weighting: weights always sum to 64, denom 5. */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize,
                              height,        5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
        }else{
            /* Explicit weighting: per-reference weights and offsets. */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        /* Uni-directional: predict, then apply explicit weight in place. */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma_idc);

        luma_weight_op(dest_y, h->mb_linesize, height, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}
00672 
00673 static av_always_inline void
00674 mc_part(H264Context *h, int n, int square, int height, int delta,
00675         uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00676         int x_offset, int y_offset,
00677         qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00678         qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00679         h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00680         int list0, int list1, int pixel_shift, int chroma_idc)
00681 {
00682     if((h->use_weight==2 && list0 && list1
00683         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
00684        || h->use_weight==1)
00685         mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
00686                          x_offset, y_offset, qpix_put, chroma_put,
00687                          weight_op[0], weight_op[1], weight_avg[0],
00688                          weight_avg[1], list0, list1, pixel_shift, chroma_idc);
00689     else
00690         mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
00691                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
00692                     chroma_avg, list0, list1, pixel_shift, chroma_idc);
00693 }
00694 
00695 static av_always_inline void
00696 prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma_idc)
00697 {
00698     /* fetch pixels for estimated mv 4 macroblocks ahead
00699      * optimized for 64byte cache lines */
00700     MpegEncContext * const s = &h->s;
00701     const int refn = h->ref_cache[list][scan8[0]];
00702     if(refn >= 0){
00703         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
00704         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
00705         uint8_t **src = h->ref_list[list][refn].f.data;
00706         int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
00707         s->dsp.prefetch(src[0]+off, s->linesize, 4);
00708         if (chroma_idc == 3 /* yuv444 */) {
00709             s->dsp.prefetch(src[1]+off, s->linesize, 4);
00710             s->dsp.prefetch(src[2]+off, s->linesize, 4);
00711         }else{
00712             off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
00713             s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
00714         }
00715     }
00716 }
00717 
/**
 * Motion-compensate one inter macroblock: dispatch on the macroblock
 * partition type (16x16, 16x8, 8x16 or 8x8 with sub-partitions) and call
 * mc_part() once per partition with appropriately sized function pointers.
 *
 * Indexing convention for the mc/weight function-pointer arrays:
 * index [0] handles 16-wide blocks, [1] 8-wide, [2] 4-wide.
 *
 * @param dest_y/dest_cb/dest_cr destination plane pointers for this MB
 * @param qpix_put/chroma_put    put (unidirectional) MC functions
 * @param qpix_avg/chroma_avg    avg (bidirectional) MC functions
 * @param weight_op/weight_avg   explicit/implicit weighted-prediction ops
 * @param pixel_shift            0 for 8-bit, 1 for >8-bit samples
 * @param chroma_idc             1 = 4:2:0, 2 = 4:2:2, 3 = 4:4:4
 */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                      int pixel_shift, int chroma_idc)
{
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    /* with frame threading, wait until the referenced rows of the
     * reference frames have been decoded by the other threads */
    if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h);
    prefetch_motion(h, 0, pixel_shift, chroma_idc);

    if(IS_16X16(mb_type)){
        /* single 16x16 partition */
        mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
    }else if(IS_16X8(mb_type)){
        /* two 16x8 partitions, stacked vertically (y_offset 0 and 4) */
        mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
        mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma_idc);
    }else if(IS_8X16(mb_type)){
        /* two 8x16 partitions, side by side (x_offset 0 and 4) */
        mc_part(h, 0, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
        mc_part(h, 4, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma_idc);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        /* four 8x8 partitions, each possibly split further */
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
                mc_part(h, n+2, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[2], &weight_avg[2],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
                mc_part(h, n+1, 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[2], &weight_avg[2],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                        pixel_shift, chroma_idc);
                }
            }
        }
    }

    prefetch_motion(h, 1, pixel_shift, chroma_idc);
}
00819 
00820 static av_always_inline void
00821 hl_motion_420(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00822               qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
00823               qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
00824               h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00825               int pixel_shift)
00826 {
00827     hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
00828               qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 1);
00829 }
00830 
00831 static av_always_inline void
00832 hl_motion_422(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00833               qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
00834               qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
00835               h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00836               int pixel_shift)
00837 {
00838     hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
00839               qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 2);
00840 }
00841 
00842 static void free_tables(H264Context *h, int free_rbsp){
00843     int i;
00844     H264Context *hx;
00845 
00846     av_freep(&h->intra4x4_pred_mode);
00847     av_freep(&h->chroma_pred_mode_table);
00848     av_freep(&h->cbp_table);
00849     av_freep(&h->mvd_table[0]);
00850     av_freep(&h->mvd_table[1]);
00851     av_freep(&h->direct_table);
00852     av_freep(&h->non_zero_count);
00853     av_freep(&h->slice_table_base);
00854     h->slice_table= NULL;
00855     av_freep(&h->list_counts);
00856 
00857     av_freep(&h->mb2b_xy);
00858     av_freep(&h->mb2br_xy);
00859 
00860     for(i = 0; i < MAX_THREADS; i++) {
00861         hx = h->thread_context[i];
00862         if(!hx) continue;
00863         av_freep(&hx->top_borders[1]);
00864         av_freep(&hx->top_borders[0]);
00865         av_freep(&hx->s.obmc_scratchpad);
00866         if (free_rbsp){
00867             av_freep(&hx->rbsp_buffer[1]);
00868             av_freep(&hx->rbsp_buffer[0]);
00869             hx->rbsp_buffer_size[0] = 0;
00870             hx->rbsp_buffer_size[1] = 0;
00871         }
00872         if (i) av_freep(&h->thread_context[i]);
00873     }
00874 }
00875 
/**
 * Build the 8x8 dequantization tables, one per scaling-matrix index,
 * for every QP up to max_qp (extended range for >8-bit luma).
 * Matrices that are byte-identical to an earlier one share its buffer
 * instead of being recomputed.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,j,q,x;
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

    for(i=0; i<6; i++ ){
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        /* dedup: reuse a previous buffer if the scaling matrix repeats */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        }
        if(j<i)
            continue; /* shared with matrix j, nothing to compute */

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q];  /* qp/6: scale by powers of two */
            int idx = rem6[q];    /* qp%6: selects the base coeff row */
            /* (x>>3)|((x&7)<<3) transposes the 8x8 index; the scan on the
             * init table maps to the 4 distinct coefficient classes */
            for(x=0; x<64; x++)
                h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
00901 
/**
 * Build the 4x4 dequantization tables, one per scaling-matrix index,
 * for every QP up to max_qp (extended range for >8-bit luma).
 * Matrices identical to an earlier one share its buffer.
 */
static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        /* dedup: reuse a previous buffer if the scaling matrix repeats */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        if(j<i)
            continue; /* shared with matrix j, nothing to compute */

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q] + 2; /* qp/6 plus fixed scaling of 4 */
            int idx = rem6[q];       /* qp%6: selects the base coeff row */
            /* (x>>2)|((x<<2)&0xF) transposes the 4x4 index;
             * (x&1)+((x>>2)&1) picks one of the 3 coefficient classes */
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}
00926 
00927 static void init_dequant_tables(H264Context *h){
00928     int i,x;
00929     init_dequant4_coeff_table(h);
00930     if(h->pps.transform_8x8_mode)
00931         init_dequant8_coeff_table(h);
00932     if(h->sps.transform_bypass){
00933         for(i=0; i<6; i++)
00934             for(x=0; x<16; x++)
00935                 h->dequant4_coeff[i][0][x] = 1<<6;
00936         if(h->pps.transform_8x8_mode)
00937             for(i=0; i<6; i++)
00938                 for(x=0; x<64; x++)
00939                     h->dequant8_coeff[i][0][x] = 1<<6;
00940     }
00941 }
00942 
00943 
/**
 * Allocate all per-stream tables sized from the current mb geometry
 * (mb_stride/mb_height) and initialize the mb <-> block index maps.
 *
 * On any allocation failure everything allocated so far is released
 * via free_tables().
 *
 * @return 0 on success, -1 on allocation failure
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* +1 row / +stride of padding so edge macroblocks can index neighbors */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    /* per-thread tables cover 2 mb rows per slice thread */
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks "no slice"; slice_table points past the padding rows so
     * negative neighbor offsets stay inside the allocation */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            /* without FMO the row-pair mapping lets the br table wrap */
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    /* allocated lazily in ff_h264_frame_start(); linesize unknown here */
    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h, 1);
    return -1;
}
00987 
/**
 * Make slice context i share the main context's tables.
 * Read-only / whole-frame tables are shared by pointer; per-thread tables
 * (intra4x4_pred_mode, mvd_table) point into thread i's own 2-row region
 * of the main allocation (matching row_mb_num in ff_h264_alloc_tables()).
 * No memory is allocated here; dst does not own any of these pointers.
 */
static void clone_tables(H264Context *dst, H264Context *src, int i){
    MpegEncContext * const s = &src->s;
    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
    dst->non_zero_count           = src->non_zero_count;
    dst->slice_table              = src->slice_table;
    dst->cbp_table                = src->cbp_table;
    dst->mb2b_xy                  = src->mb2b_xy;
    dst->mb2br_xy                 = src->mb2br_xy;
    dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
    dst->mvd_table[0]             = src->mvd_table[0] + i*8*2*s->mb_stride;
    dst->mvd_table[1]             = src->mvd_table[1] + i*8*2*s->mb_stride;
    dst->direct_table             = src->direct_table;
    dst->list_counts              = src->list_counts;

    /* allocated lazily per thread in ff_h264_frame_start() */
    dst->s.obmc_scratchpad = NULL;
    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, src->sps.chroma_format_idc);
}
01008 
/**
 * Allocate buffers owned by a single slice context and pre-mark the
 * permanently unavailable ref_cache positions.
 *
 * @return 0 on success, -1 on allocation failure (partial allocations are
 *         released later by free_tables())
 */
static int context_init(H264Context *h){
    /* two rows of deblock/intra top borders, 3 planes, 16 pixels each */
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)

    /* these scan8 neighbors are outside the macroblock for every MB */
    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

    return 0;
fail:
    return -1; // free_tables will clean up for us
}
01024 
01025 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
01026 
/**
 * Initialization shared by all codecs using this decoder core (H.264
 * proper; also reached from SVQ3): DSP contexts, default 8-bit 4:2:0
 * prediction functions and flat default scaling matrices.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    /* defaults assume 8-bit 4:2:0; re-initialized when the SPS says otherwise */
    ff_h264dsp_init(&h->h264dsp, 8, 1);
    ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);

    h->dequant_coeff_pps= -1;  /* no PPS has built dequant tables yet */
    s->unrestricted_mv=1;

    dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early

    /* flat (all-16) scaling matrices until SPS/PPS provide real ones */
    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
01045 
01046 int ff_h264_decode_extradata(H264Context *h)
01047 {
01048     AVCodecContext *avctx = h->s.avctx;
01049 
01050     if(avctx->extradata[0] == 1){
01051         int i, cnt, nalsize;
01052         unsigned char *p = avctx->extradata;
01053 
01054         h->is_avc = 1;
01055 
01056         if(avctx->extradata_size < 7) {
01057             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
01058             return -1;
01059         }
01060         /* sps and pps in the avcC always have length coded with 2 bytes,
01061            so put a fake nal_length_size = 2 while parsing them */
01062         h->nal_length_size = 2;
01063         // Decode sps from avcC
01064         cnt = *(p+5) & 0x1f; // Number of sps
01065         p += 6;
01066         for (i = 0; i < cnt; i++) {
01067             nalsize = AV_RB16(p) + 2;
01068             if (p - avctx->extradata + nalsize > avctx->extradata_size)
01069                 return -1;
01070             if(decode_nal_units(h, p, nalsize) < 0) {
01071                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
01072                 return -1;
01073             }
01074             p += nalsize;
01075         }
01076         // Decode pps from avcC
01077         cnt = *(p++); // Number of pps
01078         for (i = 0; i < cnt; i++) {
01079             nalsize = AV_RB16(p) + 2;
01080             if (p - avctx->extradata + nalsize > avctx->extradata_size)
01081                 return -1;
01082             if (decode_nal_units(h, p, nalsize) < 0) {
01083                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
01084                 return -1;
01085             }
01086             p += nalsize;
01087         }
01088         // Now store right nal length size, that will be use to parse all other nals
01089         h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
01090     } else {
01091         h->is_avc = 0;
01092         if(decode_nal_units(h, avctx->extradata, avctx->extradata_size) < 0)
01093             return -1;
01094     }
01095     return 0;
01096 }
01097 
/**
 * AVCodec init callback: set up the H.264 decoder context, VLC tables and
 * defaults, then parse any SPS/PPS present in the extradata.
 *
 * @return 0 on success, -1 if the extradata cannot be decoded
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;
    int i;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
//    s->decode_mb= ff_h263_decode_mb;
    s->quarter_sample = 1;
    if(!avctx->has_b_frames)
    s->low_delay= 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    /* assume 8-bit until an SPS says otherwise */
    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
        h->last_pocs[i] = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->x264_build = -1;  /* encoder version unknown until SEI is seen */
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        /* H.264 time base is field based: two ticks per frame */
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h))
        return -1;

    /* the SPS may already tell us how much reordering delay to expect */
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}
01149 
/* true if pointer a lies inside the object of given size starting at b */
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
/**
 * Copy an array of Picture pointers between two decoding threads,
 * retargeting each pointer from old_base's storage to the corresponding
 * location inside new_base (via REBASE_PICTURE). NULL entries stay NULL.
 */
static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
{
    int i;

    for (i=0; i<count; i++){
        /* every source pointer must be NULL or point into old_base */
        assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
                IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
                !from[i]));
        to[i] = REBASE_PICTURE(from[i], new_base, old_base);
    }
}
01162 
/**
 * Synchronize an array of dynamically allocated parameter sets (SPS/PPS)
 * between two decoding threads: free slots that disappeared, allocate
 * slots that appeared, and copy the contents of every present slot.
 *
 * On allocation failure a slot is left NULL and its copy skipped (the
 * original code would memcpy() to NULL — undefined behavior).
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (to[i] && !from[i]) {
            av_freep(&to[i]);
        } else if (from[i] && !to[i]) {
            to[i] = av_malloc(size);
            if (!to[i])
                continue;  /* OOM: skip this slot instead of memcpy to NULL */
        }

        if (from[i])
            memcpy(to[i], from[i], size);
    }
}
01174 
01175 static int decode_init_thread_copy(AVCodecContext *avctx){
01176     H264Context *h= avctx->priv_data;
01177 
01178     if (!avctx->internal->is_copy)
01179         return 0;
01180     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
01181     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
01182 
01183     return 0;
01184 }
01185 
/* copy the contiguous byte range [start_field, end_field) between two
 * contexts; relies on the declaration order of fields in H264Context */
#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
/**
 * Frame-threading: bring the destination thread's decoder state up to date
 * with the source thread after it finished setting up a frame.
 * First call additionally performs a full one-time context clone.
 *
 * @return 0 or an error code from ff_mpeg_update_thread_context() /
 *         ff_h264_execute_ref_pic_marking()
 */
static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
    H264Context *h= dst->priv_data, *h1= src->priv_data;
    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if(dst == src || !s1->context_initialized) return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if(err) return err;

    //FIXME handle width/height changing
    if(!inited){
        /* one-time clone: drop any stale parameter sets, then bulk-copy
         * everything after the embedded MpegEncContext */
        for(i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for(i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc
        /* the bulk copy duplicated h1's pointers; clear the ones this
         * context must own itself */
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        for(i=0; i<2; i++){
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        // frame_start may not be called for the next thread (if it's decoding a bottom field)
        // so this has to be allocated here
        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
    }

    //extradata/NAL handling
    h->is_avc          = h1->is_avc;

    //SPS/PPS
    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
    h->sps             = h1->sps;
    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
    h->pps             = h1->pps;

    //Dequantization matrices
    //FIXME these are big - can they be only copied when PPS changes?
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    /* the coeff pointers alias the buffers; rebase them onto h's storage */
    for(i=0; i<6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for(i=0; i<6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    //POC timing
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    //reference lists
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list,  intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    /* Picture pointers must be retargeted into this thread's arrays */
    copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
    copy_picture_range(h->long_ref,    h1->long_ref,    32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

    h->last_slice_type = h1->last_slice_type;

    if(!s->current_picture_ptr) return 0;

    /* replay reference-picture marking so both threads agree on the DPB */
    if(!s->dropable) {
        err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb     = h->poc_msb;
        h->prev_poc_lsb     = h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num       = h->frame_num;
    h->outputed_poc         = h->next_outputed_poc;

    return err;
}
01278 
/**
 * Start decoding a new frame: run the generic MPV/error-resilience frame
 * setup, then fix up the H.264-specific state (key_frame semantics, block
 * offsets, per-thread scratchpads, slice table, field POCs).
 *
 * @return 0 on success, -1 if MPV_frame_start() fails
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->f.key_frame = 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    /* precompute pixel offsets of each 4x4 block inside the MB; the
     * second half of the table ([48+]) uses doubled line strides for
     * interlaced (field) rendering */
    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < s->slice_context_count; i++)
        if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

    /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.f.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->f.reference = 0;

    /* INT_MAX marks "POC not yet known"; decode_postinit() checks this */
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
01338 
01347 static void decode_postinit(H264Context *h, int setup_finished){
01348     MpegEncContext * const s = &h->s;
01349     Picture *out = s->current_picture_ptr;
01350     Picture *cur = s->current_picture_ptr;
01351     int i, pics, out_of_order, out_idx;
01352     int invalid = 0, cnt = 0;
01353 
01354     s->current_picture_ptr->f.qscale_type = FF_QSCALE_TYPE_H264;
01355     s->current_picture_ptr->f.pict_type   = s->pict_type;
01356 
01357     if (h->next_output_pic) return;
01358 
01359     if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
01360         //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
01361         //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
01362         //to find this yet, so we assume the worst for now.
01363         //if (setup_finished)
01364         //    ff_thread_finish_setup(s->avctx);
01365         return;
01366     }
01367 
01368     cur->f.interlaced_frame = 0;
01369     cur->f.repeat_pict      = 0;
01370 
01371     /* Signal interlacing information externally. */
01372     /* Prioritize picture timing SEI information over used decoding process if it exists. */
01373 
01374     if(h->sps.pic_struct_present_flag){
01375         switch (h->sei_pic_struct)
01376         {
01377         case SEI_PIC_STRUCT_FRAME:
01378             break;
01379         case SEI_PIC_STRUCT_TOP_FIELD:
01380         case SEI_PIC_STRUCT_BOTTOM_FIELD:
01381             cur->f.interlaced_frame = 1;
01382             break;
01383         case SEI_PIC_STRUCT_TOP_BOTTOM:
01384         case SEI_PIC_STRUCT_BOTTOM_TOP:
01385             if (FIELD_OR_MBAFF_PICTURE)
01386                 cur->f.interlaced_frame = 1;
01387             else
01388                 // try to flag soft telecine progressive
01389                 cur->f.interlaced_frame = h->prev_interlaced_frame;
01390             break;
01391         case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
01392         case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
01393             // Signal the possibility of telecined film externally (pic_struct 5,6)
01394             // From these hints, let the applications decide if they apply deinterlacing.
01395             cur->f.repeat_pict = 1;
01396             break;
01397         case SEI_PIC_STRUCT_FRAME_DOUBLING:
01398             // Force progressive here, as doubling interlaced frame is a bad idea.
01399             cur->f.repeat_pict = 2;
01400             break;
01401         case SEI_PIC_STRUCT_FRAME_TRIPLING:
01402             cur->f.repeat_pict = 4;
01403             break;
01404         }
01405 
01406         if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
01407             cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
01408     }else{
01409         /* Derive interlacing flag from used decoding process. */
01410         cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE;
01411     }
01412     h->prev_interlaced_frame = cur->f.interlaced_frame;
01413 
01414     if (cur->field_poc[0] != cur->field_poc[1]){
01415         /* Derive top_field_first from field pocs. */
01416         cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
01417     }else{
01418         if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
01419             /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
01420             if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
01421               || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
01422                 cur->f.top_field_first = 1;
01423             else
01424                 cur->f.top_field_first = 0;
01425         }else{
01426             /* Most likely progressive */
01427             cur->f.top_field_first = 0;
01428         }
01429     }
01430 
01431     //FIXME do something with unavailable reference frames
01432 
01433     /* Sort B-frames into display order */
01434 
01435     if(h->sps.bitstream_restriction_flag
01436        && s->avctx->has_b_frames < h->sps.num_reorder_frames){
01437         s->avctx->has_b_frames = h->sps.num_reorder_frames;
01438         s->low_delay = 0;
01439     }
01440 
01441     if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
01442        && !h->sps.bitstream_restriction_flag){
01443         s->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
01444         s->low_delay= 0;
01445     }
01446 
01447     pics = 0;
01448     while(h->delayed_pic[pics]) pics++;
01449 
01450     assert(pics <= MAX_DELAYED_PIC_COUNT);
01451 
01452     h->delayed_pic[pics++] = cur;
01453     if (cur->f.reference == 0)
01454         cur->f.reference = DELAYED_PIC_REF;
01455 
01456     /* Frame reordering. This code takes pictures from coding order and sorts
01457      * them by their incremental POC value into display order. It supports POC
01458      * gaps, MMCO reset codes and random resets.
01459      * A "display group" can start either with a IDR frame (f.key_frame = 1),
01460      * and/or can be closed down with a MMCO reset code. In sequences where
01461      * there is no delay, we can't detect that (since the frame was already
01462      * output to the user), so we also set h->mmco_reset to detect the MMCO
01463      * reset code.
01464      * FIXME: if we detect insufficient delays (as per s->avctx->has_b_frames),
01465      * we increase the delay between input and output. All frames affected by
01466      * the lag (e.g. those that should have been output before another frame
01467      * that we already returned to the user) will be dropped. This is a bug
01468      * that we will fix later. */
01469     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
01470         cnt     += out->poc < h->last_pocs[i];
01471         invalid += out->poc == INT_MIN;
01472     }
01473     if (!h->mmco_reset && !cur->f.key_frame && cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) {
01474         h->mmco_reset = 2;
01475         if (pics > 1)
01476             h->delayed_pic[pics - 2]->mmco_reset = 2;
01477     }
01478     if (h->mmco_reset || cur->f.key_frame) {
01479         for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
01480             h->last_pocs[i] = INT_MIN;
01481         cnt     = 0;
01482         invalid = MAX_DELAYED_PIC_COUNT;
01483     }
01484     out = h->delayed_pic[0];
01485     out_idx = 0;
01486     for (i = 1; i < MAX_DELAYED_PIC_COUNT && h->delayed_pic[i] &&
01487          !h->delayed_pic[i-1]->mmco_reset && !h->delayed_pic[i]->f.key_frame; i++)
01488     {
01489         if(h->delayed_pic[i]->poc < out->poc){
01490             out = h->delayed_pic[i];
01491             out_idx = i;
01492         }
01493     }
01494     if (s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->f.key_frame || h->mmco_reset))
01495         h->next_outputed_poc = INT_MIN;
01496     out_of_order = !out->f.key_frame && !h->mmco_reset && (out->poc < h->next_outputed_poc);
01497 
01498     if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
01499         { }
01500     else if (out_of_order && pics-1 == s->avctx->has_b_frames &&
01501              s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) {
01502         if (invalid + cnt < MAX_DELAYED_PIC_COUNT) {
01503             s->avctx->has_b_frames = FFMAX(s->avctx->has_b_frames, cnt);
01504         }
01505         s->low_delay = 0;
01506     } else if (s->low_delay &&
01507                ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2) ||
01508                 cur->f.pict_type == AV_PICTURE_TYPE_B)) {
01509         s->low_delay = 0;
01510         s->avctx->has_b_frames++;
01511     }
01512 
01513     if(pics > s->avctx->has_b_frames){
01514         out->f.reference &= ~DELAYED_PIC_REF;
01515         out->owner2 = s; // for frame threading, the owner must be the second field's thread
01516                          // or else the first thread can release the picture and reuse it unsafely
01517         for(i=out_idx; h->delayed_pic[i]; i++)
01518             h->delayed_pic[i] = h->delayed_pic[i+1];
01519     }
01520     memmove(h->last_pocs, &h->last_pocs[1], sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1));
01521     h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc;
01522     if(!out_of_order && pics > s->avctx->has_b_frames){
01523         h->next_output_pic = out;
01524         if (out->mmco_reset) {
01525             if (out_idx > 0) {
01526                 h->next_outputed_poc = out->poc;
01527                 h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset;
01528             } else {
01529                 h->next_outputed_poc = INT_MIN;
01530             }
01531         } else {
01532             if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) {
01533                 h->next_outputed_poc = INT_MIN;
01534             } else {
01535                 h->next_outputed_poc = out->poc;
01536             }
01537         }
01538         h->mmco_reset = 0;
01539     }else{
01540         av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
01541     }
01542 
01543     if (setup_finished)
01544         ff_thread_finish_setup(s->avctx);
01545 }
01546 
/**
 * Save the pixel line directly above the current macroblock (luma and,
 * unless gray-only decoding is requested, chroma) into h->top_borders[]
 * so that it can later be restored for intra prediction of the next
 * macroblock row after in-place deblocking has overwritten it.
 *
 * @param simple nonzero selects the simplified path where the MBAFF and
 *               CODEC_FLAG_GRAY checks are compiled out
 */
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
                                              uint8_t *src_cb, uint8_t *src_cr,
                                              int linesize, int uvlinesize, int simple)
{
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift;
    int chroma444 = CHROMA444;
    int chroma422 = CHROMA422;

    /* Step back one line so src_* point at the row above the macroblock. */
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF){
                /* Bottom MB of a non-MBAFF pair in an MBAFF frame: also save
                 * the last line of the top macroblock (15 luma lines / 15 or
                 * 7 chroma lines below the already-decremented src_*). */
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if (pixel_shift)
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(chroma444){
                        if (pixel_shift){
                            /* 16-bit samples: 32 bytes per 16-pixel chroma row. */
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                        } else {
                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                        }
                    } else if(chroma422) {
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cr + 15*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb +  15*uvlinesize);
                            AV_COPY64(top_border+24, src_cr +  15*uvlinesize);
                        }
                    } else {
                        /* 4:2:0: chroma is 8 lines tall, so the last line of
                         * the top macroblock is 7 lines down. */
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                        }
                    }
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];
    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    AV_COPY128(top_border, src_y + 16*linesize);
    if (pixel_shift)
        AV_COPY128(top_border+16, src_y+16*linesize+16);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            /* NOTE(review): the chroma copies below use linesize, not
             * uvlinesize — presumably valid because uvlinesize == linesize
             * for 4:4:4 content; confirm before changing either stride. */
            if (pixel_shift){
                AV_COPY128(top_border+32, src_cb + 16*linesize);
                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
                AV_COPY128(top_border+64, src_cr + 16*linesize);
                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
            } else {
                AV_COPY128(top_border+16, src_cb + 16*linesize);
                AV_COPY128(top_border+32, src_cr + 16*linesize);
            }
        } else if(chroma422) {
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+16*uvlinesize);
                AV_COPY128(top_border+48, src_cr+16*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+16*uvlinesize);
                AV_COPY64(top_border+24, src_cr+16*uvlinesize);
            }
        } else {
            /* 4:2:0: chroma block is 8 lines tall. */
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
            }
        }
    }
}
01642 
/**
 * Exchange the line above the current macroblock with its saved copy in
 * h->top_borders[]. The callers in hl_decode_mb_internal invoke this with
 * xchg=1 before intra prediction (to make the pre-deblocking neighbour
 * pixels visible) and xchg=0 afterwards (to write the border back); some
 * positions are always swapped (XCHG(..., 1)) regardless of direction.
 */
static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                  uint8_t *src_cb, uint8_t *src_cr,
                                  int linesize, int uvlinesize,
                                  int xchg, int chroma444,
                                  int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            /* Bottom MB of a pair: nothing to do unless MBAFF-coded. */
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    if(h->deblocking_filter == 2) {
        /* Filter mode 2: only neighbours in the same slice are deblocked. */
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (s->mb_x > 0);
        deblock_top     = (s->mb_y > !!MB_FIELD);
    }

    /* Point one line up and one pixel left of the macroblock. */
    src_y  -=   linesize + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

/* Swap (xchg nonzero) or copy border-to-source 8 bytes (16 when pixel_shift,
 * i.e. 16-bit samples). Multi-branch macro; arguments are evaluated once. */
#define XCHG(a,b,xchg)\
    if (pixel_shift) {\
        if (xchg) {\
            AV_SWAP64(b+0,a+0);\
            AV_SWAP64(b+8,a+8);\
        } else {\
            AV_COPY128(b,a); \
        }\
    } else \
if (xchg) AV_SWAP64(b,a);\
else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_topleft){
            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if(s->mb_x+1 < s->mb_width){
            /* Top-right neighbour of the next macroblock. */
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
        }
    }
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            /* 4:4:4: chroma planes are full-width, same layout as luma. */
            if(deblock_topleft){
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
            if(s->mb_x+1 < s->mb_width){
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
            }
        } else {
            if(deblock_top){
                if(deblock_topleft){
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
            }
        }
    }
}
01727 
01728 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
01729     if (high_bit_depth) {
01730         return AV_RN32A(((int32_t*)mb) + index);
01731     } else
01732         return AV_RN16A(mb + index);
01733 }
01734 
01735 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
01736     if (high_bit_depth) {
01737         AV_WN32A(((int32_t*)mb) + index, value);
01738     } else
01739         AV_WN16A(mb + index, value);
01740 }
01741 
/**
 * Perform intra prediction for one luma (or, for 4:4:4, one color) plane
 * and add the residual for intra macroblocks: 4x4/8x8 spatial prediction
 * plus per-block IDCT, or 16x16 prediction with DC dequant/IDCT.
 *
 * @param p plane index: 0 for luma, 1/2 for the extra planes in 4:4:4;
 *          selects the qscale and the 16-block slice of block_offset
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    /* Plane 0 uses the luma qscale, the others the matching chroma QP. */
    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
    block_offset += 16*p;
    if(IS_INTRA4x4(mb_type)){
        if(simple || !s->encoding){
            if(IS_8x8DCT(mb_type)){
                if(transform_bypass){
                    /* Lossless bypass: residual is added directly, no IDCT. */
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels8;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
                }
                /* Four 8x8 blocks; scan8 maps block index to cache position. */
                for(i=0; i<16; i+=4){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        /* High 4:4:4 lossless: combined predict+add DSP call. */
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                    (h->topright_samples_available<<i)&0x4000, linesize);
                        if(nnz){
                            /* Single nonzero coefficient at DC -> cheap DC-only add. */
                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            else
                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }
            }else{
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels4;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
                }
                /* Sixteen 4x4 blocks. */
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(s->mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                /* Top-right not available: replicate the last
                                 * available top pixel into a local 4-sample row. */
                                if (pixel_shift) {
                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                    topright= (uint8_t*) &tr_high;
                                } else {
                                    tr= ptr[3 - linesize]*0x01010101u;
                                    topright= (uint8_t*) &tr;
                                }
                            }else
                                topright= ptr + (4 << pixel_shift) - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        if(nnz){
                            if(is_h264){
                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                else
                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            } else if (CONFIG_SVQ3_DECODER)
                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                        }
                    }
                }
            }
        }
    }else{
        /* Intra 16x16: one whole-MB prediction, then DC transform handling. */
        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
        if(is_h264){
            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                if(!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                else{
                    /* Bypass: scatter the 16 DC values to each block's DC
                     * position in raster order. */
                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                    for(i = 0; i < 16; i++)
                        dctcoef_set(h->mb+(p*256 << pixel_shift), pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                }
            }
        } else if (CONFIG_SVQ3_DECODER)
            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
    }
}
01844 
/**
 * Add the luma residual for non-intra-4x4 macroblocks: intra-16x16 AC
 * blocks, or inter residual (4x4 or 8x8 transform), with the lossless
 * transform-bypass variants. Intra-4x4 residual is handled in
 * hl_decode_mb_predict_luma instead.
 *
 * @param p plane index (0 luma, 1/2 extra planes for 4:4:4)
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    block_offset += 16*p;
    if(!IS_INTRA4x4(mb_type)){
        if(is_h264){
            if(IS_INTRA16x16(mb_type)){
                if(transform_bypass){
                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                        /* High 4:4:4 lossless vertical/horizontal: combined
                         * predict+add covering all 16 blocks. */
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
                    }else{
                        for(i=0; i<16; i++){
                            /* Residual present if any AC coeff or a DC value. */
                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                }
            }else if(h->cbp&15){
                /* Inter (or non-16x16 intra handled elsewhere) with coded luma. */
                if(transform_bypass){
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                    for(i=0; i<16; i+=di){
                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    if(IS_8x8DCT(mb_type)){
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }else{
                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }
                }
            }
        } else if (CONFIG_SVQ3_DECODER) {
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }
}
01894 
/**
 * Reconstruct one macroblock at (s->mb_x, s->mb_y): compute the destination
 * pointers, then handle PCM copy, intra prediction (with border exchange for
 * deblocking), inter motion compensation, and residual addition for luma and
 * chroma.
 *
 * @param simple      nonzero selects the simplified path: MBAFF, field,
 *                    gray-only and SVQ3 handling compiled out
 * @param pixel_shift 1 for >8-bit content (16-bit samples), 0 for 8-bit
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift)
{
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i, j;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
    const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    /* Chroma block height: 16 for 4:4:4/4:2:2, 8 for 4:2:0. */
    const int block_h = 16 >> s->chroma_y_shift;
    const int chroma422 = CHROMA422;

    dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
    dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;

    /* Prefetch destination cache lines ahead of the current position. */
    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* Field macroblock: double strides so consecutive rows of the block
         * land on the same field, and use the field block offsets. */
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize * (block_h - 1);
            dest_cr-= s->uvlinesize * (block_h - 1);
        }
        if(FRAME_MBAFF) {
            int list;
            /* Remap cached reference indices to the parity-adjusted field
             * references ((16+ref)^parity encoding). */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* PCM macroblock: raw samples are stored in h->mb; copy them out. */
        if (pixel_shift) {
            const int bit_depth = h->sps.bit_depth_luma;
            int j;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);

            for (i = 0; i < 16; i++) {
                uint16_t *tmp_y  = (uint16_t*)(dest_y  + i*linesize);
                for (j = 0; j < 16; j++)
                    tmp_y[j] = get_bits(&gb, bit_depth);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* Monochrome: fill chroma with mid-gray for this depth. */
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cb[j] = 1 << (bit_depth - 1);
                        }
                    }
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cr[j] = 1 << (bit_depth - 1);
                        }
                    }
                } else {
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cb[j] = get_bits(&gb, bit_depth);
                    }
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cr[j] = get_bits(&gb, bit_depth);
                    }
                }
            }
        } else {
            for (i=0; i<16; i++) {
                memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* Monochrome: 128 is mid-gray for 8-bit samples. */
                    for (i = 0; i < block_h; i++) {
                        memset(dest_cb + i*uvlinesize, 128, 8);
                        memset(dest_cr + i*uvlinesize, 128, 8);
                    }
                } else {
                    for (i = 0; i < block_h; i++) {
                        memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4,  8);
                        memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4,  8);
                    }
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* Exchange the top border in so intra prediction sees the
             * pre-deblocking neighbour pixels; swapped back afterwards. */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);

            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
        }else if(is_h264){
            /* Inter macroblock: motion compensation for luma and chroma. */
            if (chroma422) {
                hl_motion_422(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
                              h->h264dsp.biweight_h264_pixels_tab,
                              pixel_shift);
            } else {
                hl_motion_420(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
                              h->h264dsp.biweight_h264_pixels_tab,
                              pixel_shift);
            }
        }

        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

        /* Chroma residual, only when the CBP signals coded chroma blocks. */
        if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
                        }
                        if (chroma422) {
                            /* 4:2:2 has four additional chroma blocks per plane. */
                            for(i=j*16+4; i<j*16+8; i++){
                                if(h->non_zero_count_cache[ scan8[i+4] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                    idct_add   (dest[j-1] + block_offset[i+4], h->mb + (i*16 << pixel_shift), uvlinesize);
                            }
                        }
                    }
                }
            }else{
                if(is_h264){
                    int qp[2];
                    if (chroma422) {
                        /* NOTE(review): +3 QP offset applied for 4:2:2 DC
                         * dequant — presumably the spec-mandated QP'c offset;
                         * confirm against the standard before changing. */
                        qp[0] = h->chroma_qp[0] + 3;
                        qp[1] = h->chroma_qp[1] + 3;
                    } else {
                        qp[0] = h->chroma_qp[0];
                        qp[1] = h->chroma_qp[1];
                    }
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][qp[0]][0]);
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][qp[1]][0]);
                    h->h264dsp.h264_idct_add8(dest, block_offset,
                                              h->mb, uvlinesize,
                                              h->non_zero_count_cache);
                } else if (CONFIG_SVQ3_DECODER) {
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                                uint8_t * const ptr= dest[j-1] + block_offset[i];
                                ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                            }
                        }
                    }
                }
            }
        }
    }
    /* Reset the coefficient buffer for the next macroblock. */
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02104 
/**
 * Decode one macroblock of 4:4:4 content, where all three colour planes
 * have luma resolution and are run through the luma prediction/IDCT code
 * (one pass per plane) instead of the chroma path.
 *
 * @param h           H.264 decoder context
 * @param simple      non-zero selects the fast path without MBAFF/PCM/GRAY handling
 * @param pixel_shift 1 if samples are stored as 16-bit values, else 0
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    uint8_t  *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    /* lossless (transform bypass) is only reachable on the non-simple path */
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* with CODEC_FLAG_GRAY only plane 0 is decoded */
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++)
    {
        dest[p] = s->current_picture.f.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: double the stride and use the field block offsets */
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1) //FIXME move out of this function?
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize*15;
        if(FRAME_MBAFF) {
            int list;
            /* rewrite the cached reference indices to field references,
             * adjusted for this macroblock row's parity */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* I_PCM: copy the raw samples stored in h->mb to the picture */
        if (pixel_shift) {
            /* bit depth > 8: samples are bit-packed at the stream bit depth */
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* swap border pixels in before prediction and back out after,
             * when the deblocking filter is enabled */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        }else{
            /* inter: motion compensation covers all three planes */
            hl_motion(h, dest[0], dest[1], dest[2],
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      h->h264dsp.weight_h264_pixels_tab,
                      h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 3);
        }

        /* add the residual of each plane through the luma IDCT path */
        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
    }
    /* clear the coefficient buffer for the next macroblock */
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02200 
/**
 * Generate hl_decode_mb_simple_8() and hl_decode_mb_simple_16():
 * fast-path macroblock decoders (simple=1) for 8- and 16-bit sample
 * storage respectively.
 */
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
    hl_decode_mb_internal(h, 1, sh); \
}
hl_decode_mb_simple(0, 8)
hl_decode_mb_simple(1, 16)
02210 
/**
 * Slow-path macroblock decoder (simple=0) for subsampled-chroma streams;
 * pixel_shift is taken from the context at run time.
 */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0, h->pixel_shift);
}
02217 
/**
 * Slow-path macroblock decoder (simple=0) for 4:4:4 streams;
 * pixel_shift is taken from the context at run time.
 */
static void av_noinline hl_decode_mb_444_complex(H264Context *h){
    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
}
02221 
/**
 * Fast-path macroblock decoder (simple=1) for 8-bit 4:4:4 streams
 * (pixel_shift fixed to 0).
 */
static void av_noinline hl_decode_mb_444_simple(H264Context *h){
    hl_decode_mb_444_internal(h, 1, 0);
}
02225 
02226 void ff_h264_hl_decode_mb(H264Context *h){
02227     MpegEncContext * const s = &h->s;
02228     const int mb_xy= h->mb_xy;
02229     const int mb_type = s->current_picture.f.mb_type[mb_xy];
02230     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
02231 
02232     if (CHROMA444) {
02233         if(is_complex || h->pixel_shift)
02234             hl_decode_mb_444_complex(h);
02235         else
02236             hl_decode_mb_444_simple(h);
02237     } else if (is_complex) {
02238         hl_decode_mb_complex(h);
02239     } else if (h->pixel_shift) {
02240         hl_decode_mb_simple_16(h);
02241     } else
02242         hl_decode_mb_simple_8(h);
02243 }
02244 
/**
 * Parse an explicit weighted-prediction table (pred_weight_table(),
 * H.264 spec 7.3.3.2) from the slice header.
 *
 * Fills h->luma_weight / h->chroma_weight for every active reference in
 * each list and raises h->use_weight / h->use_weight_chroma when any
 * entry differs from the defaults.
 *
 * @return 0 (no error conditions are detected here)
 */
static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list, i;
    int luma_def, chroma_def;

    h->use_weight= 0;
    h->use_weight_chroma= 0;
    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
    /* chroma_format_idc == 0 (monochrome) carries no chroma syntax */
    if(h->sps.chroma_format_idc)
        h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
    /* default weight = 1.0 expressed at the chosen denominator */
    luma_def = 1<<h->luma_log2_weight_denom;
    chroma_def = 1<<h->chroma_log2_weight_denom;

    for(list=0; list<2; list++){
        h->luma_weight_flag[list]   = 0;
        h->chroma_weight_flag[list] = 0;
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;

            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                /* [0] = multiplicative weight, [1] = additive offset */
                h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
                h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
                if(   h->luma_weight[i][list][0] != luma_def
                   || h->luma_weight[i][list][1] != 0) {
                    h->use_weight= 1;
                    h->luma_weight_flag[list]= 1;
                }
            }else{
                h->luma_weight[i][list][0]= luma_def;
                h->luma_weight[i][list][1]= 0;
            }

            if(h->sps.chroma_format_idc){
                chroma_weight_flag= get_bits1(&s->gb);
                if(chroma_weight_flag){
                    int j;
                    /* j indexes the two chroma planes (Cb, Cr) */
                    for(j=0; j<2; j++){
                        h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
                        h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
                        if(   h->chroma_weight[i][list][j][0] != chroma_def
                           || h->chroma_weight[i][list][j][1] != 0) {
                            h->use_weight_chroma= 1;
                            h->chroma_weight_flag[list]= 1;
                        }
                    }
                }else{
                    int j;
                    for(j=0; j<2; j++){
                        h->chroma_weight[i][list][j][0]= chroma_def;
                        h->chroma_weight[i][list][j][1]= 0;
                    }
                }
            }
        }
        /* only B slices carry a table for list 1 */
        if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
    }
    h->use_weight= h->use_weight || h->use_weight_chroma;
    return 0;
}
02305 
/**
 * Initialize the implicit bi-prediction weight table from POC distances
 * (H.264 spec 8.4.2.3.2).
 *
 * @param field < 0 to initialize for the whole frame (both field entries
 *              set at once); 0/1 to fill only the weights of the given
 *              field parity, using the field entries of the MBAFF
 *              reference lists starting at index 16
 */
static void implicit_weight_table(H264Context *h, int field){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i]   = 0;
        h->chroma_weight_flag[i] = 0;
    }

    if(field < 0){
        if (s->picture_structure == PICT_FRAME) {
            cur_poc = s->current_picture_ptr->poc;
        } else {
            cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
        }
    /* NOTE(review): despite the indentation, this early-out is inside the
     * field < 0 branch. With a single reference in each list, both
     * equidistant from the current picture, the weights degenerate to a
     * plain average, so explicit/implicit weighting is switched off. */
    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
        h->use_weight= 0;
        h->use_weight_chroma= 0;
        return;
    }
        ref_start= 0;
        ref_count0= h->ref_count[0];
        ref_count1= h->ref_count[1];
    }else{
        cur_poc = s->current_picture_ptr->field_poc[field];
        /* field entries of the reference lists start at index 16 */
        ref_start= 16;
        ref_count0= 16+2*h->ref_count[0];
        ref_count1= 16+2*h->ref_count[1];
    }

    h->use_weight= 2;
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=ref_start; ref0 < ref_count0; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=ref_start; ref1 < ref_count1; ref1++){
            int w = 32;   /* 0.5/0.5 default (long-term refs, td == 0, out-of-range scale) */
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                int poc1 = h->ref_list[1][ref1].poc;
                int td = av_clip(poc1 - poc0, -128, 127);
                if(td){
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
                    int dist_scale_factor = (tb*tx + 32) >> 8;
                    if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                        w = 64 - dist_scale_factor;
                }
            }
            if(field<0){
                /* frame init: both field slots get the same weight */
                h->implicit_weight[ref0][ref1][0]=
                h->implicit_weight[ref0][ref1][1]= w;
            }else{
                h->implicit_weight[ref0][ref1][field]=w;
            }
        }
    }
}
02371 
/**
 * Perform an instantaneous decoder refresh: drop every reference picture
 * and reset the frame_num / POC prediction state to zero.
 */
static void idr(H264Context *h){
    ff_h264_remove_all_refs(h);
    h->prev_frame_num= 0;
    h->prev_frame_num_offset= 0;
    h->prev_poc_msb=
    h->prev_poc_lsb= 0;
}
02382 
02383 /* forget old pics after a seek */
02384 static void flush_dpb(AVCodecContext *avctx){
02385     H264Context *h= avctx->priv_data;
02386     int i;
02387     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
02388         if(h->delayed_pic[i])
02389             h->delayed_pic[i]->f.reference = 0;
02390         h->delayed_pic[i]= NULL;
02391     }
02392     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
02393         h->last_pocs[i] = INT_MIN;
02394     h->outputed_poc=h->next_outputed_poc= INT_MIN;
02395     h->prev_interlaced_frame = 1;
02396     idr(h);
02397     if(h->s.current_picture_ptr)
02398         h->s.current_picture_ptr->f.reference = 0;
02399     h->s.first_field= 0;
02400     ff_h264_reset_sei(h);
02401     ff_mpeg_flush(avctx);
02402 }
02403 
/**
 * Compute the picture order count of the current picture from the slice
 * header syntax, per H.264 spec 8.2.1, and store the per-field POCs and
 * their minimum in the current Picture.
 *
 * @return 0
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    /* frame_num wrapped -> advance the offset by one full period */
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        /* type 0: poc_lsb is coded; reconstruct the msb from the previous
         * reference picture's POC, detecting lsb wraparound either way */
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        /* type 1: POC follows a cyclic pattern signalled in the SPS,
         * refined by per-picture deltas */
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* type 2: POC is derived directly from frame_num; non-reference
         * pictures get the odd value just below the next reference */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    /* store only the POC of the field(s) actually being decoded */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}
02480 
02481 
02485 static void init_scan_tables(H264Context *h){
02486     int i;
02487     for(i=0; i<16; i++){
02488 #define T(x) (x>>2) | ((x<<2) & 0xF)
02489         h->zigzag_scan[i] = T(zigzag_scan[i]);
02490         h-> field_scan[i] = T( field_scan[i]);
02491 #undef T
02492     }
02493     for(i=0; i<64; i++){
02494 #define T(x) (x>>3) | ((x&7)<<3)
02495         h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
02496         h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
02497         h->field_scan8x8[i]        = T(field_scan8x8[i]);
02498         h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
02499 #undef T
02500     }
02501     if(h->sps.transform_bypass){ //FIXME same ugly
02502         h->zigzag_scan_q0          = zigzag_scan;
02503         h->zigzag_scan8x8_q0       = ff_zigzag_direct;
02504         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
02505         h->field_scan_q0           = field_scan;
02506         h->field_scan8x8_q0        = field_scan8x8;
02507         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
02508     }else{
02509         h->zigzag_scan_q0          = h->zigzag_scan;
02510         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
02511         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
02512         h->field_scan_q0           = h->field_scan;
02513         h->field_scan8x8_q0        = h->field_scan8x8;
02514         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
02515     }
02516 }
02517 
/**
 * Finish decoding the current field (or frame): report completion to
 * frame threads, run reference picture marking, notify the hwaccel /
 * VDPAU back ends, optionally run error concealment, and end the MPV
 * frame.
 *
 * @param in_setup non-zero when invoked from setup of the next picture
 *                 rather than at the true end of the field
 * @return result of ff_h264_execute_ref_pic_marking(), else 0
 */
static int field_end(H264Context *h, int in_setup){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    int err = 0;
    s->mb_y= 0;

    if (!in_setup && !s->dropable)
        ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
                                  s->picture_structure == PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
        if(!s->dropable) {
            /* apply memory management control operations and remember the
             * POC state for the next picture's prediction */
            err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        }
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        h->outputed_poc = h->next_outputed_poc;
    }

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /*
     * FIXME: Error handling code does not seem to support interlaced
     * when slices span multiple rows
     * The ff_er_add_slice calls don't work right for bottom
     * fields; they cause massive erroneous error concealing
     * Error marking covers both fields (top and bottom).
     * This causes a mismatched s->error_count
     * and a bad error table. Further, the error count goes to
     * INT_MAX when called for bottom field, because mb_y is
     * past end by one (callers fault) and resync_mb_y != 0
     * causes problems for the first MB line, too.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;

    return err;
}
02571 
02575 static void clone_slice(H264Context *dst, H264Context *src)
02576 {
02577     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
02578     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
02579     dst->s.current_picture      = src->s.current_picture;
02580     dst->s.linesize             = src->s.linesize;
02581     dst->s.uvlinesize           = src->s.uvlinesize;
02582     dst->s.first_field          = src->s.first_field;
02583 
02584     dst->prev_poc_msb           = src->prev_poc_msb;
02585     dst->prev_poc_lsb           = src->prev_poc_lsb;
02586     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
02587     dst->prev_frame_num         = src->prev_frame_num;
02588     dst->short_ref_count        = src->short_ref_count;
02589 
02590     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
02591     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
02592     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
02593     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
02594 
02595     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
02596     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
02597 }
02598 
02606 int ff_h264_get_profile(SPS *sps)
02607 {
02608     int profile = sps->profile_idc;
02609 
02610     switch(sps->profile_idc) {
02611     case FF_PROFILE_H264_BASELINE:
02612         // constraint_set1_flag set to 1
02613         profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
02614         break;
02615     case FF_PROFILE_H264_HIGH_10:
02616     case FF_PROFILE_H264_HIGH_422:
02617     case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
02618         // constraint_set3_flag set to 1
02619         profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
02620         break;
02621     }
02622 
02623     return profile;
02624 }
02625 
02635 static int decode_slice_header(H264Context *h, H264Context *h0){
02636     MpegEncContext * const s = &h->s;
02637     MpegEncContext * const s0 = &h0->s;
02638     unsigned int first_mb_in_slice;
02639     unsigned int pps_id;
02640     int num_ref_idx_active_override_flag;
02641     unsigned int slice_type, tmp, i, j;
02642     int default_ref_list_done = 0;
02643     int last_pic_structure, last_pic_dropable;
02644 
02645     /* FIXME: 2tap qpel isn't implemented for high bit depth. */
02646     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
02647         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
02648         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
02649     }else{
02650         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
02651         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
02652     }
02653 
02654     first_mb_in_slice= get_ue_golomb(&s->gb);
02655 
02656     if(first_mb_in_slice == 0){ //FIXME better field boundary detection
02657         if(h0->current_slice && FIELD_PICTURE){
02658             field_end(h, 1);
02659         }
02660 
02661         h0->current_slice = 0;
02662         if (!s0->first_field) {
02663             if (s->current_picture_ptr && !s->dropable &&
02664                 s->current_picture_ptr->owner2 == s) {
02665                 ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
02666                                           s->picture_structure == PICT_BOTTOM_FIELD);
02667             }
02668             s->current_picture_ptr = NULL;
02669         }
02670     }
02671 
02672     slice_type= get_ue_golomb_31(&s->gb);
02673     if(slice_type > 9){
02674         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
02675         return -1;
02676     }
02677     if(slice_type > 4){
02678         slice_type -= 5;
02679         h->slice_type_fixed=1;
02680     }else
02681         h->slice_type_fixed=0;
02682 
02683     slice_type= golomb_to_pict_type[ slice_type ];
02684     if (slice_type == AV_PICTURE_TYPE_I
02685         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
02686         default_ref_list_done = 1;
02687     }
02688     h->slice_type= slice_type;
02689     h->slice_type_nos= slice_type & 3;
02690 
02691     if (h->nal_unit_type  == NAL_IDR_SLICE &&
02692         h->slice_type_nos != AV_PICTURE_TYPE_I) {
02693         av_log(h->s.avctx, AV_LOG_ERROR, "A non-intra slice in an IDR NAL unit.\n");
02694         return AVERROR_INVALIDDATA;
02695     }
02696 
02697     // to make a few old functions happy, it's wrong though
02698     s->pict_type = h->slice_type;
02699 
02700     pps_id= get_ue_golomb(&s->gb);
02701     if(pps_id>=MAX_PPS_COUNT){
02702         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
02703         return -1;
02704     }
02705     if(!h0->pps_buffers[pps_id]) {
02706         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
02707         return -1;
02708     }
02709     h->pps= *h0->pps_buffers[pps_id];
02710 
02711     if(!h0->sps_buffers[h->pps.sps_id]) {
02712         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
02713         return -1;
02714     }
02715     h->sps = *h0->sps_buffers[h->pps.sps_id];
02716 
02717     s->avctx->profile = ff_h264_get_profile(&h->sps);
02718     s->avctx->level   = h->sps.level_idc;
02719     s->avctx->refs    = h->sps.ref_frame_count;
02720 
02721     s->mb_width= h->sps.mb_width;
02722     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
02723 
02724     h->b_stride=  s->mb_width*4;
02725 
02726     s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
02727 
02728     s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
02729     if(h->sps.frame_mbs_only_flag)
02730         s->height= 16*s->mb_height - (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
02731     else
02732         s->height= 16*s->mb_height - (2<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
02733 
02734     if (FFALIGN(s->avctx->width,  16) == s->width &&
02735         FFALIGN(s->avctx->height, 16) == s->height) {
02736         s->width  = s->avctx->width;
02737         s->height = s->avctx->height;
02738     }
02739 
02740     if (s->context_initialized
02741         && (   s->width != s->avctx->width || s->height != s->avctx->height
02742             || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
02743         if(h != h0 || (HAVE_THREADS && h->s.avctx->active_thread_type & FF_THREAD_FRAME)) {
02744             av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);
02745             return AVERROR_PATCHWELCOME;   // width / height changed during parallelized decoding
02746         }
02747         free_tables(h, 0);
02748         flush_dpb(s->avctx);
02749         MPV_common_end(s);
02750     }
02751     if (!s->context_initialized) {
02752         if (h != h0) {
02753             av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
02754             return -1;
02755         }
02756 
02757         avcodec_set_dimensions(s->avctx, s->width, s->height);
02758         s->avctx->sample_aspect_ratio= h->sps.sar;
02759         av_assert0(s->avctx->sample_aspect_ratio.den);
02760 
02761         if(h->sps.video_signal_type_present_flag){
02762             s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
02763             if(h->sps.colour_description_present_flag){
02764                 s->avctx->color_primaries = h->sps.color_primaries;
02765                 s->avctx->color_trc       = h->sps.color_trc;
02766                 s->avctx->colorspace      = h->sps.colorspace;
02767             }
02768         }
02769 
02770         if(h->sps.timing_info_present_flag){
02771             int64_t den= h->sps.time_scale;
02772             if(h->x264_build < 44U)
02773                 den *= 2;
02774             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
02775                       h->sps.num_units_in_tick, den, 1<<30);
02776         }
02777 
02778         switch (h->sps.bit_depth_luma) {
02779             case 9 :
02780                 if (CHROMA444) {
02781                     if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02782                         s->avctx->pix_fmt = PIX_FMT_GBRP9;
02783                     } else
02784                         s->avctx->pix_fmt = PIX_FMT_YUV444P9;
02785                 } else if (CHROMA422)
02786                     s->avctx->pix_fmt = PIX_FMT_YUV422P9;
02787                 else
02788                     s->avctx->pix_fmt = PIX_FMT_YUV420P9;
02789                 break;
02790             case 10 :
02791                 if (CHROMA444) {
02792                     if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02793                         s->avctx->pix_fmt = PIX_FMT_GBRP10;
02794                     } else
02795                         s->avctx->pix_fmt = PIX_FMT_YUV444P10;
02796                 } else if (CHROMA422)
02797                     s->avctx->pix_fmt = PIX_FMT_YUV422P10;
02798                 else
02799                     s->avctx->pix_fmt = PIX_FMT_YUV420P10;
02800                 break;
02801             case 8:
02802                 if (CHROMA444){
02803                     if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02804                         s->avctx->pix_fmt = PIX_FMT_GBRP;
02805                     } else
02806                         s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
02807                 } else if (CHROMA422) {
02808                     s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P;
02809                 }else{
02810                     s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
02811                                                              s->avctx->codec->pix_fmts ?
02812                                                              s->avctx->codec->pix_fmts :
02813                                                              s->avctx->color_range == AVCOL_RANGE_JPEG ?
02814                                                              hwaccel_pixfmt_list_h264_jpeg_420 :
02815                                                              ff_hwaccel_pixfmt_list_420);
02816                 }
02817                 break;
02818             default:
02819                 av_log(s->avctx, AV_LOG_ERROR,
02820                        "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
02821                 return AVERROR_INVALIDDATA;
02822         }
02823 
02824         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
02825 
02826         if (MPV_common_init(s) < 0) {
02827             av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
02828             return -1;
02829         }
02830         s->first_field = 0;
02831         h->prev_interlaced_frame = 1;
02832 
02833         init_scan_tables(h);
02834         if (ff_h264_alloc_tables(h) < 0) {
02835             av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
02836             return AVERROR(ENOMEM);
02837         }
02838 
02839         if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
02840             if (context_init(h) < 0) {
02841                 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02842                 return -1;
02843             }
02844         } else {
02845             for(i = 1; i < s->slice_context_count; i++) {
02846                 H264Context *c;
02847                 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
02848                 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
02849                 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
02850                 c->h264dsp = h->h264dsp;
02851                 c->sps = h->sps;
02852                 c->pps = h->pps;
02853                 c->pixel_shift = h->pixel_shift;
02854                 init_scan_tables(c);
02855                 clone_tables(c, h, i);
02856             }
02857 
02858             for(i = 0; i < s->slice_context_count; i++)
02859                 if (context_init(h->thread_context[i]) < 0) {
02860                     av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02861                     return -1;
02862                 }
02863         }
02864     }
02865 
02866     if(h == h0 && h->dequant_coeff_pps != pps_id){
02867         h->dequant_coeff_pps = pps_id;
02868         init_dequant_tables(h);
02869     }
02870 
02871     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
02872 
02873     h->mb_mbaff = 0;
02874     h->mb_aff_frame = 0;
02875     last_pic_structure = s0->picture_structure;
02876     last_pic_dropable  = s0->dropable;
02877     s->dropable        = h->nal_ref_idc == 0;
02878     if(h->sps.frame_mbs_only_flag){
02879         s->picture_structure= PICT_FRAME;
02880     }else{
02881         if(get_bits1(&s->gb)) { //field_pic_flag
02882             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
02883         } else {
02884             s->picture_structure= PICT_FRAME;
02885             h->mb_aff_frame = h->sps.mb_aff;
02886         }
02887     }
02888     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
02889 
02890     if (h0->current_slice != 0) {
02891         if (last_pic_structure != s->picture_structure ||
02892             last_pic_dropable  != s->dropable) {
02893             av_log(h->s.avctx, AV_LOG_ERROR,
02894                    "Changing field mode (%d -> %d) between slices is not allowed\n",
02895                    last_pic_structure, s->picture_structure);
02896             s->picture_structure = last_pic_structure;
02897             s->dropable          = last_pic_dropable;
02898             return AVERROR_INVALIDDATA;
02899         } else if (!s->current_picture_ptr) {
02900             av_log(s->avctx, AV_LOG_ERROR,
02901                    "unset current_picture_ptr on %d. slice\n",
02902                    h0->current_slice + 1);
02903             return AVERROR_INVALIDDATA;
02904         }
02905     } else {
02906         /* Shorten frame num gaps so we don't have to allocate reference
02907          * frames just to throw them away */
02908         if (h->frame_num != h->prev_frame_num) {
02909             int unwrap_prev_frame_num = h->prev_frame_num;
02910             int max_frame_num         = 1 << h->sps.log2_max_frame_num;
02911 
02912             if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
02913 
02914             if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
02915                 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
02916                 if (unwrap_prev_frame_num < 0)
02917                     unwrap_prev_frame_num += max_frame_num;
02918 
02919                 h->prev_frame_num = unwrap_prev_frame_num;
02920             }
02921         }
02922 
02923         /* See if we have a decoded first field looking for a pair...
02924          * Here, we're using that to see if we should mark previously
02925          * decode frames as "finished".
02926          * We have to do that before the "dummy" in-between frame allocation,
02927          * since that can modify s->current_picture_ptr. */
02928         if (s0->first_field) {
02929             assert(s0->current_picture_ptr);
02930             assert(s0->current_picture_ptr->f.data[0]);
02931             assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
02932 
02933             /* Mark old field/frame as completed */
02934             if (!last_pic_dropable && s0->current_picture_ptr->owner2 == s0) {
02935                 ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
02936                                           last_pic_structure == PICT_BOTTOM_FIELD);
02937             }
02938 
02939             /* figure out if we have a complementary field pair */
02940             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
02941                 /* Previous field is unmatched. Don't display it, but let it
02942                  * remain for reference if marked as such. */
02943                 if (!last_pic_dropable && last_pic_structure != PICT_FRAME) {
02944                     ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
02945                                               last_pic_structure == PICT_TOP_FIELD);
02946                 }
02947             } else {
02948                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
02949                     /* This and previous field were reference, but had
02950                      * different frame_nums. Consider this field first in
02951                      * pair. Throw away previous field except for reference
02952                      * purposes. */
02953                     if (!last_pic_dropable && last_pic_structure != PICT_FRAME) {
02954                         ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
02955                                                   last_pic_structure == PICT_TOP_FIELD);
02956                     }
02957                 } else {
02958                     /* Second field in complementary pair */
02959                     if (!((last_pic_structure   == PICT_TOP_FIELD &&
02960                            s->picture_structure == PICT_BOTTOM_FIELD) ||
02961                           (last_pic_structure   == PICT_BOTTOM_FIELD &&
02962                            s->picture_structure == PICT_TOP_FIELD))) {
02963                         av_log(s->avctx, AV_LOG_ERROR,
02964                                "Invalid field mode combination %d/%d\n",
02965                                last_pic_structure, s->picture_structure);
02966                         s->picture_structure = last_pic_structure;
02967                         s->dropable          = last_pic_dropable;
02968                         return AVERROR_INVALIDDATA;
02969                     } else if (last_pic_dropable != s->dropable) {
02970                         av_log(s->avctx, AV_LOG_ERROR,
02971                                "Cannot combine reference and non-reference fields in the same frame\n");
02972                         av_log_ask_for_sample(s->avctx, NULL);
02973                         s->picture_structure = last_pic_structure;
02974                         s->dropable          = last_pic_dropable;
02975                         return AVERROR_INVALIDDATA;
02976                     }
02977 
02978                     /* Take ownership of this buffer. Note that if another thread owned
02979                      * the first field of this buffer, we're not operating on that pointer,
02980                      * so the original thread is still responsible for reporting progress
02981                      * on that first field (or if that was us, we just did that above).
02982                      * By taking ownership, we assign responsibility to ourselves to
02983                      * report progress on the second field. */
02984                     s0->current_picture_ptr->owner2 = s0;
02985                 }
02986             }
02987         }
02988 
02989         while (h->frame_num != h->prev_frame_num &&
02990                h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) {
02991             Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
02992             av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
02993             if (ff_h264_frame_start(h) < 0)
02994                 return -1;
02995             h->prev_frame_num++;
02996             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
02997             s->current_picture_ptr->frame_num= h->prev_frame_num;
02998             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
02999             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
03000             ff_generate_sliding_window_mmcos(h);
03001             if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
03002                 (s->avctx->err_recognition & AV_EF_EXPLODE))
03003                 return AVERROR_INVALIDDATA;
03004             /* Error concealment: if a ref is missing, copy the previous ref in its place.
03005              * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
03006              * about there being no actual duplicates.
03007              * FIXME: this doesn't copy padding for out-of-frame motion vectors.  Given we're
03008              * concealing a lost frame, this probably isn't noticeable by comparison, but it should
03009              * be fixed. */
03010             if (h->short_ref_count) {
03011                 if (prev) {
03012                     av_image_copy(h->short_ref[0]->f.data, h->short_ref[0]->f.linesize,
03013                                   (const uint8_t**)prev->f.data, prev->f.linesize,
03014                                   s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
03015                     h->short_ref[0]->poc = prev->poc+2;
03016                 }
03017                 h->short_ref[0]->frame_num = h->prev_frame_num;
03018             }
03019         }
03020 
03021         /* See if we have a decoded first field looking for a pair...
03022          * We're using that to see whether to continue decoding in that
03023          * frame, or to allocate a new one. */
03024         if (s0->first_field) {
03025             assert(s0->current_picture_ptr);
03026             assert(s0->current_picture_ptr->f.data[0]);
03027             assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
03028 
03029             /* figure out if we have a complementary field pair */
03030             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
03031                 /*
03032                  * Previous field is unmatched. Don't display it, but let it
03033                  * remain for reference if marked as such.
03034                  */
03035                 s0->current_picture_ptr = NULL;
03036                 s0->first_field = FIELD_PICTURE;
03037 
03038             } else {
03039                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
03040                     /* This and the previous field had different frame_nums.
03041                      * Consider this field first in pair. Throw away previous
03042                      * one except for reference purposes. */
03043                     s0->first_field         = 1;
03044                     s0->current_picture_ptr = NULL;
03045 
03046                 } else {
03047                     /* Second field in complementary pair */
03048                     s0->first_field = 0;
03049                 }
03050             }
03051 
03052         } else {
03053             /* Frame or first field in a potentially complementary pair */
03054             assert(!s0->current_picture_ptr);
03055             s0->first_field = FIELD_PICTURE;
03056         }
03057 
03058         if(!FIELD_PICTURE || s0->first_field) {
03059             if (ff_h264_frame_start(h) < 0) {
03060                 s0->first_field = 0;
03061                 return -1;
03062             }
03063         } else {
03064             ff_release_unused_pictures(s, 0);
03065         }
03066     }
03067     if(h != h0)
03068         clone_slice(h, h0);
03069 
03070     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
03071 
03072     assert(s->mb_num == s->mb_width * s->mb_height);
03073     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
03074        first_mb_in_slice                    >= s->mb_num){
03075         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
03076         return -1;
03077     }
03078     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
03079     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
03080     if (s->picture_structure == PICT_BOTTOM_FIELD)
03081         s->resync_mb_y = s->mb_y = s->mb_y + 1;
03082     assert(s->mb_y < s->mb_height);
03083 
03084     if(s->picture_structure==PICT_FRAME){
03085         h->curr_pic_num=   h->frame_num;
03086         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
03087     }else{
03088         h->curr_pic_num= 2*h->frame_num + 1;
03089         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
03090     }
03091 
03092     if(h->nal_unit_type == NAL_IDR_SLICE){
03093         get_ue_golomb(&s->gb); /* idr_pic_id */
03094     }
03095 
03096     if(h->sps.poc_type==0){
03097         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
03098 
03099         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
03100             h->delta_poc_bottom= get_se_golomb(&s->gb);
03101         }
03102     }
03103 
03104     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
03105         h->delta_poc[0]= get_se_golomb(&s->gb);
03106 
03107         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
03108             h->delta_poc[1]= get_se_golomb(&s->gb);
03109     }
03110 
03111     init_poc(h);
03112 
03113     if(h->pps.redundant_pic_cnt_present){
03114         h->redundant_pic_count= get_ue_golomb(&s->gb);
03115     }
03116 
03117     //set defaults, might be overridden a few lines later
03118     h->ref_count[0]= h->pps.ref_count[0];
03119     h->ref_count[1]= h->pps.ref_count[1];
03120 
03121     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
03122         int max_refs = s->picture_structure == PICT_FRAME ? 16 : 32;
03123 
03124         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
03125             h->direct_spatial_mv_pred= get_bits1(&s->gb);
03126         }
03127         num_ref_idx_active_override_flag= get_bits1(&s->gb);
03128 
03129         if(num_ref_idx_active_override_flag){
03130             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
03131             if (h->ref_count[0] < 1)
03132                 return AVERROR_INVALIDDATA;
03133             if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
03134                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
03135                 if (h->ref_count[1] < 1)
03136                     return AVERROR_INVALIDDATA;
03137             }
03138         }
03139 
03140         if (h->ref_count[0] > max_refs || h->ref_count[1] > max_refs) {
03141             av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
03142             h->ref_count[0] = h->ref_count[1] = 1;
03143             return AVERROR_INVALIDDATA;
03144         }
03145 
03146         if(h->slice_type_nos == AV_PICTURE_TYPE_B)
03147             h->list_count= 2;
03148         else
03149             h->list_count= 1;
03150     }else
03151         h->list_count= 0;
03152 
03153     if(!default_ref_list_done){
03154         ff_h264_fill_default_ref_list(h);
03155     }
03156 
03157     if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) {
03158         h->ref_count[1]= h->ref_count[0]= 0;
03159         return -1;
03160     }
03161 
03162     if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
03163         s->last_picture_ptr= &h->ref_list[0][0];
03164         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
03165     }
03166     if(h->slice_type_nos==AV_PICTURE_TYPE_B){
03167         s->next_picture_ptr= &h->ref_list[1][0];
03168         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
03169     }
03170 
03171     if(   (h->pps.weighted_pred          && h->slice_type_nos == AV_PICTURE_TYPE_P )
03172        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
03173         pred_weight_table(h);
03174     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
03175         implicit_weight_table(h, -1);
03176     }else {
03177         h->use_weight = 0;
03178         for (i = 0; i < 2; i++) {
03179             h->luma_weight_flag[i]   = 0;
03180             h->chroma_weight_flag[i] = 0;
03181         }
03182     }
03183 
03184     if(h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 &&
03185        (s->avctx->err_recognition & AV_EF_EXPLODE))
03186         return AVERROR_INVALIDDATA;
03187 
03188     if(FRAME_MBAFF){
03189         ff_h264_fill_mbaff_ref_list(h);
03190 
03191         if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
03192             implicit_weight_table(h, 0);
03193             implicit_weight_table(h, 1);
03194         }
03195     }
03196 
03197     if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
03198         ff_h264_direct_dist_scale_factor(h);
03199     ff_h264_direct_ref_list_init(h);
03200 
03201     if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
03202         tmp = get_ue_golomb_31(&s->gb);
03203         if(tmp > 2){
03204             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
03205             return -1;
03206         }
03207         h->cabac_init_idc= tmp;
03208     }
03209 
03210     h->last_qscale_diff = 0;
03211     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
03212     if(tmp>51+6*(h->sps.bit_depth_luma-8)){
03213         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
03214         return -1;
03215     }
03216     s->qscale= tmp;
03217     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
03218     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
03219     //FIXME qscale / qp ... stuff
03220     if(h->slice_type == AV_PICTURE_TYPE_SP){
03221         get_bits1(&s->gb); /* sp_for_switch_flag */
03222     }
03223     if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
03224         get_se_golomb(&s->gb); /* slice_qs_delta */
03225     }
03226 
03227     h->deblocking_filter = 1;
03228     h->slice_alpha_c0_offset = 52;
03229     h->slice_beta_offset = 52;
03230     if( h->pps.deblocking_filter_parameters_present ) {
03231         tmp= get_ue_golomb_31(&s->gb);
03232         if(tmp > 2){
03233             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
03234             return -1;
03235         }
03236         h->deblocking_filter= tmp;
03237         if(h->deblocking_filter < 2)
03238             h->deblocking_filter^= 1; // 1<->0
03239 
03240         if( h->deblocking_filter ) {
03241             h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
03242             h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
03243             if(   h->slice_alpha_c0_offset > 104U
03244                || h->slice_beta_offset     > 104U){
03245                 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
03246                 return -1;
03247             }
03248         }
03249     }
03250 
03251     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
03252        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
03253        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == AV_PICTURE_TYPE_B)
03254        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
03255         h->deblocking_filter= 0;
03256 
03257     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
03258         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
03259             /* Cheat slightly for speed:
03260                Do not bother to deblock across slices. */
03261             h->deblocking_filter = 2;
03262         } else {
03263             h0->max_contexts = 1;
03264             if(!h0->single_decode_warning) {
03265                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
03266                 h0->single_decode_warning = 1;
03267             }
03268             if (h != h0) {
03269                 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
03270                 return 1;
03271             }
03272         }
03273     }
03274     h->qp_thresh = 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset)
03275                  - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1])
03276                  + 6 * (h->sps.bit_depth_luma - 8);
03277 
03278 #if 0 //FMO
03279     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
03280         slice_group_change_cycle= get_bits(&s->gb, ?);
03281 #endif
03282 
03283     h0->last_slice_type = slice_type;
03284     h->slice_num = ++h0->current_slice;
03285     if(h->slice_num >= MAX_SLICES){
03286         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
03287     }
03288 
03289     for(j=0; j<2; j++){
03290         int id_list[16];
03291         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
03292         for(i=0; i<16; i++){
03293             id_list[i]= 60;
03294             if (h->ref_list[j][i].f.data[0]) {
03295                 int k;
03296                 uint8_t *base = h->ref_list[j][i].f.base[0];
03297                 for(k=0; k<h->short_ref_count; k++)
03298                     if (h->short_ref[k]->f.base[0] == base) {
03299                         id_list[i]= k;
03300                         break;
03301                     }
03302                 for(k=0; k<h->long_ref_count; k++)
03303                     if (h->long_ref[k] && h->long_ref[k]->f.base[0] == base) {
03304                         id_list[i]= h->short_ref_count + k;
03305                         break;
03306                     }
03307             }
03308         }
03309 
03310         ref2frm[0]=
03311         ref2frm[1]= -1;
03312         for(i=0; i<16; i++)
03313             ref2frm[i+2]= 4*id_list[i]
03314                           + (h->ref_list[j][i].f.reference & 3);
03315         ref2frm[18+0]=
03316         ref2frm[18+1]= -1;
03317         for(i=16; i<48; i++)
03318             ref2frm[i+4]= 4*id_list[(i-16)>>1]
03319                           + (h->ref_list[j][i].f.reference & 3);
03320     }
03321 
03322     //FIXME: fix draw_edges+PAFF+frame threads
03323     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
03324     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
03325 
03326     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
03327         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
03328                h->slice_num,
03329                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
03330                first_mb_in_slice,
03331                av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
03332                pps_id, h->frame_num,
03333                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
03334                h->ref_count[0], h->ref_count[1],
03335                s->qscale,
03336                h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
03337                h->use_weight,
03338                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
03339                h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
03340                );
03341     }
03342 
03343     return 0;
03344 }
03345 
03346 int ff_h264_get_slice_type(const H264Context *h)
03347 {
03348     switch (h->slice_type) {
03349     case AV_PICTURE_TYPE_P:  return 0;
03350     case AV_PICTURE_TYPE_B:  return 1;
03351     case AV_PICTURE_TYPE_I:  return 2;
03352     case AV_PICTURE_TYPE_SP: return 3;
03353     case AV_PICTURE_TYPE_SI: return 4;
03354     default:         return -1;
03355     }
03356 }
03357 
/* Fill the per-list motion-vector and reference caches
 * (h->mv_cache[list] / h->ref_cache[list]) for the current macroblock and
 * its top/left neighbours, for use by the deblocking filter.
 * Reference indices are remapped through the owning slice's ref2frm table
 * (built in the slice header code) so entries from different slices are
 * directly comparable. */
static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
                                                      int left_xy[LEFT_MBS], int top_type, int left_type[LEFT_MBS], int mb_xy, int list)
{
    int b_stride = h->b_stride;
    /* scan8[0] is the cache origin; the caches use a stride of 8, so
     * index -8 is the row above and index -1 the column to the left. */
    int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
    int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
    /* Neighbour data is only needed when the current MB itself is
     * inter- or direct-coded. */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        if(USES_LIST(top_type, list)){
            const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride; /* bottom 4x4 row of the MB above */
            const int b8_xy= 4*top_xy + 2;                   /* its bottom 8x8 row */
            /* ref2frm of the slice that owns the *top* MB.  +2 skips the two
             * leading -1 sentinel entries (ref2frm[0..1]); +20 selects the
             * MBAFF field part of the table (entries 20+, see the slice
             * header code that fills ref2frm). */
            int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
            AV_COPY128(mv_dst - 1*8, s->current_picture.f.motion_val[list][b_xy + 0]);
            ref_cache[0 - 1*8]=
            ref_cache[1 - 1*8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 0]];
            ref_cache[2 - 1*8]=
            ref_cache[3 - 1*8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 1]];
        }else{
            /* Top MB does not use this list: zero MVs, mark all four refs
             * unused (LIST_NOT_USED byte replicated across the word). */
            AV_ZERO128(mv_dst - 1*8);
            AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        }

        /* Left column: only loaded here when current and left MB have the
         * same field/frame coding (XOR of the interlaced flags is clear);
         * NOTE(review): the mixed-interlacing MBAFF case is presumably
         * handled by the caller — confirm. */
        if(!IS_INTERLACED(mb_type^left_type[LTOP])){
            if(USES_LIST(left_type[LTOP], list)){
                const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3; /* rightmost 4x4 column of the left MB */
                const int b8_xy= 4*left_xy[LTOP] + 1;          /* its right 8x8 column */
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[LTOP]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY32(mv_dst - 1 +  0, s->current_picture.f.motion_val[list][b_xy + b_stride*0]);
                AV_COPY32(mv_dst - 1 +  8, s->current_picture.f.motion_val[list][b_xy + b_stride*1]);
                AV_COPY32(mv_dst - 1 + 16, s->current_picture.f.motion_val[list][b_xy + b_stride*2]);
                AV_COPY32(mv_dst - 1 + 24, s->current_picture.f.motion_val[list][b_xy + b_stride*3]);
                ref_cache[-1 +  0]=
                ref_cache[-1 +  8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2*0]];
                ref_cache[-1 + 16]=
                ref_cache[-1 + 24]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2*1]];
            }else{
                /* Left MB does not use this list. */
                AV_ZERO32(mv_dst - 1 + 0);
                AV_ZERO32(mv_dst - 1 + 8);
                AV_ZERO32(mv_dst - 1 +16);
                AV_ZERO32(mv_dst - 1 +24);
                ref_cache[-1 +  0]=
                ref_cache[-1 +  8]=
                ref_cache[-1 + 16]=
                ref_cache[-1 + 24]= LIST_NOT_USED;
            }
        }
    }

    /* Current MB does not use this list: fill the 4x4 interior with zero
     * MVs and "not used" refs and return early. */
    if(!USES_LIST(mb_type, list)){
        fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0,0), 4);
        AV_WN32A(&ref_cache[0*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[2*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[3*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        return;
    }

    {
        /* Remap the current MB's four 8x8 ref indices through its own
         * slice's ref2frm table; the &0x00FF00FF then *0x0101 duplicates
         * each byte so every 4x4 block in an 8x8 partition gets the same
         * remapped reference. */
        int8_t *ref = &s->current_picture.f.ref_index[list][4*mb_xy];
        int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
        uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
        uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]],ref2frm[list][ref[3]])&0x00FF00FF)*0x0101;
        AV_WN32A(&ref_cache[0*8], ref01);
        AV_WN32A(&ref_cache[1*8], ref01);
        AV_WN32A(&ref_cache[2*8], ref23);
        AV_WN32A(&ref_cache[3*8], ref23);
    }

    {
        /* Copy the current MB's 4x4 motion vectors, one 16-byte row of
         * four MVs at a time. */
        int16_t (*mv_src)[2] = &s->current_picture.f.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
        AV_COPY128(mv_dst + 8*0, mv_src + 0*b_stride);
        AV_COPY128(mv_dst + 8*1, mv_src + 1*b_stride);
        AV_COPY128(mv_dst + 8*2, mv_src + 2*b_stride);
        AV_COPY128(mv_dst + 8*3, mv_src + 3*b_stride);
    }
}
03433 
03438 static int fill_filter_caches(H264Context *h, int mb_type){
03439     MpegEncContext * const s = &h->s;
03440     const int mb_xy= h->mb_xy;
03441     int top_xy, left_xy[LEFT_MBS];
03442     int top_type, left_type[LEFT_MBS];
03443     uint8_t *nnz;
03444     uint8_t *nnz_cache;
03445 
03446     top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
03447 
03448     /* Wow, what a mess, why didn't they simplify the interlacing & intra
03449      * stuff, I can't imagine that these complex rules are worth it. */
03450 
03451     left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
03452     if(FRAME_MBAFF){
03453         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]);
03454         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
03455         if(s->mb_y&1){
03456             if (left_mb_field_flag != curr_mb_field_flag) {
03457                 left_xy[LTOP] -= s->mb_stride;
03458             }
03459         }else{
03460             if(curr_mb_field_flag){
03461                 top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1);
03462             }
03463             if (left_mb_field_flag != curr_mb_field_flag) {
03464                 left_xy[LBOT] += s->mb_stride;
03465             }
03466         }
03467     }
03468 
03469     h->top_mb_xy = top_xy;
03470     h->left_mb_xy[LTOP] = left_xy[LTOP];
03471     h->left_mb_xy[LBOT] = left_xy[LBOT];
03472     {
03473         //for sufficiently low qp, filtering wouldn't do anything
03474         //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
03475         int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
03476         int qp = s->current_picture.f.qscale_table[mb_xy];
03477         if(qp <= qp_thresh
03478            && (left_xy[LTOP] < 0 || ((qp + s->current_picture.f.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh)
03479            && (top_xy        < 0 || ((qp + s->current_picture.f.qscale_table[top_xy       ] + 1) >> 1) <= qp_thresh)) {
03480             if(!FRAME_MBAFF)
03481                 return 1;
03482             if ((left_xy[LTOP] < 0            || ((qp + s->current_picture.f.qscale_table[left_xy[LBOT]        ] + 1) >> 1) <= qp_thresh) &&
03483                 (top_xy        < s->mb_stride || ((qp + s->current_picture.f.qscale_table[top_xy - s->mb_stride] + 1) >> 1) <= qp_thresh))
03484                 return 1;
03485         }
03486     }
03487 
03488     top_type        = s->current_picture.f.mb_type[top_xy];
03489     left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]];
03490     left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]];
03491     if(h->deblocking_filter == 2){
03492         if(h->slice_table[top_xy       ] != h->slice_num) top_type= 0;
03493         if(h->slice_table[left_xy[LBOT]] != h->slice_num) left_type[LTOP]= left_type[LBOT]= 0;
03494     }else{
03495         if(h->slice_table[top_xy       ] == 0xFFFF) top_type= 0;
03496         if(h->slice_table[left_xy[LBOT]] == 0xFFFF) left_type[LTOP]= left_type[LBOT] =0;
03497     }
03498     h->top_type       = top_type;
03499     h->left_type[LTOP]= left_type[LTOP];
03500     h->left_type[LBOT]= left_type[LBOT];
03501 
03502     if(IS_INTRA(mb_type))
03503         return 0;
03504 
03505     fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 0);
03506     if(h->list_count == 2)
03507         fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 1);
03508 
03509     nnz = h->non_zero_count[mb_xy];
03510     nnz_cache = h->non_zero_count_cache;
03511     AV_COPY32(&nnz_cache[4+8*1], &nnz[ 0]);
03512     AV_COPY32(&nnz_cache[4+8*2], &nnz[ 4]);
03513     AV_COPY32(&nnz_cache[4+8*3], &nnz[ 8]);
03514     AV_COPY32(&nnz_cache[4+8*4], &nnz[12]);
03515     h->cbp= h->cbp_table[mb_xy];
03516 
03517     if(top_type){
03518         nnz = h->non_zero_count[top_xy];
03519         AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
03520     }
03521 
03522     if(left_type[LTOP]){
03523         nnz = h->non_zero_count[left_xy[LTOP]];
03524         nnz_cache[3+8*1]= nnz[3+0*4];
03525         nnz_cache[3+8*2]= nnz[3+1*4];
03526         nnz_cache[3+8*3]= nnz[3+2*4];
03527         nnz_cache[3+8*4]= nnz[3+3*4];
03528     }
03529 
03530     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
03531     if(!CABAC && h->pps.transform_8x8_mode){
03532         if(IS_8x8DCT(top_type)){
03533             nnz_cache[4+8*0]=
03534             nnz_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
03535             nnz_cache[6+8*0]=
03536             nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
03537         }
03538         if(IS_8x8DCT(left_type[LTOP])){
03539             nnz_cache[3+8*1]=
03540             nnz_cache[3+8*2]= (h->cbp_table[left_xy[LTOP]]&0x2000) >> 12; //FIXME check MBAFF
03541         }
03542         if(IS_8x8DCT(left_type[LBOT])){
03543             nnz_cache[3+8*3]=
03544             nnz_cache[3+8*4]= (h->cbp_table[left_xy[LBOT]]&0x8000) >> 12; //FIXME check MBAFF
03545         }
03546 
03547         if(IS_8x8DCT(mb_type)){
03548             nnz_cache[scan8[0   ]]= nnz_cache[scan8[1   ]]=
03549             nnz_cache[scan8[2   ]]= nnz_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
03550 
03551             nnz_cache[scan8[0+ 4]]= nnz_cache[scan8[1+ 4]]=
03552             nnz_cache[scan8[2+ 4]]= nnz_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
03553 
03554             nnz_cache[scan8[0+ 8]]= nnz_cache[scan8[1+ 8]]=
03555             nnz_cache[scan8[2+ 8]]= nnz_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
03556 
03557             nnz_cache[scan8[0+12]]= nnz_cache[scan8[1+12]]=
03558             nnz_cache[scan8[2+12]]= nnz_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
03559         }
03560     }
03561 
03562     return 0;
03563 }
03564 
/**
 * Run the deblocking filter over macroblock columns [start_x, end_x) of the
 * row (or MBAFF row pair) currently addressed by s->mb_y.
 *
 * Iterates each MB, recomputes its destination pointers and line sizes,
 * backs up the MB border, fills the filter caches and then calls the
 * (fast or full MBAFF) filter.  On exit s->mb_x/s->mb_y and the chroma QP
 * values are restored for the continuing decode loop.
 */
static void loop_filter(H264Context *h, int start_x, int end_x){
    MpegEncContext * const s = &h->s;
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    const int end_mb_y= s->mb_y + FRAME_MBAFF;
    const int old_slice_type= h->slice_type;    /* saved: fill_filter_caches may change it */
    const int pixel_shift = h->pixel_shift;     /* 1 for >8-bit samples (2 bytes/pixel) */
    const int block_h = 16 >> s->chroma_y_shift; /* chroma MB height in pixels */

    if(h->deblocking_filter) {
        for(mb_x= start_x; mb_x<end_x; mb_x++){
            /* with MBAFF both rows of the MB pair are filtered here */
            for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                int mb_xy, mb_type;
                mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                h->slice_num= h->slice_table[mb_xy];
                mb_type = s->current_picture.f.mb_type[mb_xy];
                h->list_count= h->list_counts[mb_xy];

                if(FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x= mb_x;
                s->mb_y= mb_y;
                /* luma/chroma destination pointers for this MB */
                dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
                dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
                dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
                    //FIXME simplify above

                if (MB_FIELD) {
                    /* field MB: double stride, and for the bottom field
                     * rewind to the top line of the field */
                    linesize   = h->mb_linesize   = s->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if(mb_y&1){ //FIXME move out of this function?
                        dest_y -= s->linesize*15;
                        dest_cb-= s->uvlinesize * (block_h - 1);
                        dest_cr-= s->uvlinesize * (block_h - 1);
                    }
                } else {
                    linesize   = h->mb_linesize   = s->linesize;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                }
                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
                /* nonzero return means filtering is a no-op for this MB */
                if(fill_filter_caches(h, mb_type))
                    continue;
                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.f.qscale_table[mb_xy]);

                if (FRAME_MBAFF) {
                    ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                } else {
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                }
            }
        }
    }
    /* restore decoder state for the caller */
    h->slice_type= old_slice_type;
    s->mb_x= end_x;
    s->mb_y= end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}
03625 
03626 static void predict_field_decoding_flag(H264Context *h){
03627     MpegEncContext * const s = &h->s;
03628     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
03629     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
03630                 ? s->current_picture.f.mb_type[mb_xy - 1]
03631                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
03632                 ? s->current_picture.f.mb_type[mb_xy - s->mb_stride]
03633                 : 0;
03634     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
03635 }
03636 
03640 static void decode_finish_row(H264Context *h){
03641     MpegEncContext * const s = &h->s;
03642     int top = 16*(s->mb_y >> FIELD_PICTURE);
03643     int height = 16 << FRAME_MBAFF;
03644     int deblock_border = (16 + 4) << FRAME_MBAFF;
03645     int pic_height = 16*s->mb_height >> FIELD_PICTURE;
03646 
03647     if (h->deblocking_filter) {
03648         if((top + height) >= pic_height)
03649             height += deblock_border;
03650 
03651         top -= deblock_border;
03652     }
03653 
03654     if (top >= pic_height || (top + height) < h->emu_edge_height)
03655         return;
03656 
03657     height = FFMIN(height, pic_height - top);
03658     if (top < h->emu_edge_height) {
03659         height = top+height;
03660         top = 0;
03661     }
03662 
03663     ff_draw_horiz_band(s, top, height);
03664 
03665     if (s->dropable) return;
03666 
03667     ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,
03668                              s->picture_structure==PICT_BOTTOM_FIELD);
03669 }
03670 
/**
 * Decode one slice (entry point for avctx->execute slice threading).
 *
 * @param arg pointer to an H264Context* (double indirection, as required
 *            by the execute() callback convention)
 * @return 0 on a clean slice end, -1 on a decode error (the damaged
 *         region is reported to the error resilience code either way)
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg){
    H264Context *h = *(void**)arg;
    MpegEncContext * const s = &h->s;
    const int part_mask= s->partitioned_frame ? (ER_AC_END|ER_AC_ERROR) : 0x7F;
    int lf_x_start = s->mb_x;   /* first column not yet loop-filtered */

    s->mb_skip_run= -1;

    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
                    (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));

    if( h->pps.cabac ) {
        /* realign */
        align_get_bits( &s->gb );

        /* init cabac */
        ff_init_cabac_states( &h->cabac);
        ff_init_cabac_decoder( &h->cabac,
                               s->gb.buffer + get_bits_count(&s->gb)/8,
                               (get_bits_left(&s->gb) + 7)/8);

        ff_h264_init_cabac_states(h);

        /* CABAC macroblock loop: decode MBs until end_of_slice_flag or
         * the frame is full */
        for(;;){
//START_TIMER
            int ret = ff_h264_decode_mb_cabac(h);
            int eos;
//STOP_TIMER("decode_mb_cabac")

            if(ret>=0) ff_h264_hl_decode_mb(h);

            /* with MBAFF the bottom MB of the pair is decoded immediately */
            if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;

                ret = ff_h264_decode_mb_cabac(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }
            eos = get_cabac_terminate( &h->cabac );

            /* some encoders truncate the slice; treat overread as slice end */
            if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);
                if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
                return 0;
            }
            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR&part_mask);
                return -1;
            }

            /* end of MB row: filter it, then advance (by 2 rows for
             * field/MBAFF pictures) */
            if( ++s->mb_x >= s->mb_width ) {
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
            }

            if( eos || s->mb_y >= s->mb_height ) {
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);
                if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
                return 0;
            }
        }

    } else {
        /* CAVLC macroblock loop: slice ends when the bitstream runs out */
        for(;;){
            int ret = ff_h264_decode_mb_cavlc(h);

            if(ret>=0) ff_h264_hl_decode_mb(h);

            if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;
                ret = ff_h264_decode_mb_cavlc(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }

            if(ret<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR&part_mask);
                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
                if(s->mb_y >= s->mb_height){
                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                    /* exactly consumed bitstream == clean slice end */
                    if (get_bits_left(&s->gb) == 0) {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);

                        return 0;
                    } else {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y,
                                        s->mb_x - 1, s->mb_y,
                                        ER_MB_END & part_mask);
                        return -1;
                    }
                }
            }

            /* bitstream exhausted mid-frame: clean end only if no bits
             * remain and no skip run is pending */
            if (get_bits_left(&s->gb) <= 0 && s->mb_skip_run <= 0){
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                if (get_bits_left(&s->gb) == 0) {
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);
                    if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR&part_mask);

                    return -1;
                }
            }
        }
    }
}
03805 
03812 static int execute_decode_slices(H264Context *h, int context_count){
03813     MpegEncContext * const s = &h->s;
03814     AVCodecContext * const avctx= s->avctx;
03815     H264Context *hx;
03816     int i;
03817 
03818     if (s->avctx->hwaccel || s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03819         return 0;
03820     if(context_count == 1) {
03821         return decode_slice(avctx, &h);
03822     } else {
03823         for(i = 1; i < context_count; i++) {
03824             hx = h->thread_context[i];
03825             hx->s.err_recognition = avctx->err_recognition;
03826             hx->s.error_count = 0;
03827         }
03828 
03829         avctx->execute(avctx, decode_slice,
03830                        h->thread_context, NULL, context_count, sizeof(void*));
03831 
03832         /* pull back stuff from slices to master context */
03833         hx = h->thread_context[context_count - 1];
03834         s->mb_x = hx->s.mb_x;
03835         s->mb_y = hx->s.mb_y;
03836         s->dropable = hx->s.dropable;
03837         s->picture_structure = hx->s.picture_structure;
03838         for(i = 1; i < context_count; i++)
03839             h->s.error_count += h->thread_context[i]->s.error_count;
03840     }
03841 
03842     return 0;
03843 }
03844 
03845 
03846 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
03847     MpegEncContext * const s = &h->s;
03848     AVCodecContext * const avctx= s->avctx;
03849     H264Context *hx; 
03850     int buf_index;
03851     int context_count;
03852     int next_avc;
03853     int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
03854     int nals_needed=0; 
03855     int nal_index;
03856 
03857     h->max_contexts = s->slice_context_count;
03858     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
03859         h->current_slice = 0;
03860         if (!s->first_field)
03861             s->current_picture_ptr= NULL;
03862         ff_h264_reset_sei(h);
03863     }
03864 
03865     for(;pass <= 1;pass++){
03866         buf_index = 0;
03867         context_count = 0;
03868         next_avc = h->is_avc ? 0 : buf_size;
03869         nal_index = 0;
03870     for(;;){
03871         int consumed;
03872         int dst_length;
03873         int bit_length;
03874         const uint8_t *ptr;
03875         int i, nalsize = 0;
03876         int err;
03877 
03878         if(buf_index >= next_avc) {
03879             if (buf_index >= buf_size - h->nal_length_size) break;
03880             nalsize = 0;
03881             for(i = 0; i < h->nal_length_size; i++)
03882                 nalsize = (nalsize << 8) | buf[buf_index++];
03883             if(nalsize <= 0 || nalsize > buf_size - buf_index){
03884                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
03885                 break;
03886             }
03887             next_avc= buf_index + nalsize;
03888         } else {
03889             // start code prefix search
03890             for(; buf_index + 3 < next_avc; buf_index++){
03891                 // This should always succeed in the first iteration.
03892                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
03893                     break;
03894             }
03895 
03896 
03897             if (buf_index + 3 >= buf_size) {
03898                 buf_index = buf_size;
03899                 break;
03900             }
03901 
03902             buf_index+=3;
03903             if(buf_index >= next_avc) continue;
03904         }
03905 
03906         hx = h->thread_context[context_count];
03907 
03908         ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
03909         if (ptr == NULL || dst_length < 0) {
03910             buf_index = -1;
03911             goto end;
03912         }
03913         i= buf_index + consumed;
03914         if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
03915            buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
03916             s->workaround_bugs |= FF_BUG_TRUNCATED;
03917 
03918         if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
03919         while(ptr[dst_length - 1] == 0 && dst_length > 0)
03920             dst_length--;
03921         }
03922         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
03923 
03924         if(s->avctx->debug&FF_DEBUG_STARTCODE){
03925             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
03926         }
03927 
03928         if (h->is_avc && (nalsize != consumed) && nalsize){
03929             av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
03930         }
03931 
03932         buf_index += consumed;
03933         nal_index++;
03934 
03935         if(pass == 0) {
03936             // packets can sometimes contain multiple PPS/SPS
03937             // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
03938             // if so, when frame threading we can't start the next thread until we've read all of them
03939             switch (hx->nal_unit_type) {
03940                 case NAL_SPS:
03941                 case NAL_PPS:
03942                     nals_needed = nal_index;
03943                     break;
03944                 case NAL_IDR_SLICE:
03945                 case NAL_SLICE:
03946                     init_get_bits(&hx->s.gb, ptr, bit_length);
03947                     if (!get_ue_golomb(&hx->s.gb))
03948                         nals_needed = nal_index;
03949             }
03950             continue;
03951         }
03952 
03953         //FIXME do not discard SEI id
03954         if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0)
03955             continue;
03956 
03957       again:
03958         err = 0;
03959         switch(hx->nal_unit_type){
03960         case NAL_IDR_SLICE:
03961             if (h->nal_unit_type != NAL_IDR_SLICE) {
03962                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
03963                 buf_index = -1;
03964                 goto end;
03965             }
03966             idr(h); // FIXME ensure we don't lose some frames if there is reordering
03967         case NAL_SLICE:
03968             init_get_bits(&hx->s.gb, ptr, bit_length);
03969             hx->intra_gb_ptr=
03970             hx->inter_gb_ptr= &hx->s.gb;
03971             hx->s.data_partitioning = 0;
03972 
03973             if((err = decode_slice_header(hx, h)))
03974                break;
03975 
03976             s->current_picture_ptr->f.key_frame |=
03977                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
03978                     (h->sei_recovery_frame_cnt >= 0);
03979 
03980             if (h->current_slice == 1) {
03981                 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
03982                     decode_postinit(h, nal_index >= nals_needed);
03983                 }
03984 
03985                 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
03986                     return -1;
03987                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03988                     ff_vdpau_h264_picture_start(s);
03989             }
03990 
03991             if(hx->redundant_pic_count==0
03992                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03993                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03994                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03995                && avctx->skip_frame < AVDISCARD_ALL){
03996                 if(avctx->hwaccel) {
03997                     if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
03998                         return -1;
03999                 }else
04000                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
04001                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
04002                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
04003                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
04004                 }else
04005                     context_count++;
04006             }
04007             break;
04008         case NAL_DPA:
04009             init_get_bits(&hx->s.gb, ptr, bit_length);
04010             hx->intra_gb_ptr=
04011             hx->inter_gb_ptr= NULL;
04012 
04013             if ((err = decode_slice_header(hx, h)) < 0)
04014                 break;
04015 
04016             hx->s.data_partitioning = 1;
04017 
04018             break;
04019         case NAL_DPB:
04020             init_get_bits(&hx->intra_gb, ptr, bit_length);
04021             hx->intra_gb_ptr= &hx->intra_gb;
04022             break;
04023         case NAL_DPC:
04024             init_get_bits(&hx->inter_gb, ptr, bit_length);
04025             hx->inter_gb_ptr= &hx->inter_gb;
04026 
04027             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
04028                && s->current_picture_ptr
04029                && s->context_initialized
04030                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
04031                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
04032                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
04033                && avctx->skip_frame < AVDISCARD_ALL)
04034                 context_count++;
04035             break;
04036         case NAL_SEI:
04037             init_get_bits(&s->gb, ptr, bit_length);
04038             ff_h264_decode_sei(h);
04039             break;
04040         case NAL_SPS:
04041             init_get_bits(&s->gb, ptr, bit_length);
04042             if (ff_h264_decode_seq_parameter_set(h) < 0 &&
04043                 h->is_avc && (nalsize != consumed) && nalsize) {
04044                 av_log(h->s.avctx, AV_LOG_DEBUG, "SPS decoding failure, "
04045                        "try parsing the coomplete NAL\n");
04046                 init_get_bits(&s->gb, buf + buf_index + 1 - consumed,
04047                               8 * (nalsize - 1));
04048                 ff_h264_decode_seq_parameter_set(h);
04049             }
04050 
04051             if (s->flags & CODEC_FLAG_LOW_DELAY ||
04052                 (h->sps.bitstream_restriction_flag &&
04053                  !h->sps.num_reorder_frames)) {
04054                 if (s->avctx->has_b_frames > 1 || h->delayed_pic[0])
04055                     av_log(avctx, AV_LOG_WARNING, "Delayed frames seen "
04056                            "reenabling low delay requires a codec "
04057                            "flush.\n");
04058                 else
04059                     s->low_delay = 1;
04060             }
04061 
04062             if(avctx->has_b_frames < 2)
04063                 avctx->has_b_frames= !s->low_delay;
04064 
04065             if (h->sps.bit_depth_luma != h->sps.bit_depth_chroma) {
04066                 av_log_missing_feature(s->avctx,
04067                     "Different bit depth between chroma and luma", 1);
04068                 return AVERROR_PATCHWELCOME;
04069             }
04070 
04071             if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
04072                 h->cur_chroma_format_idc != h->sps.chroma_format_idc) {
04073                 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
04074                     avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
04075                     h->cur_chroma_format_idc = h->sps.chroma_format_idc;
04076                     h->pixel_shift = h->sps.bit_depth_luma > 8;
04077 
04078                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
04079                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
04080                     s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
04081                     dsputil_init(&s->dsp, s->avctx);
04082                 } else {
04083                     av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
04084                     buf_index = -1;
04085                     goto end;
04086                 }
04087             }
04088             break;
04089         case NAL_PPS:
04090             init_get_bits(&s->gb, ptr, bit_length);
04091 
04092             ff_h264_decode_picture_parameter_set(h, bit_length);
04093 
04094             break;
04095         case NAL_AUD:
04096         case NAL_END_SEQUENCE:
04097         case NAL_END_STREAM:
04098         case NAL_FILLER_DATA:
04099         case NAL_SPS_EXT:
04100         case NAL_AUXILIARY_SLICE:
04101             break;
04102         default:
04103             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
04104         }
04105 
04106         if(context_count == h->max_contexts) {
04107             execute_decode_slices(h, context_count);
04108             context_count = 0;
04109         }
04110 
04111         if (err < 0)
04112             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
04113         else if(err == 1) {
04114             /* Slice could not be decoded in parallel mode, copy down
04115              * NAL unit stuff to context 0 and restart. Note that
04116              * rbsp_buffer is not transferred, but since we no longer
04117              * run in parallel mode this should not be an issue. */
04118             h->nal_unit_type = hx->nal_unit_type;
04119             h->nal_ref_idc   = hx->nal_ref_idc;
04120             hx = h;
04121             goto again;
04122         }
04123     }
04124     }
04125     if(context_count)
04126         execute_decode_slices(h, context_count);
04127 
04128 end:
04129     /* clean up */
04130     if (s->current_picture_ptr && s->current_picture_ptr->owner2 == s &&
04131         !s->dropable) {
04132         ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
04133                                   s->picture_structure == PICT_BOTTOM_FIELD);
04134     }
04135 
04136     return buf_index;
04137 }
04138 
04142 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
04143         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
04144         if(pos+10>buf_size) pos=buf_size; // oops ;)
04145 
04146         return pos;
04147 }
04148 
/**
 * Decode one AVPacket: split it into NAL units, decode them and, when a
 * complete frame (or field pair) is available, return it in *pict.
 *
 * An empty packet (buf_size == 0) signals end of stream and flushes the
 * delayed-picture reorder buffer instead of decoding.
 *
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is returned, 0 otherwise
 * @return number of bytes consumed, or -1 on error
 */
static int decode_frame(AVCodecContext *avctx,
                             void *data, int *data_size,
                             AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    int buf_index = 0;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

   /* end of stream, output what is still in the buffers */
 out:
    if (buf_size == 0) {
        Picture *out;
        int i, out_idx;

        s->current_picture_ptr = NULL;

//FIXME factorize this with the output code below
        /* pick the delayed picture with the smallest poc, stopping at the
         * next keyframe/mmco reset boundary */
        out = h->delayed_pic[0];
        out_idx = 0;
        for (i = 1; h->delayed_pic[i] && !h->delayed_pic[i]->f.key_frame && !h->delayed_pic[i]->mmco_reset; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
                out_idx = i;
            }

        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];

        if(out){
            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;
        }

        return buf_index;
    }

    buf_index=decode_nal_units(h, buf, buf_size);
    if(buf_index < 0)
        return -1;

    /* an end-of-sequence NAL with no picture triggers the flush path above */
    if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        buf_size = 0;
        goto out;
    }

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        if (avctx->skip_frame >= AVDISCARD_NONREF)
            return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
        return -1;
    }

    /* picture (or field pair) complete: finish the field and output */
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){

        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);

        field_end(h, 0);

        if (!h->next_output_pic) {
            /* Wait for second field. */
            *data_size = 0;

        } else {
            *data_size = sizeof(AVFrame);
            *pict = *(AVFrame*)h->next_output_pic;
        }
    }

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
//printf("out %d\n", (int)pict->data[0]);

    return get_consumed_bytes(s, buf_index, buf_size);
}
#if 0
/**
 * Fill h->mb_avail[] with the availability of the neighboring macroblocks
 * of the current one: a neighbor counts as available only if it exists
 * inside the picture AND belongs to the same slice as the current MB.
 * Index layout (as written below): [0] top-left, [1] top, [2] top-right,
 * [3] left; [4]/[5] are constants flagged with FIXMEs to be hoisted out.
 * Currently compiled out (dead code).
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        // Not in the first MB row: top neighbors may exist.
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        // First row: no macroblocks above.
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
04248 
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Standalone self-test for the exp-Golomb bitstream helpers:
 * writes COUNT unsigned/signed codes with set_ue_golomb()/set_se_golomb(),
 * reads them back with get_ue_golomb()/get_se_golomb() and reports any
 * mismatch, timing each call with START_TIMER/STOP_TIMER.
 * @return 0 (mismatches are printed, not treated as fatal)
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
    DSPContext dsp;
    AVCodecContext avctx;

    avctx.av_class = avcodec_get_class();
    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s = show_bits(&gb, 24); // peek at the raw bits for diagnostics

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s = show_bits(&gb, 24); // peek at the raw bits for diagnostics

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            // Bug fix: the expected value is i - COUNT/2 (what was encoded),
            // not i; the old message printed the wrong "should be" value.
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
04316 
04317 
04318 av_cold void ff_h264_free_context(H264Context *h)
04319 {
04320     int i;
04321 
04322     free_tables(h, 1); //FIXME cleanup init stuff perhaps
04323 
04324     for(i = 0; i < MAX_SPS_COUNT; i++)
04325         av_freep(h->sps_buffers + i);
04326 
04327     for(i = 0; i < MAX_PPS_COUNT; i++)
04328         av_freep(h->pps_buffers + i);
04329 }
04330 
04331 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
04332 {
04333     H264Context *h = avctx->priv_data;
04334     MpegEncContext *s = &h->s;
04335 
04336     ff_h264_free_context(h);
04337 
04338     MPV_common_end(s);
04339 
04340 //    memset(h, 0, sizeof(H264Context));
04341 
04342     return 0;
04343 }
04344 
/* Human-readable names for the H.264 profiles this decoder can report,
 * terminated by an FF_PROFILE_UNKNOWN sentinel entry. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
    { FF_PROFILE_H264_MAIN,                 "Main"                  },
    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
    { FF_PROFILE_H264_HIGH,                 "High"                  },
    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
    { FF_PROFILE_UNKNOWN }, /* sentinel */
};
04361 
04362 AVCodec ff_h264_decoder = {
04363     .name           = "h264",
04364     .type           = AVMEDIA_TYPE_VIDEO,
04365     .id             = CODEC_ID_H264,
04366     .priv_data_size = sizeof(H264Context),
04367     .init           = ff_h264_decode_init,
04368     .close          = ff_h264_decode_end,
04369     .decode         = decode_frame,
04370     .capabilities   = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
04371                       CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
04372     .flush= flush_dpb,
04373     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
04374     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
04375     .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
04376     .profiles = NULL_IF_CONFIG_SMALL(profiles),
04377 };
04378 
#if CONFIG_H264_VDPAU_DECODER
/* Registration of the VDPAU-accelerated H.264 decoder variant. */
AVCodec ff_h264_vdpau_decoder = {
    .name           = "h264_vdpau",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = ff_h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
                      CODEC_CAP_HWACCEL_VDPAU,
    .flush          = flush_dpb,
    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts       = (const enum PixelFormat[]) { PIX_FMT_VDPAU_H264,
                                                   PIX_FMT_NONE },
    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
};
#endif