Libav 0.7.1
libavcodec/h264.c
Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "libavutil/imgutils.h"
00029 #include "internal.h"
00030 #include "dsputil.h"
00031 #include "avcodec.h"
00032 #include "mpegvideo.h"
00033 #include "h264.h"
00034 #include "h264data.h"
00035 #include "h264_mvpred.h"
00036 #include "golomb.h"
00037 #include "mathops.h"
00038 #include "rectangle.h"
00039 #include "thread.h"
00040 #include "vdpau_internal.h"
00041 #include "libavutil/avassert.h"
00042 
00043 #include "cabac.h"
00044 
00045 //#undef NDEBUG
00046 #include <assert.h>
00047 
/* rem6[qp] == qp % 6 for every legal quantizer value (0..QP_MAX_NUM).
 * Used together with div6[] to split a QP into its cyclic scale index
 * and shift part without a division at runtime. */
static const uint8_t rem6[QP_MAX_NUM+1]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
00051 
/* div6[qp] == qp / 6 for every legal quantizer value (0..QP_MAX_NUM).
 * Companion table to rem6[]; avoids a runtime division. */
static const uint8_t div6[QP_MAX_NUM+1]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
};
00055 
/* Pixel-format candidates offered when decoding (full-range "JPEG") 4:2:0
 * streams: hardware-accelerated formats are listed first, the software
 * fallback second; the list is terminated by PIX_FMT_NONE. */
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_YUVJ420P,
    PIX_FMT_NONE
};
00062 
00063 void ff_h264_write_back_intra_pred_mode(H264Context *h){
00064     int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
00065 
00066     AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
00067     mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
00068     mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
00069     mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
00070 }
00071 
/**
 * Check if the top & left neighbour samples required by the cached intra4x4
 * prediction modes are available, and remap DC modes so they only use the
 * available samples.
 * @return 0 on success, -1 if a mode requires a missing neighbour
 */
int ff_h264_check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* Remap tables indexed by the cached 4x4 prediction mode:
     *   <0 -> the mode needs the missing neighbour: error
     *    0 -> the mode does not use that neighbour: keep as-is
     *   >0 -> substitute DC variant that avoids the missing samples */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    int i;

    if(!(h->top_samples_available&0x8000)){
        for(i=0; i<4; i++){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
            if(status<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                return -1;
            } else if(status){
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
            }
        }
    }

    if((h->left_samples_available&0x8888)!=0x8888){
        /* mask[i] is the availability bit matching the i-th 4x4 row of the
         * left edge (cache index scan8[0] + 8*i) */
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
        for(i=0; i<4; i++){
            if(!(h->left_samples_available&mask[i])){
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                if(status<0){
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                    return -1;
                } else if(status){
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
                }
            }
        }
    }

    return 0;
} //FIXME cleanup like ff_h264_check_intra_pred_mode
00110 
/**
 * Check if the top & left neighbour samples required by a 16x16 luma /
 * 8x8 chroma intra prediction mode are available, and remap DC modes so
 * they only use the available samples.
 * @param mode the requested prediction mode
 * @param is_chroma nonzero when checking a chroma prediction mode
 * @return the (possibly remapped) mode, or -1 on error
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma){
    MpegEncContext * const s = &h->s;
    /* Remap tables indexed by mode: -1 means the mode needs the missing
     * neighbour (error); other entries are the substitute mode. */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    /* unsigned comparison also rejects negative modes in one test */
    if(mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        if(is_chroma && (h->left_samples_available&0x8080)){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}
00145 
/**
 * Decode a network abstraction layer unit.
 * Strips the one-byte NAL header (filling h->nal_ref_idc and
 * h->nal_unit_type) and removes the 00 00 03 emulation-prevention escapes
 * from the payload.
 * @param consumed is the number of bytes used as input
 * @param length is the length of the array
 * @param dst_length is the number of decoded bytes
 * @return decoded bytes, might be src+1 if no escapes
 */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;                //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;

    /* Scan for a zero byte (every escape/startcode contains one); the
     * word-at-a-time variants below test 8 resp. 4 bytes per iteration
     * with a carry trick, falling back RS bytes on a hit. */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        /* at src[i..i+1] == 00 00: byte <= 3 means escape (3) or startcode (<3) */
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            /* no 00 00 0x sequence possible here; copy two bytes at once */
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}
00231 
00236 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
00237     int v= *src;
00238     int r;
00239 
00240     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
00241 
00242     for(r=1; r<9; r++){
00243         if(v&1) return r;
00244         v>>=1;
00245     }
00246     return 0;
00247 }
00248 
00249 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
00250                                  int y_offset, int list){
00251     int raw_my= h->mv_cache[list][ scan8[n] ][1];
00252     int filter_height= (raw_my&3) ? 2 : 0;
00253     int full_my= (raw_my>>2) + y_offset;
00254     int top = full_my - filter_height, bottom = full_my + height + filter_height;
00255 
00256     return FFMAX(abs(top), bottom);
00257 }
00258 
/**
 * For each reference list used by partition n, record the lowest
 * reference-picture row motion compensation will read.
 * refs[list][ref] keeps the per-reference maximum row (entries start at
 * -1 == unused); nrefs[list] counts the distinct references seen.
 */
static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
                               int y_offset, int list0, int list1, int *nrefs){
    MpegEncContext * const s = &h->s;
    int my;

    y_offset += 16*(s->mb_y >> MB_FIELD);

    if(list0){
        int ref_n = h->ref_cache[0][ scan8[n] ];
        Picture *ref= &h->ref_list[0][ref_n];

        // Error resilience puts the current picture in the ref list.
        // Don't try to wait on these as it will cause a deadlock.
        // Fields can wait on each other, though.
        if(ref->thread_opaque != s->current_picture.thread_opaque ||
           (ref->reference&3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
            if (refs[0][ref_n] < 0) nrefs[0] += 1; /* first use of this reference */
            refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
        }
    }

    if(list1){
        int ref_n = h->ref_cache[1][ scan8[n] ];
        Picture *ref= &h->ref_list[1][ref_n];

        /* same deadlock-avoidance test as for list 0 above */
        if(ref->thread_opaque != s->current_picture.thread_opaque ||
           (ref->reference&3) != s->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
            if (refs[1][ref_n] < 0) nrefs[1] += 1; /* first use of this reference */
            refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
        }
    }
}
00293 
/**
 * Wait until all reference frames are available for MC operations.
 * Walks the current macroblock's partitions to find, per reference, the
 * lowest row that will be read, then blocks on the decoding progress of
 * those rows (frame-threaded decoding).
 */
static void await_references(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    int refs[2][48];
    int nrefs[2] = {0};
    int ref, list;

    memset(refs, -1, sizeof(refs)); /* -1 == reference not used */

    if(IS_16X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    }else if(IS_16X8(mb_type)){
        get_lowest_part_y(h, refs, 0, 8, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else if(IS_8X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int y_offset= (i&2)<<2;

            if(IS_SUB_8X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_8X4(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 4, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_4X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_y_offset= y_offset + 2*(j&2);
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                }
            }
        }
    }

    for(list=h->list_count-1; list>=0; list--){
        for(ref=0; ref<48 && nrefs[list]; ref++){
            int row = refs[list][ref];
            if(row >= 0){
                Picture *ref_pic = &h->ref_list[list][ref];
                int ref_field = ref_pic->reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height = 16*s->mb_height >> ref_field_picture;

                /* NOTE(review): rows appear to be gathered in mb-pair units
                 * under MBAFF, hence the scaling here — confirm */
                row <<= MB_MBAFF;
                nrefs[list]--;

                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
                }else if(FIELD_PICTURE){
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
                }else{
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
                }
            }
        }
    }
}
00383 
#if 0
/* NOTE(review): dead code, compiled out. A forward 4x4 luma DC transform
 * kept for reference; it appears to use a `stride` symbol that the enabled
 * code does not define, so it would likely not build if re-enabled. */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
00423 
00424 #undef xStride
00425 #undef stride
00426 
#if 0
/* NOTE(review): dead code, compiled out. Forward 2x2 chroma DC transform
 * (butterfly on the four DC coefficients), kept for reference only. */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
00449 
/**
 * Motion compensate one partition from one reference list.
 * Quarter-pel luma MC via qpix_op and (for non-4:4:4) chroma MC via
 * chroma_op; when the motion vector reaches outside the decoded picture
 * plus the usable edge, the source is first copied through
 * emulated_edge_mc.
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
                           int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2); /* quarter-pel phase selects the MC function */
    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
    uint8_t * src_y = pic->data[0] + offset;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    /* subpel positions make the interpolation read beyond the block, so
     * less of the allocated edge remains usable */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(chroma444){
        /* 4:4:4: chroma planes have luma geometry, so the luma MC
         * functions and strides are reused for cb/cr */
        src_cb = pic->data[1] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
        }

        src_cr = pic->data[2] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        }
        return;
    }

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
00534 
/**
 * Motion compensate one partition without weighted prediction.
 * The first used list is written with the put functions; if list1 is also
 * used, it is averaged on top with the avg functions (bi-prediction).
 */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1, int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if(chroma444){
        /* 4:4:4: chroma planes use the luma stride and offsets */
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    }else{
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op, pixel_shift, chroma444);

        /* if list1 follows, average it onto the list0 prediction */
        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op, pixel_shift, chroma444);
    }
}
00573 
/**
 * Motion compensate one partition with weighted prediction.
 * Bi-predicted parts are predicted into scratch buffers and combined with
 * the biweight functions (implicit weights when use_weight == 2, explicit
 * per-reference weights otherwise); single-list parts are predicted in
 * place and then weighted.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1, int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;

    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if(chroma444){
        /* 4:4:4: chroma planes use the luma geometry and weight functions */
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op = luma_weight_op;
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    }else{
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* list0 into the destination, list1 into scratch, then blend */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);

        if(h->use_weight == 2){
            /* implicit weighting: weights come from the implicit_weight
             * table and always sum to 64 */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            /* explicit weighting: per-reference weight and offset pairs */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma444);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}
00647 
00648 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
00649                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00650                            int x_offset, int y_offset,
00651                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00652                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00653                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00654                            int list0, int list1, int pixel_shift, int chroma444){
00655     if((h->use_weight==2 && list0 && list1
00656         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
00657        || h->use_weight==1)
00658         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00659                          x_offset, y_offset, qpix_put, chroma_put,
00660                          weight_op[0], weight_op[3], weight_avg[0],
00661                          weight_avg[3], list0, list1, pixel_shift, chroma444);
00662     else
00663         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00664                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
00665                     chroma_avg, list0, list1, pixel_shift, chroma444);
00666 }
00667 
/**
 * Prefetch reference pixels for an upcoming macroblock's estimated motion
 * vector; no-op when the partition has no reference in this list.
 */
static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    if(refn >= 0){
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src= h->ref_list[list][refn].data;
        int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        if(chroma444){
            /* 4:4:4: chroma planes share the luma geometry */
            s->dsp.prefetch(src[1]+off, s->linesize, 4);
            s->dsp.prefetch(src[2]+off, s->linesize, 4);
        }else{
            /* one prefetch with stride src[2]-src[1] and count 2 touches
             * the same offset in both the cb and cr planes */
            off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
            s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
        }
    }
}
00688 
/**
 * Perform motion compensation for one inter-coded macroblock.
 *
 * Dispatches on the macroblock partitioning (16x16, 16x8, 8x16 or 8x8 with
 * sub-partitions) and calls mc_part() once per partition with the matching
 * qpel/chroma put/avg functions and weighting tables.
 *
 * @param dest_y/dest_cb/dest_cr  destination planes for this macroblock
 * @param qpix_put/chroma_put     copy (first prediction) MC functions
 * @param qpix_avg/chroma_avg     averaging (second prediction) MC functions
 * @param weight_op/weight_avg    explicit/implicit weighted-prediction ops
 * @param pixel_shift             1 for 16-bit samples, 0 for 8-bit
 * @param chroma444               nonzero for 4:4:4 (chroma handled like luma)
 */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                      int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    /* With frame threading, make sure the referenced rows of the reference
     * frames have been decoded before reading from them. */
    if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h);
    /* Prefetch list-0 reference data for the upcoming loads. */
    prefetch_motion(h, 0, pixel_shift, chroma444);

    if(IS_16X16(mb_type)){
        /* Single 16x16 partition. */
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
    }else if(IS_16X8(mb_type)){
        /* Two 16x8 partitions, top then bottom. */
        mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else if(IS_8X16(mb_type)){
        /* Two 8x16 partitions, left then right. */
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        /* Four 8x8 blocks, each possibly split into sub-partitions. */
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                        pixel_shift, chroma444);
                }
            }
        }
    }

    /* Prefetch list-1 reference data for the next stage. */
    prefetch_motion(h, 1, pixel_shift, chroma444);
}
00789 
/**
 * Instantiate a bit-depth specific wrapper around hl_motion():
 * "sh" is the pixel_shift hard-coded into the wrapper (0 for 8-bit,
 * 1 for 16-bit samples) and "bits" names the generated function
 * (hl_motion_8 / hl_motion_16). Hard-coding pixel_shift lets the
 * compiler specialize the always-inlined hl_motion() body.
 */
#define hl_motion_fn(sh, bits) \
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
                                       uint8_t *dest_y, \
                                       uint8_t *dest_cb, uint8_t *dest_cr, \
                                       qpel_mc_func (*qpix_put)[16], \
                                       h264_chroma_mc_func (*chroma_put), \
                                       qpel_mc_func (*qpix_avg)[16], \
                                       h264_chroma_mc_func (*chroma_avg), \
                                       h264_weight_func *weight_op, \
                                       h264_biweight_func *weight_avg, \
                                       int chroma444) \
{ \
    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
              qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
}
hl_motion_fn(0, 8);
hl_motion_fn(1, 16);
00807 
00808 static void free_tables(H264Context *h, int free_rbsp){
00809     int i;
00810     H264Context *hx;
00811 
00812     av_freep(&h->intra4x4_pred_mode);
00813     av_freep(&h->chroma_pred_mode_table);
00814     av_freep(&h->cbp_table);
00815     av_freep(&h->mvd_table[0]);
00816     av_freep(&h->mvd_table[1]);
00817     av_freep(&h->direct_table);
00818     av_freep(&h->non_zero_count);
00819     av_freep(&h->slice_table_base);
00820     h->slice_table= NULL;
00821     av_freep(&h->list_counts);
00822 
00823     av_freep(&h->mb2b_xy);
00824     av_freep(&h->mb2br_xy);
00825 
00826     for(i = 0; i < MAX_THREADS; i++) {
00827         hx = h->thread_context[i];
00828         if(!hx) continue;
00829         av_freep(&hx->top_borders[1]);
00830         av_freep(&hx->top_borders[0]);
00831         av_freep(&hx->s.obmc_scratchpad);
00832         if (free_rbsp){
00833             av_freep(&hx->rbsp_buffer[1]);
00834             av_freep(&hx->rbsp_buffer[0]);
00835             hx->rbsp_buffer_size[0] = 0;
00836             hx->rbsp_buffer_size[1] = 0;
00837         }
00838         if (i) av_freep(&h->thread_context[i]);
00839     }
00840 }
00841 
/**
 * Build the 8x8 dequantization coefficient tables, one per scaling-matrix
 * list, for every QP from 0 to the bit-depth dependent maximum.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,j,q,x;
    /* 6 extra QP values per additional bit of luma depth. */
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

    for(i=0; i<6; i++ ){
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        /* If an earlier list uses an identical scaling matrix, alias its
         * already-computed table instead of recomputing it. */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;   /* matrix was shared, nothing to compute */

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q];    /* scale doubles every 6 QP steps */
            int idx = rem6[q];      /* selects the base coefficient row */
            /* (x>>3)|((x&7)<<3) transposes the 8x8 position; the scan
             * index picks the matching entry of the 4x4 init pattern. */
            for(x=0; x<64; x++)
                h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
00867 
00868 static void init_dequant4_coeff_table(H264Context *h){
00869     int i,j,q,x;
00870     const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
00871     for(i=0; i<6; i++ ){
00872         h->dequant4_coeff[i] = h->dequant4_buffer[i];
00873         for(j=0; j<i; j++){
00874             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
00875                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
00876                 break;
00877             }
00878         }
00879         if(j<i)
00880             continue;
00881 
00882         for(q=0; q<max_qp+1; q++){
00883             int shift = div6[q] + 2;
00884             int idx = rem6[q];
00885             for(x=0; x<16; x++)
00886                 h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
00887                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
00888                     h->pps.scaling_matrix4[i][x]) << shift;
00889         }
00890     }
00891 }
00892 
00893 static void init_dequant_tables(H264Context *h){
00894     int i,x;
00895     init_dequant4_coeff_table(h);
00896     if(h->pps.transform_8x8_mode)
00897         init_dequant8_coeff_table(h);
00898     if(h->sps.transform_bypass){
00899         for(i=0; i<6; i++)
00900             for(x=0; x<16; x++)
00901                 h->dequant4_coeff[i][0][x] = 1<<6;
00902         if(h->pps.transform_8x8_mode)
00903             for(i=0; i<6; i++)
00904                 for(x=0; x<64; x++)
00905                     h->dequant8_coeff[i][0][x] = 1<<6;
00906     }
00907 }
00908 
00909 
/**
 * Allocate the tables shared by all slice/frame threads.
 *
 * On failure everything allocated so far is released via free_tables().
 *
 * @return 0 on success, -1 on allocation failure
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* One extra macroblock row as border for neighbour accesses. */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    /* Two rows of per-thread storage per slice thread. */
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks "no slice" so out-of-picture neighbours never match. */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
    /* slice_table points past the border row/column of the base buffer. */
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    /* Precompute macroblock-index -> block-index mappings. */
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            /* Without FMO only two MB rows of mvd storage are needed. */
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    /* Allocated lazily in ff_h264_frame_start(); linesize is unknown here. */
    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h, 1);
    return -1;
}
00953 
00957 static void clone_tables(H264Context *dst, H264Context *src, int i){
00958     MpegEncContext * const s = &src->s;
00959     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
00960     dst->non_zero_count           = src->non_zero_count;
00961     dst->slice_table              = src->slice_table;
00962     dst->cbp_table                = src->cbp_table;
00963     dst->mb2b_xy                  = src->mb2b_xy;
00964     dst->mb2br_xy                 = src->mb2br_xy;
00965     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
00966     dst->mvd_table[0]             = src->mvd_table[0] + i*8*2*s->mb_stride;
00967     dst->mvd_table[1]             = src->mvd_table[1] + i*8*2*s->mb_stride;
00968     dst->direct_table             = src->direct_table;
00969     dst->list_counts              = src->list_counts;
00970 
00971     dst->s.obmc_scratchpad = NULL;
00972     ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
00973 }
00974 
/**
 * Allocate the buffers that are private to each slice-thread context.
 *
 * @return 0 on success, -1 on allocation failure (the allocations already
 *         made are released later by free_tables())
 */
static int context_init(H264Context *h){
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)

    /* Mark the right-edge neighbour cache entries as never available. */
    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

    return 0;
fail:
    return -1; // free_tables will clean up for us
}
00990 
00991 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
00992 
00993 static av_cold void common_init(H264Context *h){
00994     MpegEncContext * const s = &h->s;
00995 
00996     s->width = s->avctx->width;
00997     s->height = s->avctx->height;
00998     s->codec_id= s->avctx->codec->id;
00999 
01000     ff_h264dsp_init(&h->h264dsp, 8);
01001     ff_h264_pred_init(&h->hpc, s->codec_id, 8);
01002 
01003     h->dequant_coeff_pps= -1;
01004     s->unrestricted_mv=1;
01005     s->decode=1; //FIXME
01006 
01007     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
01008 
01009     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
01010     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
01011 }
01012 
01013 int ff_h264_decode_extradata(H264Context *h)
01014 {
01015     AVCodecContext *avctx = h->s.avctx;
01016 
01017     if(avctx->extradata[0] == 1){
01018         int i, cnt, nalsize;
01019         unsigned char *p = avctx->extradata;
01020 
01021         h->is_avc = 1;
01022 
01023         if(avctx->extradata_size < 7) {
01024             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
01025             return -1;
01026         }
01027         /* sps and pps in the avcC always have length coded with 2 bytes,
01028            so put a fake nal_length_size = 2 while parsing them */
01029         h->nal_length_size = 2;
01030         // Decode sps from avcC
01031         cnt = *(p+5) & 0x1f; // Number of sps
01032         p += 6;
01033         for (i = 0; i < cnt; i++) {
01034             nalsize = AV_RB16(p) + 2;
01035             if (p - avctx->extradata + nalsize > avctx->extradata_size)
01036                 return -1;
01037             if(decode_nal_units(h, p, nalsize) < 0) {
01038                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
01039                 return -1;
01040             }
01041             p += nalsize;
01042         }
01043         // Decode pps from avcC
01044         cnt = *(p++); // Number of pps
01045         for (i = 0; i < cnt; i++) {
01046             nalsize = AV_RB16(p) + 2;
01047             if (p - avctx->extradata + nalsize > avctx->extradata_size)
01048                 return -1;
01049             if (decode_nal_units(h, p, nalsize) < 0) {
01050                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
01051                 return -1;
01052             }
01053             p += nalsize;
01054         }
01055         // Now store right nal length size, that will be use to parse all other nals
01056         h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
01057     } else {
01058         h->is_avc = 0;
01059         if(decode_nal_units(h, avctx->extradata, avctx->extradata_size) < 0)
01060             return -1;
01061     }
01062     return 0;
01063 }
01064 
/**
 * Decoder init entry point: set up contexts, defaults, VLC tables and
 * parse any extradata.
 *
 * @return 0 on success, -1 if extradata parsing fails
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
//    s->decode_mb= ff_h263_decode_mb;
    s->quarter_sample = 1;
    if(!avctx->has_b_frames)
    s->low_delay= 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    /* Assume 8-bit until an SPS says otherwise. */
    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->x264_build = -1;
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        /* H.264 timestamps are in field units: two ticks per frame. */
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h))
        return -1;

    /* If the SPS limits reordering, size the delayed-picture buffer now. */
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}
01113 
01114 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
01115 static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
01116 {
01117     int i;
01118 
01119     for (i=0; i<count; i++){
01120         assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
01121                 IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
01122                 !from[i]));
01123         to[i] = REBASE_PICTURE(from[i], new_base, old_base);
01124     }
01125 }
01126 
/**
 * Synchronize two arrays of malloc'ed parameter sets (SPS/PPS):
 * entries present only in "to" are freed, entries present only in
 * "from" are allocated in "to", and present entries are copied.
 *
 * Fix: the previous code passed the av_malloc() result straight to
 * memcpy(); on allocation failure that dereferenced NULL. A failed
 * allocation now leaves the destination slot empty instead.
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (to[i] && !from[i])
            av_freep(&to[i]);
        else if (from[i] && !to[i])
            to[i] = av_malloc(size);

        if (from[i] && to[i])
            memcpy(to[i], from[i], size);
    }
}
01138 
01139 static int decode_init_thread_copy(AVCodecContext *avctx){
01140     H264Context *h= avctx->priv_data;
01141 
01142     if (!avctx->is_copy) return 0;
01143     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
01144     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
01145 
01146     return 0;
01147 }
01148 
/* Copy the contiguous range of struct fields [start_field, end_field)
 * between two contexts; relies on the fields' declaration order. */
#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
/**
 * Update a frame-thread context from the one that just finished setup.
 *
 * On first use the destination is fully initialized; afterwards only the
 * decoding state (SPS/PPS, dequant tables, POC state, reference lists)
 * is synchronized, with Picture pointers rebased into this context.
 *
 * @return 0 on success, a negative AVERROR on failure
 */
static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
    H264Context *h= dst->priv_data, *h1= src->priv_data;
    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if(dst == src || !s1->context_initialized) return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if(err) return err;

    //FIXME handle width/height changing
    if(!inited){
        for(i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for(i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        /* Bulk-copy the H264-specific part of the context (everything
         * after the embedded MpegEncContext), then re-own the pointers
         * that must not be shared. */
        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        for(i=0; i<2; i++){
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        // frame_start may not be called for the next thread (if it's decoding a bottom field)
        // so this has to be allocated here
        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
    }

    //extradata/NAL handling
    h->is_avc          = h1->is_avc;

    //SPS/PPS
    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
    h->sps             = h1->sps;
    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
    h->pps             = h1->pps;

    //Dequantization matrices
    //FIXME these are big - can they be only copied when PPS changes?
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    /* Recreate the aliasing structure of the source's dequant pointers
     * relative to this context's own buffers. */
    for(i=0; i<6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for(i=0; i<6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    //POC timing
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    //reference lists
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list,  intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    /* Picture pointers must be rebased into this context's picture array. */
    copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
    copy_picture_range(h->long_ref,    h1->long_ref,    32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

    h->last_slice_type = h1->last_slice_type;

    if(!s->current_picture_ptr) return 0;

    if(!s->dropable) {
        ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb     = h->poc_msb;
        h->prev_poc_lsb     = h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num       = h->frame_num;
    h->outputed_poc         = h->next_outputed_poc;

    return 0;
}
01241 
/**
 * Start decoding a new frame: allocate the current picture, reset its
 * per-frame state and precompute the per-block destination offsets.
 *
 * @return 0 on success, -1 if MPV_frame_start() fails
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;
    int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    /* Byte offsets of each 4x4 block within the destination planes;
     * the second set (index 48+) uses double line stride for MBAFF. */
    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    /* Same for the chroma planes (uvlinesize). */
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < thread_count; i++)
        if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

    /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    /* POCs are filled in per field later; INT_MAX marks "not yet known". */
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
01302 
/**
 * Run after the per-frame header state is complete: derive the frame's
 * interlacing/repeat flags from SEI or the decoding process, insert the
 * picture into the delayed-output buffer and pick the next picture to
 * output in display (POC) order.
 *
 * @param setup_finished signal the frame-threading framework that setup
 *                       of this frame is done
 */
static void decode_postinit(H264Context *h, int setup_finished){
    MpegEncContext * const s = &h->s;
    Picture *out = s->current_picture_ptr;
    Picture *cur = s->current_picture_ptr;
    int i, pics, out_of_order, out_idx;

    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
    s->current_picture_ptr->pict_type= s->pict_type;

    /* Already decided for this frame (e.g. by the first field). */
    if (h->next_output_pic) return;

    if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
        //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
        //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
        //to find this yet, so we assume the worst for now.
        //if (setup_finished)
        //    ff_thread_finish_setup(s->avctx);
        return;
    }

    cur->interlaced_frame = 0;
    cur->repeat_pict = 0;

    /* Signal interlacing information externally. */
    /* Prioritize picture timing SEI information over used decoding process if it exists. */

    if(h->sps.pic_struct_present_flag){
        switch (h->sei_pic_struct)
        {
        case SEI_PIC_STRUCT_FRAME:
            break;
        case SEI_PIC_STRUCT_TOP_FIELD:
        case SEI_PIC_STRUCT_BOTTOM_FIELD:
            cur->interlaced_frame = 1;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM:
        case SEI_PIC_STRUCT_BOTTOM_TOP:
            if (FIELD_OR_MBAFF_PICTURE)
                cur->interlaced_frame = 1;
            else
                // try to flag soft telecine progressive
                cur->interlaced_frame = h->prev_interlaced_frame;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
        case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
            // Signal the possibility of telecined film externally (pic_struct 5,6)
            // From these hints, let the applications decide if they apply deinterlacing.
            cur->repeat_pict = 1;
            break;
        case SEI_PIC_STRUCT_FRAME_DOUBLING:
            // Force progressive here, as doubling interlaced frame is a bad idea.
            cur->repeat_pict = 2;
            break;
        case SEI_PIC_STRUCT_FRAME_TRIPLING:
            cur->repeat_pict = 4;
            break;
        }

        /* ct_type, when valid, overrides the pic_struct-derived flag. */
        if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
            cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
    }else{
        /* Derive interlacing flag from used decoding process. */
        cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
    }
    h->prev_interlaced_frame = cur->interlaced_frame;

    if (cur->field_poc[0] != cur->field_poc[1]){
        /* Derive top_field_first from field pocs. */
        cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
    }else{
        if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
            /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
            if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
              || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
                cur->top_field_first = 1;
            else
                cur->top_field_first = 0;
        }else{
            /* Most likely progressive */
            cur->top_field_first = 0;
        }
    }

    //FIXME do something with unavailable reference frames

    /* Sort B-frames into display order */

    if(h->sps.bitstream_restriction_flag
       && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
       && !h->sps.bitstream_restriction_flag){
        s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
        s->low_delay= 0;
    }

    /* Append the current picture to the delayed-output buffer. */
    pics = 0;
    while(h->delayed_pic[pics]) pics++;

    assert(pics <= MAX_DELAYED_PIC_COUNT);

    h->delayed_pic[pics++] = cur;
    if(cur->reference == 0)
        cur->reference = DELAYED_PIC_REF;

    /* Candidate for output: the lowest-POC picture up to the first
     * keyframe/MMCO-reset (which act as reorder barriers). */
    out = h->delayed_pic[0];
    out_idx = 0;
    for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
        if(h->delayed_pic[i]->poc < out->poc){
            out = h->delayed_pic[i];
            out_idx = i;
        }
    if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
        h->next_outputed_poc= INT_MIN;
    out_of_order = out->poc < h->next_outputed_poc;

    /* Grow the reorder buffer when POCs show we underestimated the
     * needed delay (unless the SPS already told us the exact value). */
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
        { }
    else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
       || (s->low_delay &&
        ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2)
         || cur->pict_type == AV_PICTURE_TYPE_B)))
    {
        s->low_delay = 0;
        s->avctx->has_b_frames++;
    }

    if(out_of_order || pics > s->avctx->has_b_frames){
        out->reference &= ~DELAYED_PIC_REF;
        out->owner2 = s; // for frame threading, the owner must be the second field's thread
                         // or else the first thread can release the picture and reuse it unsafely
        /* Remove the output candidate from the delayed buffer. */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];
    }
    if(!out_of_order && pics > s->avctx->has_b_frames){
        h->next_output_pic = out;
        if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
            h->next_outputed_poc = INT_MIN;
        } else
            h->next_outputed_poc = out->poc;
    }else{
        av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
    }

    if (setup_finished)
        ff_thread_finish_setup(s->avctx);
}
01461 
/**
 * Save the bottom row(s) of the current macroblock into h->top_borders so
 * they can serve as the "top" neighbour for intra prediction of the MB row
 * below after the deblocking filter has overwritten the real samples.
 *
 * @param src_y      pointer to the top-left luma sample of this MB
 * @param src_cb     pointer to the top-left Cb sample of this MB
 * @param src_cr     pointer to the top-left Cr sample of this MB
 * @param linesize   luma stride in bytes
 * @param uvlinesize chroma stride in bytes
 * @param chroma444  nonzero for 4:4:4 content (full-width 16-sample chroma rows)
 * @param simple     nonzero for the fast path (no MBAFF / gray-only handling)
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift;

    /* step back to the line above the macroblock */
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF){
                /* bottom MB of a non-MBAFF pair: additionally save its last
                 * row into slot 0, used by the next MB pair below */
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if (pixel_shift)
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(chroma444){
                        if (pixel_shift){
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                        } else {
                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                        }
                    } else {
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                        }
                    }
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];
    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    AV_COPY128(top_border, src_y + 16*linesize);
    if (pixel_shift)
        AV_COPY128(top_border+16, src_y+16*linesize+16);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            /* NOTE(review): the 4:4:4 branch uses the luma stride for chroma;
             * callers appear to pass uvlinesize == linesize here -- confirm */
            if (pixel_shift){
                AV_COPY128(top_border+32, src_cb + 16*linesize);
                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
                AV_COPY128(top_border+64, src_cr + 16*linesize);
                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
            } else {
                AV_COPY128(top_border+16, src_cb + 16*linesize);
                AV_COPY128(top_border+32, src_cr + 16*linesize);
            }
        } else {
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
            }
        }
    }
}
01536 
/**
 * Exchange (or copy) the saved top-border samples with the current picture
 * rows around the macroblock edge. Called once with xchg=1 before intra
 * prediction (to present the pre-deblock neighbour samples) and once with
 * xchg=0 afterwards (to restore/refresh the border buffer).
 *
 * @param linesize   luma stride in bytes
 * @param uvlinesize chroma stride in bytes
 * @param xchg       nonzero to swap, zero to copy border -> picture only
 * @param chroma444  nonzero for 4:4:4 (chroma handled with 16-wide rows)
 * @param simple     nonzero for the fast path without MBAFF handling
 * @param pixel_shift 0 for 8-bit samples, 1 for 16-bit samples
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                  uint8_t *src_cb, uint8_t *src_cr,
                                  int linesize, int uvlinesize,
                                  int xchg, int chroma444,
                                  int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            /* bottom MB of a pair only has a border of its own when MBAFF */
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    /* decide whether the top / top-left neighbours are actually deblocked
     * (deblocking_filter==2 means slice-boundary-aware filtering) */
    if(h->deblocking_filter == 2) {
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (s->mb_x > 0);
        deblock_top     = (s->mb_y > !!MB_FIELD);
    }

    /* move to one line above and one sample left of the MB */
    src_y  -=   linesize + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

/* swap or copy 8 (or 16 for high bit depth) bytes between border buffer and
 * picture, depending on the xchg argument of the macro */
#define XCHG(a,b,xchg)\
    if (pixel_shift) {\
        if (xchg) {\
            AV_SWAP64(b+0,a+0);\
            AV_SWAP64(b+8,a+8);\
        } else {\
            AV_COPY128(b,a); \
        }\
    } else \
if (xchg) AV_SWAP64(b,a);\
else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_topleft){
            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
        }
    }
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            if(deblock_topleft){
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
            if(s->mb_x+1 < s->mb_width){
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
            }
        } else {
            if(deblock_top){
                if(deblock_topleft){
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
            }
        }
    }
}
01621 
01622 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
01623     if (high_bit_depth) {
01624         return AV_RN32A(((int32_t*)mb) + index);
01625     } else
01626         return AV_RN16A(mb + index);
01627 }
01628 
01629 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
01630     if (high_bit_depth) {
01631         AV_WN32A(((int32_t*)mb) + index, value);
01632     } else
01633         AV_WN16A(mb + index, value);
01634 }
01635 
/**
 * Perform intra prediction for one plane of a macroblock and add the
 * dequantized residual: 4x4/8x8 intra prediction per sub-block, or 16x16
 * prediction plus the luma DC transform for INTRA16x16 MBs.
 *
 * @param is_h264          zero only for the SVQ3 residual path
 * @param transform_bypass nonzero for lossless (qpprime_y_zero) bypass mode
 * @param pixel_shift      0 for 8-bit samples, 1 for 16-bit samples
 * @param block_offset     per-4x4-block pixel offsets into dest_y
 * @param p                plane index (0 = luma; 1/2 = chroma planes in 4:4:4)
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
    block_offset += 16*p;
    if(IS_INTRA4x4(mb_type)){
        if(simple || !s->encoding){
            if(IS_8x8DCT(mb_type)){
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels8;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
                }
                for(i=0; i<16; i+=4){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    /* note: (i*16+p*256 << pixel_shift) scales the whole byte
                     * offset; '+' binds tighter than '<<' */
                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                    (h->topright_samples_available<<i)&0x4000, linesize);
                        if(nnz){
                            /* DC-only block uses the cheaper dc_add variant */
                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            else
                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }
            }else{
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels4;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
                }
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(s->mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                /* top-right unavailable: replicate the last
                                 * available top sample across 4 positions */
                                if (pixel_shift) {
                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                    topright= (uint8_t*) &tr_high;
                                } else {
                                    tr= ptr[3 - linesize]*0x01010101;
                                    topright= (uint8_t*) &tr;
                                }
                            }else
                                topright= ptr + (4 << pixel_shift) - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        if(nnz){
                            if(is_h264){
                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                else
                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            }else
                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                        }
                    }
                }
            }
        }
    }else{
        /* INTRA16x16: predict the whole plane, then handle the DC transform */
        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
        if(is_h264){
            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                if(!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                else{
                    /* bypass: scatter the raw DC values back into the DC
                     * position of each 4x4 block (raster -> scan order) */
                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                    for(i = 0; i < 16; i++)
                        dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                }
            }
        }else
            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
    }
}
01738 
/**
 * Add the residual IDCT output to one (already predicted / motion-compensated)
 * plane of a non-INTRA4x4 macroblock. Picks the 4x4 or 8x8 transform, the
 * lossless-bypass add, or the SVQ3 path depending on the MB type.
 *
 * @param is_h264          zero only for the SVQ3 residual path
 * @param transform_bypass nonzero for lossless bypass mode
 * @param block_offset     per-4x4-block pixel offsets into dest_y
 * @param p                plane index (0 = luma; 1/2 = chroma planes in 4:4:4)
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    block_offset += 16*p;
    if(!IS_INTRA4x4(mb_type)){
        if(is_h264){
            if(IS_INTRA16x16(mb_type)){
                if(transform_bypass){
                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                        /* High 4:4:4 lossless vertical/horizontal prediction
                         * combined with the residual add */
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
                    }else{
                        for(i=0; i<16; i++){
                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                }
            }else if(h->cbp&15){
                /* inter/intra-PCM-free MB with coded luma residual */
                if(transform_bypass){
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                    for(i=0; i<16; i+=di){
                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    if(IS_8x8DCT(mb_type)){
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }else{
                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }
                }
            }
        }else{
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }
}
01788 
/**
 * Decode the pixel data of one macroblock for 4:2:0 (and monochrome) content:
 * I_PCM passthrough, intra prediction, inter motion compensation and the
 * residual transform for luma and chroma.
 *
 * @param simple      nonzero for the fast path (no MBAFF, no gray, no SVQ3)
 * @param pixel_shift 0 for 8-bit samples, 1 for 16-bit (high bit depth)
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i, j;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
    const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);

    dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
    dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: double the strides and use the field block
         * offsets; odd MBs of a pair start one luma line down */
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
        if(FRAME_MBAFF) {
            int list;
            /* re-map reference indices to the per-field numbering */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* I_PCM: raw samples were stored in h->mb; copy them out directly */
        if (pixel_shift) {
            const int bit_depth = h->sps.bit_depth_luma;
            int j;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);

            for (i = 0; i < 16; i++) {
                uint16_t *tmp_y  = (uint16_t*)(dest_y  + i*linesize);
                for (j = 0; j < 16; j++)
                    tmp_y[j] = get_bits(&gb, bit_depth);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* monochrome: fill chroma with the mid-level grey */
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cb[j] = 1 << (bit_depth - 1);
                        }
                    }
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cr[j] = 1 << (bit_depth - 1);
                        }
                    }
                } else {
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cb[j] = get_bits(&gb, bit_depth);
                    }
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cr[j] = get_bits(&gb, bit_depth);
                    }
                }
            }
        } else {
            for (i=0; i<16; i++) {
                memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* monochrome: fill chroma with the 8-bit mid-level 128 */
                    for (i = 0; i < 8; i++) {
                        memset(dest_cb + i*uvlinesize, 128, 8);
                        memset(dest_cr + i*uvlinesize, 128, 8);
                    }
                } else {
                    for (i = 0; i < 8; i++) {
                        memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4,  8);
                        memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4,  8);
                    }
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* expose pre-deblock neighbour samples for prediction, predict,
             * then restore the border buffer */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);

            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
        }else if(is_h264){
            /* inter MB: motion compensation (depth-specialized variants) */
            if (pixel_shift) {
                hl_motion_16(h, dest_y, dest_cb, dest_cr,
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 0);
            } else
                hl_motion_8(h, dest_y, dest_cb, dest_cr,
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 0);
        }

        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

        /* chroma residual, only if the coded block pattern has chroma bits */
        if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
                        }
                    }
                }
            }else{
                if(is_h264){
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    h->h264dsp.h264_idct_add8(dest, block_offset,
                                              h->mb, uvlinesize,
                                              h->non_zero_count_cache);
                }else{
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                                uint8_t * const ptr= dest[j-1] + block_offset[i];
                                ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                            }
                        }
                    }
                }
            }
        }
    }
    /* clear the coefficient buffer for the next macroblock */
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
01978 
/**
 * Decode the pixel data of one macroblock for 4:4:4 content, where all three
 * planes are full 16x16 and share the luma code paths (plane index p selects
 * Y/Cb/Cr).
 *
 * @param simple      nonzero for the fast path (no MBAFF / gray handling)
 * @param pixel_shift 0 for 8-bit samples, 1 for 16-bit (high bit depth)
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* with CODEC_FLAG_GRAY only the luma plane is produced */
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++)
    {
        dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: double stride, field block offsets; odd MBs of a
         * pair start one line down */
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1) //FIXME move out of this function?
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize*15;
        if(FRAME_MBAFF) {
            int list;
            /* re-map reference indices to the per-field numbering */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* I_PCM: raw samples for all planes were stored in h->mb */
        if (pixel_shift) {
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* swap in pre-deblock border samples, predict each plane, swap
             * back (chroma444=1: chroma uses the luma-sized border layout) */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        }else{
            /* inter MB: motion compensation (depth-specialized variants) */
            if (pixel_shift) {
                hl_motion_16(h, dest[0], dest[1], dest[2],
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 1);
            } else
                hl_motion_8(h, dest[0], dest[1], dest[2],
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 1);
        }

        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
    }
    /* clear the coefficient buffer for the next macroblock */
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02081 
/**
 * Generate a bit-depth-specific "simple" macroblock decode entry point.
 * Expands to hl_decode_mb_simple_<bits>(), which calls
 * hl_decode_mb_internal() with simple=1 and the given pixel_shift (sh),
 * so the compiler can specialize the hot path per sample size.
 */
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
    hl_decode_mb_internal(h, 1, sh); \
}
hl_decode_mb_simple(0, 8);   /* 8-bit samples,  pixel_shift = 0 */
hl_decode_mb_simple(1, 16);  /* 16-bit storage, pixel_shift = 1 */
02091 
/**
 * Process a macroblock in the general ("complex") 4:2:0 path:
 * simple=0, runtime pixel_shift. Kept out of line to avoid bloating callers.
 */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0, h->pixel_shift);
}
02098 
/**
 * Process a macroblock in the general ("complex") 4:4:4 path:
 * simple=0, runtime pixel_shift.
 */
static void av_noinline hl_decode_mb_444_complex(H264Context *h){
    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
}
02102 
/**
 * Process a macroblock in the fast 4:4:4 path: simple=1, 8-bit samples
 * (pixel_shift hardcoded to 0).
 */
static void av_noinline hl_decode_mb_444_simple(H264Context *h){
    hl_decode_mb_444_internal(h, 1, 0);
}
02106 
02107 void ff_h264_hl_decode_mb(H264Context *h){
02108     MpegEncContext * const s = &h->s;
02109     const int mb_xy= h->mb_xy;
02110     const int mb_type= s->current_picture.mb_type[mb_xy];
02111     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
02112 
02113     if (CHROMA444) {
02114         if(is_complex || h->pixel_shift)
02115             hl_decode_mb_444_complex(h);
02116         else
02117             hl_decode_mb_444_simple(h);
02118     } else if (is_complex) {
02119         hl_decode_mb_complex(h);
02120     } else if (h->pixel_shift) {
02121         hl_decode_mb_simple_16(h);
02122     } else
02123         hl_decode_mb_simple_8(h);
02124 }
02125 
02126 static int pred_weight_table(H264Context *h){
02127     MpegEncContext * const s = &h->s;
02128     int list, i;
02129     int luma_def, chroma_def;
02130 
02131     h->use_weight= 0;
02132     h->use_weight_chroma= 0;
02133     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
02134     if(h->sps.chroma_format_idc)
02135         h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
02136     luma_def = 1<<h->luma_log2_weight_denom;
02137     chroma_def = 1<<h->chroma_log2_weight_denom;
02138 
02139     for(list=0; list<2; list++){
02140         h->luma_weight_flag[list]   = 0;
02141         h->chroma_weight_flag[list] = 0;
02142         for(i=0; i<h->ref_count[list]; i++){
02143             int luma_weight_flag, chroma_weight_flag;
02144 
02145             luma_weight_flag= get_bits1(&s->gb);
02146             if(luma_weight_flag){
02147                 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
02148                 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
02149                 if(   h->luma_weight[i][list][0] != luma_def
02150                    || h->luma_weight[i][list][1] != 0) {
02151                     h->use_weight= 1;
02152                     h->luma_weight_flag[list]= 1;
02153                 }
02154             }else{
02155                 h->luma_weight[i][list][0]= luma_def;
02156                 h->luma_weight[i][list][1]= 0;
02157             }
02158 
02159             if(h->sps.chroma_format_idc){
02160                 chroma_weight_flag= get_bits1(&s->gb);
02161                 if(chroma_weight_flag){
02162                     int j;
02163                     for(j=0; j<2; j++){
02164                         h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
02165                         h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
02166                         if(   h->chroma_weight[i][list][j][0] != chroma_def
02167                            || h->chroma_weight[i][list][j][1] != 0) {
02168                             h->use_weight_chroma= 1;
02169                             h->chroma_weight_flag[list]= 1;
02170                         }
02171                     }
02172                 }else{
02173                     int j;
02174                     for(j=0; j<2; j++){
02175                         h->chroma_weight[i][list][j][0]= chroma_def;
02176                         h->chroma_weight[i][list][j][1]= 0;
02177                     }
02178                 }
02179             }
02180         }
02181         if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
02182     }
02183     h->use_weight= h->use_weight || h->use_weight_chroma;
02184     return 0;
02185 }
02186 
/**
 * Initialize the implicit weighted-prediction table.
 *
 * @param field  < 0 for a frame (or complete field pair) — both table
 *               entries per ref pair are filled; 0/1 selects one parity
 *               for the MBAFF per-field tables (refs offset by 16).
 */
static void implicit_weight_table(H264Context *h, int field){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i]   = 0;
        h->chroma_weight_flag[i] = 0;
    }

    if(field < 0){
        if (s->picture_structure == PICT_FRAME) {
            cur_poc = s->current_picture_ptr->poc;
        } else {
            cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
        }
    /* fast path: one ref each side, symmetric around the current POC
     * -> all weights would be the default 32/32, skip the table */
    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
        h->use_weight= 0;
        h->use_weight_chroma= 0;
        return;
    }
        ref_start= 0;
        ref_count0= h->ref_count[0];
        ref_count1= h->ref_count[1];
    }else{
        /* per-field pass: field refs live at indices 16..16+2*ref_count */
        cur_poc = s->current_picture_ptr->field_poc[field];
        ref_start= 16;
        ref_count0= 16+2*h->ref_count[0];
        ref_count1= 16+2*h->ref_count[1];
    }

    /* 2 == implicit weighting; denominator fixed at 1<<5 = 32 */
    h->use_weight= 2;
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=ref_start; ref0 < ref_count0; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=ref_start; ref1 < ref_count1; ref1++){
            int w = 32;   /* default: equal weighting */
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                /* scale the list-1 weight by the POC distance ratio,
                 * clipping td/tb to -128..127 before the division */
                int poc1 = h->ref_list[1][ref1].poc;
                int td = av_clip(poc1 - poc0, -128, 127);
                if(td){
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
                    int dist_scale_factor = (tb*tx + 32) >> 8;
                    if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                        w = 64 - dist_scale_factor;
                }
            }
            if(field<0){
                h->implicit_weight[ref0][ref1][0]=
                h->implicit_weight[ref0][ref1][1]= w;
            }else{
                h->implicit_weight[ref0][ref1][field]=w;
            }
        }
    }
}
02252 
02256 static void idr(H264Context *h){
02257     ff_h264_remove_all_refs(h);
02258     h->prev_frame_num= 0;
02259     h->prev_frame_num_offset= 0;
02260     h->prev_poc_msb=
02261     h->prev_poc_lsb= 0;
02262 }
02263 
02264 /* forget old pics after a seek */
02265 static void flush_dpb(AVCodecContext *avctx){
02266     H264Context *h= avctx->priv_data;
02267     int i;
02268     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
02269         if(h->delayed_pic[i])
02270             h->delayed_pic[i]->reference= 0;
02271         h->delayed_pic[i]= NULL;
02272     }
02273     h->outputed_poc=h->next_outputed_poc= INT_MIN;
02274     h->prev_interlaced_frame = 1;
02275     idr(h);
02276     if(h->s.current_picture_ptr)
02277         h->s.current_picture_ptr->reference= 0;
02278     h->s.first_field= 0;
02279     ff_h264_reset_sei(h);
02280     ff_mpeg_flush(avctx);
02281 }
02282 
/**
 * Compute the picture order count (POC) of the current picture from the
 * slice-header fields, for all three poc_type modes, and store the
 * per-field and frame POCs on the current Picture.
 *
 * @return 0
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    /* frame_num wrapped -> advance the offset by one full period */
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        /* type 0: poc_lsb is coded, msb is inferred from lsb wraparound */
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        /* type 1: POC derived from frame_num via the SPS ref-frame
         * offset cycle, plus coded deltas */
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        /* non-reference pictures sit between the cycle positions */
        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* type 2: POC follows decoding order; non-ref pictures get the
         * odd value just below the surrounding reference pictures */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    /* store only the field(s) this picture actually codes */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}
02359 
02360 
/**
 * Initialize the per-context scan tables from the static spec tables,
 * permuting each entry (nibble/bit swap via the local T macros) into the
 * coefficient layout the decoder uses internally.  The *_q0 pointers are
 * the unpermuted tables, needed for lossless (transform-bypass) blocks.
 */
static void init_scan_tables(H264Context *h){
    int i;
    for(i=0; i<16; i++){
#define T(x) (x>>2) | ((x<<2) & 0xF)
        h->zigzag_scan[i] = T(zigzag_scan[i]);
        h-> field_scan[i] = T( field_scan[i]);
#undef T
    }
    for(i=0; i<64; i++){
#define T(x) (x>>3) | ((x&7)<<3)
        h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
        h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
        h->field_scan8x8[i]        = T(field_scan8x8[i]);
        h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
#undef T
    }
    if(h->sps.transform_bypass){ //FIXME same ugly
        /* bypass blocks at qp 0 use the raw (unpermuted) scan order */
        h->zigzag_scan_q0          = zigzag_scan;
        h->zigzag_scan8x8_q0       = ff_zigzag_direct;
        h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
        h->field_scan_q0           = field_scan;
        h->field_scan8x8_q0        = field_scan8x8;
        h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
    }else{
        /* no bypass: the q0 pointers just alias the permuted tables */
        h->zigzag_scan_q0          = h->zigzag_scan;
        h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
        h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
        h->field_scan_q0           = h->field_scan;
        h->field_scan8x8_q0        = h->field_scan8x8;
        h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
    }
}
02396 
/**
 * Finish decoding of the current field/frame: report completion to frame
 * threads, run reference picture marking, finalize hwaccel/VDPAU output
 * and close the MPV frame.
 *
 * @param in_setup nonzero when called from slice-header setup while
 *                 starting the next field (skips progress reporting and,
 *                 together with frame threading, defers state updates)
 */
static void field_end(H264Context *h, int in_setup){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    s->mb_y= 0;

    if (!in_setup && !s->dropable)
        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1,
                                 s->picture_structure==PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
        /* droppable (non-reference) pictures must not update the
         * reference/POC prediction state */
        if(!s->dropable) {
            ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        }
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        h->outputed_poc = h->next_outputed_poc;
    }

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /*
     * FIXME: Error handling code does not seem to support interlaced
     * when slices span multiple rows
     * The ff_er_add_slice calls don't work right for bottom
     * fields; they cause massive erroneous error concealing
     * Error marking covers both fields (top and bottom).
     * This causes a mismatched s->error_count
     * and a bad error table. Further, the error count goes to
     * INT_MAX when called for bottom field, because mb_y is
     * past end by one (callers fault) and resync_mb_y != 0
     * causes problems for the first MB line, too.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;
}
02447 
02451 static void clone_slice(H264Context *dst, H264Context *src)
02452 {
02453     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
02454     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
02455     dst->s.current_picture      = src->s.current_picture;
02456     dst->s.linesize             = src->s.linesize;
02457     dst->s.uvlinesize           = src->s.uvlinesize;
02458     dst->s.first_field          = src->s.first_field;
02459 
02460     dst->prev_poc_msb           = src->prev_poc_msb;
02461     dst->prev_poc_lsb           = src->prev_poc_lsb;
02462     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
02463     dst->prev_frame_num         = src->prev_frame_num;
02464     dst->short_ref_count        = src->short_ref_count;
02465 
02466     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
02467     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
02468     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
02469     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
02470 
02471     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
02472     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
02473 }
02474 
02482 int ff_h264_get_profile(SPS *sps)
02483 {
02484     int profile = sps->profile_idc;
02485 
02486     switch(sps->profile_idc) {
02487     case FF_PROFILE_H264_BASELINE:
02488         // constraint_set1_flag set to 1
02489         profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
02490         break;
02491     case FF_PROFILE_H264_HIGH_10:
02492     case FF_PROFILE_H264_HIGH_422:
02493     case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
02494         // constraint_set3_flag set to 1
02495         profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
02496         break;
02497     }
02498 
02499     return profile;
02500 }
02501 
02511 static int decode_slice_header(H264Context *h, H264Context *h0){
02512     MpegEncContext * const s = &h->s;
02513     MpegEncContext * const s0 = &h0->s;
02514     unsigned int first_mb_in_slice;
02515     unsigned int pps_id;
02516     int num_ref_idx_active_override_flag;
02517     unsigned int slice_type, tmp, i, j;
02518     int default_ref_list_done = 0;
02519     int last_pic_structure;
02520 
02521     s->dropable= h->nal_ref_idc == 0;
02522 
02523     /* FIXME: 2tap qpel isn't implemented for high bit depth. */
02524     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
02525         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
02526         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
02527     }else{
02528         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
02529         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
02530     }
02531 
02532     first_mb_in_slice= get_ue_golomb(&s->gb);
02533 
02534     if(first_mb_in_slice == 0){ //FIXME better field boundary detection
02535         if(h0->current_slice && FIELD_PICTURE){
02536             field_end(h, 1);
02537         }
02538 
02539         h0->current_slice = 0;
02540         if (!s0->first_field)
02541             s->current_picture_ptr= NULL;
02542     }
02543 
02544     slice_type= get_ue_golomb_31(&s->gb);
02545     if(slice_type > 9){
02546         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
02547         return -1;
02548     }
02549     if(slice_type > 4){
02550         slice_type -= 5;
02551         h->slice_type_fixed=1;
02552     }else
02553         h->slice_type_fixed=0;
02554 
02555     slice_type= golomb_to_pict_type[ slice_type ];
02556     if (slice_type == AV_PICTURE_TYPE_I
02557         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
02558         default_ref_list_done = 1;
02559     }
02560     h->slice_type= slice_type;
02561     h->slice_type_nos= slice_type & 3;
02562 
02563     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
02564 
02565     pps_id= get_ue_golomb(&s->gb);
02566     if(pps_id>=MAX_PPS_COUNT){
02567         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
02568         return -1;
02569     }
02570     if(!h0->pps_buffers[pps_id]) {
02571         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
02572         return -1;
02573     }
02574     h->pps= *h0->pps_buffers[pps_id];
02575 
02576     if(!h0->sps_buffers[h->pps.sps_id]) {
02577         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
02578         return -1;
02579     }
02580     h->sps = *h0->sps_buffers[h->pps.sps_id];
02581 
02582     s->avctx->profile = ff_h264_get_profile(&h->sps);
02583     s->avctx->level   = h->sps.level_idc;
02584     s->avctx->refs    = h->sps.ref_frame_count;
02585 
02586     if(h == h0 && h->dequant_coeff_pps != pps_id){
02587         h->dequant_coeff_pps = pps_id;
02588         init_dequant_tables(h);
02589     }
02590 
02591     s->mb_width= h->sps.mb_width;
02592     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
02593 
02594     h->b_stride=  s->mb_width*4;
02595 
02596     s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
02597     if(h->sps.frame_mbs_only_flag)
02598         s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
02599     else
02600         s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
02601 
02602     if (s->context_initialized
02603         && (   s->width != s->avctx->width || s->height != s->avctx->height
02604             || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
02605         if(h != h0 || (HAVE_THREADS && h->s.avctx->active_thread_type & FF_THREAD_FRAME)) {
02606             av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);
02607             return AVERROR_PATCHWELCOME;   // width / height changed during parallelized decoding
02608         }
02609         free_tables(h, 0);
02610         flush_dpb(s->avctx);
02611         MPV_common_end(s);
02612     }
02613     if (!s->context_initialized) {
02614         if (h != h0) {
02615             av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
02616             return -1;
02617         }
02618 
02619         avcodec_set_dimensions(s->avctx, s->width, s->height);
02620         s->avctx->sample_aspect_ratio= h->sps.sar;
02621         av_assert0(s->avctx->sample_aspect_ratio.den);
02622 
02623         h->s.avctx->coded_width = 16*s->mb_width;
02624         h->s.avctx->coded_height = 16*s->mb_height;
02625 
02626         if(h->sps.video_signal_type_present_flag){
02627             s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
02628             if(h->sps.colour_description_present_flag){
02629                 s->avctx->color_primaries = h->sps.color_primaries;
02630                 s->avctx->color_trc       = h->sps.color_trc;
02631                 s->avctx->colorspace      = h->sps.colorspace;
02632             }
02633         }
02634 
02635         if(h->sps.timing_info_present_flag){
02636             int64_t den= h->sps.time_scale;
02637             if(h->x264_build < 44U)
02638                 den *= 2;
02639             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
02640                       h->sps.num_units_in_tick, den, 1<<30);
02641         }
02642 
02643         switch (h->sps.bit_depth_luma) {
02644             case 9 :
02645                 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
02646                 break;
02647             case 10 :
02648                 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
02649                 break;
02650             default:
02651                 if (CHROMA444){
02652                     s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
02653                 }else{
02654                     s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
02655                                                              s->avctx->codec->pix_fmts ?
02656                                                              s->avctx->codec->pix_fmts :
02657                                                              s->avctx->color_range == AVCOL_RANGE_JPEG ?
02658                                                              hwaccel_pixfmt_list_h264_jpeg_420 :
02659                                                              ff_hwaccel_pixfmt_list_420);
02660                 }
02661         }
02662 
02663         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
02664 
02665         if (MPV_common_init(s) < 0) {
02666             av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
02667             return -1;
02668         }
02669         s->first_field = 0;
02670         h->prev_interlaced_frame = 1;
02671 
02672         init_scan_tables(h);
02673         if (ff_h264_alloc_tables(h) < 0) {
02674             av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
02675             return AVERROR(ENOMEM);
02676         }
02677 
02678         if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
02679             if (context_init(h) < 0) {
02680                 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02681                 return -1;
02682             }
02683         } else {
02684             for(i = 1; i < s->avctx->thread_count; i++) {
02685                 H264Context *c;
02686                 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
02687                 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
02688                 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
02689                 c->h264dsp = h->h264dsp;
02690                 c->sps = h->sps;
02691                 c->pps = h->pps;
02692                 c->pixel_shift = h->pixel_shift;
02693                 init_scan_tables(c);
02694                 clone_tables(c, h, i);
02695             }
02696 
02697             for(i = 0; i < s->avctx->thread_count; i++)
02698                 if (context_init(h->thread_context[i]) < 0) {
02699                     av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02700                     return -1;
02701                 }
02702         }
02703     }
02704 
02705     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
02706 
02707     h->mb_mbaff = 0;
02708     h->mb_aff_frame = 0;
02709     last_pic_structure = s0->picture_structure;
02710     if(h->sps.frame_mbs_only_flag){
02711         s->picture_structure= PICT_FRAME;
02712     }else{
02713         if(get_bits1(&s->gb)) { //field_pic_flag
02714             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
02715         } else {
02716             s->picture_structure= PICT_FRAME;
02717             h->mb_aff_frame = h->sps.mb_aff;
02718         }
02719     }
02720     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
02721 
02722     if(h0->current_slice == 0){
02723         // Shorten frame num gaps so we don't have to allocate reference frames just to throw them away
02724         if(h->frame_num != h->prev_frame_num) {
02725             int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num;
02726 
02727             if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
02728 
02729             if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
02730                 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
02731                 if (unwrap_prev_frame_num < 0)
02732                     unwrap_prev_frame_num += max_frame_num;
02733 
02734                 h->prev_frame_num = unwrap_prev_frame_num;
02735             }
02736         }
02737 
02738         while(h->frame_num !=  h->prev_frame_num &&
02739               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
02740             Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
02741             av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
02742             if (ff_h264_frame_start(h) < 0)
02743                 return -1;
02744             h->prev_frame_num++;
02745             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
02746             s->current_picture_ptr->frame_num= h->prev_frame_num;
02747             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
02748             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
02749             ff_generate_sliding_window_mmcos(h);
02750             ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
02751             /* Error concealment: if a ref is missing, copy the previous ref in its place.
02752              * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
02753              * about there being no actual duplicates.
02754              * FIXME: this doesn't copy padding for out-of-frame motion vectors.  Given we're
02755              * concealing a lost frame, this probably isn't noticable by comparison, but it should
02756              * be fixed. */
02757             if (h->short_ref_count) {
02758                 if (prev) {
02759                     av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
02760                                   (const uint8_t**)prev->data, prev->linesize,
02761                                   s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
02762                     h->short_ref[0]->poc = prev->poc+2;
02763                 }
02764                 h->short_ref[0]->frame_num = h->prev_frame_num;
02765             }
02766         }
02767 
02768         /* See if we have a decoded first field looking for a pair... */
02769         if (s0->first_field) {
02770             assert(s0->current_picture_ptr);
02771             assert(s0->current_picture_ptr->data[0]);
02772             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
02773 
02774             /* figure out if we have a complementary field pair */
02775             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
02776                 /*
02777                  * Previous field is unmatched. Don't display it, but let it
02778                  * remain for reference if marked as such.
02779                  */
02780                 s0->current_picture_ptr = NULL;
02781                 s0->first_field = FIELD_PICTURE;
02782 
02783             } else {
02784                 if (h->nal_ref_idc &&
02785                         s0->current_picture_ptr->reference &&
02786                         s0->current_picture_ptr->frame_num != h->frame_num) {
02787                     /*
02788                      * This and previous field were reference, but had
02789                      * different frame_nums. Consider this field first in
02790                      * pair. Throw away previous field except for reference
02791                      * purposes.
02792                      */
02793                     s0->first_field = 1;
02794                     s0->current_picture_ptr = NULL;
02795 
02796                 } else {
02797                     /* Second field in complementary pair */
02798                     s0->first_field = 0;
02799                 }
02800             }
02801 
02802         } else {
02803             /* Frame or first field in a potentially complementary pair */
02804             assert(!s0->current_picture_ptr);
02805             s0->first_field = FIELD_PICTURE;
02806         }
02807 
02808         if(!FIELD_PICTURE || s0->first_field) {
02809             if (ff_h264_frame_start(h) < 0) {
02810                 s0->first_field = 0;
02811                 return -1;
02812             }
02813         } else {
02814             ff_release_unused_pictures(s, 0);
02815         }
02816     }
02817     if(h != h0)
02818         clone_slice(h, h0);
02819 
02820     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
02821 
02822     assert(s->mb_num == s->mb_width * s->mb_height);
02823     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
02824        first_mb_in_slice                    >= s->mb_num){
02825         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
02826         return -1;
02827     }
02828     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
02829     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
02830     if (s->picture_structure == PICT_BOTTOM_FIELD)
02831         s->resync_mb_y = s->mb_y = s->mb_y + 1;
02832     assert(s->mb_y < s->mb_height);
02833 
02834     if(s->picture_structure==PICT_FRAME){
02835         h->curr_pic_num=   h->frame_num;
02836         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
02837     }else{
02838         h->curr_pic_num= 2*h->frame_num + 1;
02839         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
02840     }
02841 
02842     if(h->nal_unit_type == NAL_IDR_SLICE){
02843         get_ue_golomb(&s->gb); /* idr_pic_id */
02844     }
02845 
02846     if(h->sps.poc_type==0){
02847         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
02848 
02849         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
02850             h->delta_poc_bottom= get_se_golomb(&s->gb);
02851         }
02852     }
02853 
02854     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
02855         h->delta_poc[0]= get_se_golomb(&s->gb);
02856 
02857         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
02858             h->delta_poc[1]= get_se_golomb(&s->gb);
02859     }
02860 
02861     init_poc(h);
02862 
02863     if(h->pps.redundant_pic_cnt_present){
02864         h->redundant_pic_count= get_ue_golomb(&s->gb);
02865     }
02866 
02867     //set defaults, might be overridden a few lines later
02868     h->ref_count[0]= h->pps.ref_count[0];
02869     h->ref_count[1]= h->pps.ref_count[1];
02870 
02871     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
02872         int max_refs = s->picture_structure == PICT_FRAME ? 16 : 32;
02873 
02874         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
02875             h->direct_spatial_mv_pred= get_bits1(&s->gb);
02876         }
02877         num_ref_idx_active_override_flag= get_bits1(&s->gb);
02878 
02879         if(num_ref_idx_active_override_flag){
02880             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
02881             if(h->slice_type_nos==AV_PICTURE_TYPE_B)
02882                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
02883         }
02884 
02885         if (h->ref_count[0] > max_refs || h->ref_count[1] > max_refs) {
02886             av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
02887             h->ref_count[0] = h->ref_count[1] = 1;
02888             return AVERROR_INVALIDDATA;
02889         }
02890 
02891         if(h->slice_type_nos == AV_PICTURE_TYPE_B)
02892             h->list_count= 2;
02893         else
02894             h->list_count= 1;
02895     }else
02896         h->list_count= 0;
02897 
02898     if(!default_ref_list_done){
02899         ff_h264_fill_default_ref_list(h);
02900     }
02901 
02902     if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) {
02903         h->ref_count[1]= h->ref_count[0]= 0;
02904         return -1;
02905     }
02906 
02907     if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
02908         s->last_picture_ptr= &h->ref_list[0][0];
02909         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
02910     }
02911     if(h->slice_type_nos==AV_PICTURE_TYPE_B){
02912         s->next_picture_ptr= &h->ref_list[1][0];
02913         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
02914     }
02915 
02916     if(   (h->pps.weighted_pred          && h->slice_type_nos == AV_PICTURE_TYPE_P )
02917        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
02918         pred_weight_table(h);
02919     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
02920         implicit_weight_table(h, -1);
02921     }else {
02922         h->use_weight = 0;
02923         for (i = 0; i < 2; i++) {
02924             h->luma_weight_flag[i]   = 0;
02925             h->chroma_weight_flag[i] = 0;
02926         }
02927     }
02928 
02929     if(h->nal_ref_idc)
02930         ff_h264_decode_ref_pic_marking(h0, &s->gb);
02931 
02932     if(FRAME_MBAFF){
02933         ff_h264_fill_mbaff_ref_list(h);
02934 
02935         if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
02936             implicit_weight_table(h, 0);
02937             implicit_weight_table(h, 1);
02938         }
02939     }
02940 
02941     if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
02942         ff_h264_direct_dist_scale_factor(h);
02943     ff_h264_direct_ref_list_init(h);
02944 
02945     if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
02946         tmp = get_ue_golomb_31(&s->gb);
02947         if(tmp > 2){
02948             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
02949             return -1;
02950         }
02951         h->cabac_init_idc= tmp;
02952     }
02953 
02954     h->last_qscale_diff = 0;
02955     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
02956     if(tmp>51+6*(h->sps.bit_depth_luma-8)){
02957         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
02958         return -1;
02959     }
02960     s->qscale= tmp;
02961     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
02962     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
02963     //FIXME qscale / qp ... stuff
02964     if(h->slice_type == AV_PICTURE_TYPE_SP){
02965         get_bits1(&s->gb); /* sp_for_switch_flag */
02966     }
02967     if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
02968         get_se_golomb(&s->gb); /* slice_qs_delta */
02969     }
02970 
02971     h->deblocking_filter = 1;
02972     h->slice_alpha_c0_offset = 52;
02973     h->slice_beta_offset = 52;
02974     if( h->pps.deblocking_filter_parameters_present ) {
02975         tmp= get_ue_golomb_31(&s->gb);
02976         if(tmp > 2){
02977             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
02978             return -1;
02979         }
02980         h->deblocking_filter= tmp;
02981         if(h->deblocking_filter < 2)
02982             h->deblocking_filter^= 1; // 1<->0
02983 
02984         if( h->deblocking_filter ) {
02985             h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
02986             h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
02987             if(   h->slice_alpha_c0_offset > 104U
02988                || h->slice_beta_offset     > 104U){
02989                 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
02990                 return -1;
02991             }
02992         }
02993     }
02994 
02995     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
02996        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
02997        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == AV_PICTURE_TYPE_B)
02998        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
02999         h->deblocking_filter= 0;
03000 
03001     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
03002         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
03003             /* Cheat slightly for speed:
03004                Do not bother to deblock across slices. */
03005             h->deblocking_filter = 2;
03006         } else {
03007             h0->max_contexts = 1;
03008             if(!h0->single_decode_warning) {
03009                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
03010                 h0->single_decode_warning = 1;
03011             }
03012             if (h != h0) {
03013                 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
03014                 return 1;
03015             }
03016         }
03017     }
03018     h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
03019 
03020 #if 0 //FMO
03021     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
03022         slice_group_change_cycle= get_bits(&s->gb, ?);
03023 #endif
03024 
03025     h0->last_slice_type = slice_type;
03026     h->slice_num = ++h0->current_slice;
03027     if(h->slice_num >= MAX_SLICES){
03028         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
03029     }
03030 
03031     for(j=0; j<2; j++){
03032         int id_list[16];
03033         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
03034         for(i=0; i<16; i++){
03035             id_list[i]= 60;
03036             if(h->ref_list[j][i].data[0]){
03037                 int k;
03038                 uint8_t *base= h->ref_list[j][i].base[0];
03039                 for(k=0; k<h->short_ref_count; k++)
03040                     if(h->short_ref[k]->base[0] == base){
03041                         id_list[i]= k;
03042                         break;
03043                     }
03044                 for(k=0; k<h->long_ref_count; k++)
03045                     if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
03046                         id_list[i]= h->short_ref_count + k;
03047                         break;
03048                     }
03049             }
03050         }
03051 
03052         ref2frm[0]=
03053         ref2frm[1]= -1;
03054         for(i=0; i<16; i++)
03055             ref2frm[i+2]= 4*id_list[i]
03056                           +(h->ref_list[j][i].reference&3);
03057         ref2frm[18+0]=
03058         ref2frm[18+1]= -1;
03059         for(i=16; i<48; i++)
03060             ref2frm[i+4]= 4*id_list[(i-16)>>1]
03061                           +(h->ref_list[j][i].reference&3);
03062     }
03063 
03064     //FIXME: fix draw_edges+PAFF+frame threads
03065     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
03066     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
03067 
03068     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
03069         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
03070                h->slice_num,
03071                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
03072                first_mb_in_slice,
03073                av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
03074                pps_id, h->frame_num,
03075                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
03076                h->ref_count[0], h->ref_count[1],
03077                s->qscale,
03078                h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
03079                h->use_weight,
03080                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
03081                h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
03082                );
03083     }
03084 
03085     return 0;
03086 }
03087 
03088 int ff_h264_get_slice_type(const H264Context *h)
03089 {
03090     switch (h->slice_type) {
03091     case AV_PICTURE_TYPE_P:  return 0;
03092     case AV_PICTURE_TYPE_B:  return 1;
03093     case AV_PICTURE_TYPE_I:  return 2;
03094     case AV_PICTURE_TYPE_SP: return 3;
03095     case AV_PICTURE_TYPE_SI: return 4;
03096     default:         return -1;
03097     }
03098 }
03099 
static int fill_filter_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int top_xy, left_xy[2];
    int top_type, left_type[2];

    /* Index of the macroblock above; in a field macroblock pair (MB_FIELD)
     * the vertical neighbour is two rows up. */
    top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);

    //FIXME deblocking could skip the intra and nnz parts.

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    /* Left neighbours for the top ([0]) and bottom ([1]) halves of the MB;
     * they differ only in MBAFF when the field flags of current and left
     * macroblock pairs disagree. */
    left_xy[1] = left_xy[0] = mb_xy-1;
    if(FRAME_MBAFF){
        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
        if(s->mb_y&1){
            /* Bottom MB of a pair: a frame-coded left pair means the top
             * field's left neighbour is in the row above. */
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[0] -= s->mb_stride;
            }
        }else{
            if(curr_mb_field_flag){
                /* Bit 7 of mb_type is the interlaced flag; the mask selects
                 * the bottom MB of the above pair only when that pair is
                 * frame coded. */
                top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    {
        //for sufficiently low qp, filtering wouldn't do anything
        //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
        int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
        int qp = s->current_picture.qscale_table[mb_xy];
        if(qp <= qp_thresh
           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
           && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
            if(!FRAME_MBAFF)
                return 1;
            /* In MBAFF, also check the alternate left neighbour and the MB
             * two rows up before skipping the filter entirely. */
            if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
               && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
                return 1;
        }
    }

    top_type     = s->current_picture.mb_type[top_xy]    ;
    left_type[0] = s->current_picture.mb_type[left_xy[0]];
    left_type[1] = s->current_picture.mb_type[left_xy[1]];
    if(h->deblocking_filter == 2){
        /* Filter type 2: do not deblock across slice boundaries, so
         * neighbours from a different slice are treated as unavailable. */
        if(h->slice_table[top_xy     ] != h->slice_num) top_type= 0;
        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
    }else{
        /* 0xFFFF marks macroblocks outside the picture / not yet decoded. */
        if(h->slice_table[top_xy     ] == 0xFFFF) top_type= 0;
        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
    }
    h->top_type    = top_type    ;
    h->left_type[0]= left_type[0];
    h->left_type[1]= left_type[1];

    /* Intra macroblocks are always filtered with maximum strength; no
     * motion/nnz caches are needed for them. */
    if(IS_INTRA(mb_type))
        return 0;

    /* Load the current MB's 4x4 non-zero-coefficient counts into the
     * 8-wide cache layout used by the filter. */
    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);

    h->cbp= h->cbp_table[mb_xy];

    {
        int list;
        for(list=0; list<h->list_count; list++){
            int8_t *ref;
            int y, b_stride;
            int16_t (*mv_dst)[2];
            int16_t (*mv_src)[2];

            if(!USES_LIST(mb_type, list)){
                /* List unused: zero motion and mark all references unused. */
                fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                continue;
            }

            /* Translate per-8x8 reference indices through ref2frm so that
             * references from different slices compare correctly. */
            ref = &s->current_picture.ref_index[list][4*mb_xy];
            {
                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                ref += 2;
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
            }

            /* Copy the current MB's motion vectors row by row into the cache. */
            b_stride = h->b_stride;
            mv_dst   = &h->mv_cache[list][scan8[0]];
            mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
            for(y=0; y<4; y++){
                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
            }

        }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    /* Fill the cache border with nnz values from the top and left MBs. */
    if(top_type){
        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
    }

    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
    }

    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
    if(!CABAC && h->pps.transform_8x8_mode){
        /* For 8x8 transform MBs the filter needs per-8x8 coded-block bits
         * (from cbp_table) instead of the per-4x4 nnz counts. */
        if(IS_8x8DCT(top_type)){
            h->non_zero_count_cache[4+8*0]=
            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
        }
        if(IS_8x8DCT(left_type[0])){
            h->non_zero_count_cache[3+8*1]=
            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
        }
        if(IS_8x8DCT(left_type[1])){
            h->non_zero_count_cache[3+8*3]=
            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
        }

        if(IS_8x8DCT(mb_type)){
            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;

            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;

            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;

            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
        }
    }

    /* Fill the motion/reference cache border rows from the neighbours so
     * edge strength can be derived across MB boundaries. */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            if(USES_LIST(top_type, list)){
                /* Bottom row of the top MB. */
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= 4*top_xy + 2;
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
            }else{
                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
            }

            /* Left column only when current and left MBs share the same
             * field/frame coding (otherwise filter_mb handles it). */
            if(!IS_INTERLACED(mb_type^left_type[0])){
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    const int b8_xy= 4*left_xy[0] + 1;
                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
                    h->ref_cache[list][scan8[0] - 1 +16 ]=
                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
                }else{
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
                    h->ref_cache[list][scan8[0] - 1 + 0  ]=
                    h->ref_cache[list][scan8[0] - 1 + 8  ]=
                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
                }
            }
        }
    }

    return 0;
}
03314 
/**
 * Run the in-loop deblocking filter over the macroblock columns
 * [start_x, end_x) of the current row (both rows of a pair in MBAFF).
 * Restores per-slice state (slice_type, mb position, chroma QP) on exit
 * since fill_filter_caches/filtering clobber it per macroblock.
 */
static void loop_filter(H264Context *h, int start_x, int end_x){
    MpegEncContext * const s = &h->s;
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    const int end_mb_y= s->mb_y + FRAME_MBAFF;
    const int old_slice_type= h->slice_type;
    const int pixel_shift = h->pixel_shift;

    if(h->deblocking_filter) {
        for(mb_x= start_x; mb_x<end_x; mb_x++){
            /* In MBAFF, filter both macroblocks of the vertical pair. */
            for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                int mb_xy, mb_type;
                mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                /* The filter may run on MBs of an earlier slice, so reload
                 * per-MB slice number and list count. */
                h->slice_num= h->slice_table[mb_xy];
                mb_type= s->current_picture.mb_type[mb_xy];
                h->list_count= h->list_counts[mb_xy];

                if(FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x= mb_x;
                s->mb_y= mb_y;
                /* Plane pointers to the top-left sample of this MB; chroma
                 * MB height is 16 for 4:4:4, 8 otherwise. */
                dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                    //FIXME simplify above

                if (MB_FIELD) {
                    /* Field MB: lines of one field, i.e. double stride. */
                    linesize   = h->mb_linesize   = s->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if(mb_y&1){ //FIXME move out of this function?
                        dest_y -= s->linesize*15;
                        dest_cb-= s->uvlinesize*((8 << CHROMA444)-1);
                        dest_cr-= s->uvlinesize*((8 << CHROMA444)-1);
                    }
                } else {
                    linesize   = h->mb_linesize   = s->linesize;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                }
                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
                /* Non-zero return means nothing needs filtering in this MB. */
                if(fill_filter_caches(h, mb_type))
                    continue;
                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);

                if (FRAME_MBAFF) {
                    ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                } else {
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                }
            }
        }
    }
    /* Restore decoding state the per-MB loop above overwrote. */
    h->slice_type= old_slice_type;
    s->mb_x= end_x;
    s->mb_y= end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}
03374 
03375 static void predict_field_decoding_flag(H264Context *h){
03376     MpegEncContext * const s = &h->s;
03377     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
03378     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
03379                 ? s->current_picture.mb_type[mb_xy-1]
03380                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
03381                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
03382                 : 0;
03383     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
03384 }
03385 
03389 static void decode_finish_row(H264Context *h){
03390     MpegEncContext * const s = &h->s;
03391     int top = 16*(s->mb_y >> FIELD_PICTURE);
03392     int height = 16 << FRAME_MBAFF;
03393     int deblock_border = (16 + 4) << FRAME_MBAFF;
03394     int pic_height = 16*s->mb_height >> FIELD_PICTURE;
03395 
03396     if (h->deblocking_filter) {
03397         if((top + height) >= pic_height)
03398             height += deblock_border;
03399 
03400         top -= deblock_border;
03401     }
03402 
03403     if (top >= pic_height || (top + height) < h->emu_edge_height)
03404         return;
03405 
03406     height = FFMIN(height, pic_height - top);
03407     if (top < h->emu_edge_height) {
03408         height = top+height;
03409         top = 0;
03410     }
03411 
03412     ff_draw_horiz_band(s, top, height);
03413 
03414     if (s->dropable) return;
03415 
03416     ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,
03417                              s->picture_structure==PICT_BOTTOM_FIELD);
03418 }
03419 
03420 static int decode_slice(struct AVCodecContext *avctx, void *arg){
03421     H264Context *h = *(void**)arg;
03422     MpegEncContext * const s = &h->s;
03423     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
03424     int lf_x_start = s->mb_x;
03425 
03426     s->mb_skip_run= -1;
03427 
03428     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
03429                     (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
03430 
03431     if( h->pps.cabac ) {
03432         /* realign */
03433         align_get_bits( &s->gb );
03434 
03435         /* init cabac */
03436         ff_init_cabac_states( &h->cabac);
03437         ff_init_cabac_decoder( &h->cabac,
03438                                s->gb.buffer + get_bits_count(&s->gb)/8,
03439                                (get_bits_left(&s->gb) + 7)/8);
03440 
03441         ff_h264_init_cabac_states(h);
03442 
03443         for(;;){
03444 //START_TIMER
03445             int ret = ff_h264_decode_mb_cabac(h);
03446             int eos;
03447 //STOP_TIMER("decode_mb_cabac")
03448 
03449             if(ret>=0) ff_h264_hl_decode_mb(h);
03450 
03451             if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
03452                 s->mb_y++;
03453 
03454                 ret = ff_h264_decode_mb_cabac(h);
03455 
03456                 if(ret>=0) ff_h264_hl_decode_mb(h);
03457                 s->mb_y--;
03458             }
03459             eos = get_cabac_terminate( &h->cabac );
03460 
03461             if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
03462                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03463                 if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
03464                 return 0;
03465             }
03466             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
03467                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
03468                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03469                 return -1;
03470             }
03471 
03472             if( ++s->mb_x >= s->mb_width ) {
03473                 loop_filter(h, lf_x_start, s->mb_x);
03474                 s->mb_x = lf_x_start = 0;
03475                 decode_finish_row(h);
03476                 ++s->mb_y;
03477                 if(FIELD_OR_MBAFF_PICTURE) {
03478                     ++s->mb_y;
03479                     if(FRAME_MBAFF && s->mb_y < s->mb_height)
03480                         predict_field_decoding_flag(h);
03481                 }
03482             }
03483 
03484             if( eos || s->mb_y >= s->mb_height ) {
03485                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
03486                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03487                 if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
03488                 return 0;
03489             }
03490         }
03491 
03492     } else {
03493         for(;;){
03494             int ret = ff_h264_decode_mb_cavlc(h);
03495 
03496             if(ret>=0) ff_h264_hl_decode_mb(h);
03497 
03498             if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
03499                 s->mb_y++;
03500                 ret = ff_h264_decode_mb_cavlc(h);
03501 
03502                 if(ret>=0) ff_h264_hl_decode_mb(h);
03503                 s->mb_y--;
03504             }
03505 
03506             if(ret<0){
03507                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
03508                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03509                 return -1;
03510             }
03511 
03512             if(++s->mb_x >= s->mb_width){
03513                 loop_filter(h, lf_x_start, s->mb_x);
03514                 s->mb_x = lf_x_start = 0;
03515                 decode_finish_row(h);
03516                 ++s->mb_y;
03517                 if(FIELD_OR_MBAFF_PICTURE) {
03518                     ++s->mb_y;
03519                     if(FRAME_MBAFF && s->mb_y < s->mb_height)
03520                         predict_field_decoding_flag(h);
03521                 }
03522                 if(s->mb_y >= s->mb_height){
03523                     tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
03524 
03525                     if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
03526                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03527 
03528                         return 0;
03529                     }else{
03530                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03531 
03532                         return -1;
03533                     }
03534                 }
03535             }
03536 
03537             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
03538                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
03539                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
03540                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03541                     if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
03542 
03543                     return 0;
03544                 }else{
03545                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03546 
03547                     return -1;
03548                 }
03549             }
03550         }
03551     }
03552 
03553 #if 0
03554     for(;s->mb_y < s->mb_height; s->mb_y++){
03555         for(;s->mb_x < s->mb_width; s->mb_x++){
03556             int ret= decode_mb(h);
03557 
03558             ff_h264_hl_decode_mb(h);
03559 
03560             if(ret<0){
03561                 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
03562                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03563 
03564                 return -1;
03565             }
03566 
03567             if(++s->mb_x >= s->mb_width){
03568                 s->mb_x=0;
03569                 if(++s->mb_y >= s->mb_height){
03570                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
03571                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03572 
03573                         return 0;
03574                     }else{
03575                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03576 
03577                         return -1;
03578                     }
03579                 }
03580             }
03581 
03582             if(get_bits_count(s->gb) >= s->gb.size_in_bits){
03583                 if(get_bits_count(s->gb) == s->gb.size_in_bits){
03584                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03585 
03586                     return 0;
03587                 }else{
03588                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03589 
03590                     return -1;
03591                 }
03592             }
03593         }
03594         s->mb_x=0;
03595         ff_draw_horiz_band(s, 16*s->mb_y, 16);
03596     }
03597 #endif
03598     return -1; //not reached
03599 }
03600 
03607 static void execute_decode_slices(H264Context *h, int context_count){
03608     MpegEncContext * const s = &h->s;
03609     AVCodecContext * const avctx= s->avctx;
03610     H264Context *hx;
03611     int i;
03612 
03613     if (s->avctx->hwaccel)
03614         return;
03615     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03616         return;
03617     if(context_count == 1) {
03618         decode_slice(avctx, &h);
03619     } else {
03620         for(i = 1; i < context_count; i++) {
03621             hx = h->thread_context[i];
03622             hx->s.error_recognition = avctx->error_recognition;
03623             hx->s.error_count = 0;
03624         }
03625 
03626         avctx->execute(avctx, (void *)decode_slice,
03627                        h->thread_context, NULL, context_count, sizeof(void*));
03628 
03629         /* pull back stuff from slices to master context */
03630         hx = h->thread_context[context_count - 1];
03631         s->mb_x = hx->s.mb_x;
03632         s->mb_y = hx->s.mb_y;
03633         s->dropable = hx->s.dropable;
03634         s->picture_structure = hx->s.picture_structure;
03635         for(i = 1; i < context_count; i++)
03636             h->s.error_count += h->thread_context[i]->s.error_count;
03637     }
03638 }
03639 
03640 
03641 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
03642     MpegEncContext * const s = &h->s;
03643     AVCodecContext * const avctx= s->avctx;
03644     H264Context *hx; 
03645     int buf_index;
03646     int context_count;
03647     int next_avc;
03648     int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
03649     int nals_needed=0; 
03650     int nal_index;
03651 
03652     h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
03653     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
03654         h->current_slice = 0;
03655         if (!s->first_field)
03656             s->current_picture_ptr= NULL;
03657         ff_h264_reset_sei(h);
03658     }
03659 
03660     for(;pass <= 1;pass++){
03661         buf_index = 0;
03662         context_count = 0;
03663         next_avc = h->is_avc ? 0 : buf_size;
03664         nal_index = 0;
03665     for(;;){
03666         int consumed;
03667         int dst_length;
03668         int bit_length;
03669         const uint8_t *ptr;
03670         int i, nalsize = 0;
03671         int err;
03672 
03673         if(buf_index >= next_avc) {
03674             if(buf_index >= buf_size) break;
03675             nalsize = 0;
03676             for(i = 0; i < h->nal_length_size; i++)
03677                 nalsize = (nalsize << 8) | buf[buf_index++];
03678             if(nalsize <= 0 || nalsize > buf_size - buf_index){
03679                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
03680                 break;
03681             }
03682             next_avc= buf_index + nalsize;
03683         } else {
03684             // start code prefix search
03685             for(; buf_index + 3 < next_avc; buf_index++){
03686                 // This should always succeed in the first iteration.
03687                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
03688                     break;
03689             }
03690 
03691             if(buf_index+3 >= buf_size) break;
03692 
03693             buf_index+=3;
03694             if(buf_index >= next_avc) continue;
03695         }
03696 
03697         hx = h->thread_context[context_count];
03698 
03699         ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
03700         if (ptr==NULL || dst_length < 0){
03701             return -1;
03702         }
03703         i= buf_index + consumed;
03704         if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
03705            buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
03706             s->workaround_bugs |= FF_BUG_TRUNCATED;
03707 
03708         if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
03709         while(ptr[dst_length - 1] == 0 && dst_length > 0)
03710             dst_length--;
03711         }
03712         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
03713 
03714         if(s->avctx->debug&FF_DEBUG_STARTCODE){
03715             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
03716         }
03717 
03718         if (h->is_avc && (nalsize != consumed) && nalsize){
03719             av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
03720         }
03721 
03722         buf_index += consumed;
03723         nal_index++;
03724 
03725         if(pass == 0) {
03726             // packets can sometimes contain multiple PPS/SPS
03727             // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
03728             // if so, when frame threading we can't start the next thread until we've read all of them
03729             switch (hx->nal_unit_type) {
03730                 case NAL_SPS:
03731                 case NAL_PPS:
03732                     nals_needed = nal_index;
03733                     break;
03734                 case NAL_IDR_SLICE:
03735                 case NAL_SLICE:
03736                     init_get_bits(&hx->s.gb, ptr, bit_length);
03737                     if (!get_ue_golomb(&hx->s.gb))
03738                         nals_needed = nal_index;
03739             }
03740             continue;
03741         }
03742 
03743         //FIXME do not discard SEI id
03744         if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0)
03745             continue;
03746 
03747       again:
03748         err = 0;
03749         switch(hx->nal_unit_type){
03750         case NAL_IDR_SLICE:
03751             if (h->nal_unit_type != NAL_IDR_SLICE) {
03752                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
03753                 return -1;
03754             }
03755             idr(h); //FIXME ensure we don't loose some frames if there is reordering
03756         case NAL_SLICE:
03757             init_get_bits(&hx->s.gb, ptr, bit_length);
03758             hx->intra_gb_ptr=
03759             hx->inter_gb_ptr= &hx->s.gb;
03760             hx->s.data_partitioning = 0;
03761 
03762             if((err = decode_slice_header(hx, h)))
03763                break;
03764 
03765             s->current_picture_ptr->key_frame |=
03766                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
03767                     (h->sei_recovery_frame_cnt >= 0);
03768 
03769             if (h->current_slice == 1) {
03770                 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
03771                     decode_postinit(h, nal_index >= nals_needed);
03772                 }
03773 
03774                 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
03775                     return -1;
03776                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03777                     ff_vdpau_h264_picture_start(s);
03778             }
03779 
03780             if(hx->redundant_pic_count==0
03781                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03782                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03783                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03784                && avctx->skip_frame < AVDISCARD_ALL){
03785                 if(avctx->hwaccel) {
03786                     if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
03787                         return -1;
03788                 }else
03789                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
03790                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
03791                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
03792                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
03793                 }else
03794                     context_count++;
03795             }
03796             break;
03797         case NAL_DPA:
03798             init_get_bits(&hx->s.gb, ptr, bit_length);
03799             hx->intra_gb_ptr=
03800             hx->inter_gb_ptr= NULL;
03801 
03802             if ((err = decode_slice_header(hx, h)) < 0)
03803                 break;
03804 
03805             hx->s.data_partitioning = 1;
03806 
03807             break;
03808         case NAL_DPB:
03809             init_get_bits(&hx->intra_gb, ptr, bit_length);
03810             hx->intra_gb_ptr= &hx->intra_gb;
03811             break;
03812         case NAL_DPC:
03813             init_get_bits(&hx->inter_gb, ptr, bit_length);
03814             hx->inter_gb_ptr= &hx->inter_gb;
03815 
03816             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
03817                && s->context_initialized
03818                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03819                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03820                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03821                && avctx->skip_frame < AVDISCARD_ALL)
03822                 context_count++;
03823             break;
03824         case NAL_SEI:
03825             init_get_bits(&s->gb, ptr, bit_length);
03826             ff_h264_decode_sei(h);
03827             break;
03828         case NAL_SPS:
03829             init_get_bits(&s->gb, ptr, bit_length);
03830             ff_h264_decode_seq_parameter_set(h);
03831 
03832             if (s->flags& CODEC_FLAG_LOW_DELAY ||
03833                 (h->sps.bitstream_restriction_flag && !h->sps.num_reorder_frames))
03834                 s->low_delay=1;
03835 
03836             if(avctx->has_b_frames < 2)
03837                 avctx->has_b_frames= !s->low_delay;
03838 
03839             if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
03840                 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
03841                     avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
03842                     h->pixel_shift = h->sps.bit_depth_luma > 8;
03843 
03844                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
03845                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
03846                     dsputil_init(&s->dsp, s->avctx);
03847                 } else {
03848                     av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
03849                     return -1;
03850                 }
03851             }
03852             break;
03853         case NAL_PPS:
03854             init_get_bits(&s->gb, ptr, bit_length);
03855 
03856             ff_h264_decode_picture_parameter_set(h, bit_length);
03857 
03858             break;
03859         case NAL_AUD:
03860         case NAL_END_SEQUENCE:
03861         case NAL_END_STREAM:
03862         case NAL_FILLER_DATA:
03863         case NAL_SPS_EXT:
03864         case NAL_AUXILIARY_SLICE:
03865             break;
03866         default:
03867             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
03868         }
03869 
03870         if(context_count == h->max_contexts) {
03871             execute_decode_slices(h, context_count);
03872             context_count = 0;
03873         }
03874 
03875         if (err < 0)
03876             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
03877         else if(err == 1) {
03878             /* Slice could not be decoded in parallel mode, copy down
03879              * NAL unit stuff to context 0 and restart. Note that
03880              * rbsp_buffer is not transferred, but since we no longer
03881              * run in parallel mode this should not be an issue. */
03882             h->nal_unit_type = hx->nal_unit_type;
03883             h->nal_ref_idc   = hx->nal_ref_idc;
03884             hx = h;
03885             goto again;
03886         }
03887     }
03888     }
03889     if(context_count)
03890         execute_decode_slices(h, context_count);
03891     return buf_index;
03892 }
03893 
03897 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
03898         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
03899         if(pos+10>buf_size) pos=buf_size; // oops ;)
03900 
03901         return pos;
03902 }
03903 
/**
 * Decode one packet of H.264 data.
 *
 * An empty packet (buf_size == 0) flushes the delayed-picture buffer at
 * end of stream. Otherwise the NAL units are decoded and, once a full
 * frame (or the second field of a pair) is available, the next picture
 * in output order is returned through *pict.
 *
 * @param avctx     codec context (priv_data is the H264Context)
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is output, 0 otherwise
 * @param avpkt     input packet
 * @return number of bytes consumed, or -1 on error
 */
static int decode_frame(AVCodecContext *avctx,
                             void *data, int *data_size,
                             AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    int buf_index;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

   /* end of stream, output what is still in the buffers */
 out:
    if (buf_size == 0) {
        Picture *out;
        int i, out_idx;

        s->current_picture_ptr = NULL;

//FIXME factorize this with the output code below
        /* pick the delayed picture with the smallest poc, stopping the
         * search at the first keyframe/mmco_reset boundary */
        out = h->delayed_pic[0];
        out_idx = 0;
        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
                out_idx = i;
            }

        /* close the gap left by the removed picture */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];

        if(out){
            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;
        }

        return 0;
    }

    buf_index=decode_nal_units(h, buf, buf_size);
    if(buf_index < 0)
        return -1;

    /* an end-of-sequence NAL with no picture pending triggers the flush path above */
    if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        buf_size = 0;
        goto out;
    }

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        if (avctx->skip_frame >= AVDISCARD_NONREF)
            return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
        return -1;
    }

    /* output only when a whole frame (or final slice chunk) was decoded */
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){

        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);

        field_end(h, 0);

        if (!h->next_output_pic) {
            /* Wait for second field. */
            *data_size = 0;

        } else {
            *data_size = sizeof(AVFrame);
            *pict = *(AVFrame*)h->next_output_pic;
        }
    }

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
//printf("out %d\n", (int)pict->data[0]);

    return get_consumed_bytes(s, buf_index, buf_size);
}
#if 0
/*
 * Dead code (compiled out): fill h->mb_avail[] with the availability of
 * the neighbouring macroblocks of the current one. A neighbour counts as
 * available when it belongs to the same slice (slice_table matches
 * slice_num). Indices 0-3 are top-left, top, top-right and left
 * (derived from the mb_stride offsets used below).
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        /* first macroblock row: no neighbours above */
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
04003 
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/*
 * Standalone self-test, only built with -DTEST: round-trips Exp-Golomb
 * codes through the put/get bit readers and times them with the
 * START_TIMER/STOP_TIMER macros. The DCT/quantizer/NAL sections below
 * are compiled out (#if 0) and reference symbols (encode_nal, h) that
 * do not exist here — NOTE(review): that disabled code would not build
 * as-is.
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    /* write COUNT unsigned Exp-Golomb codes ... */
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    /* ... and read them back, checking each decoded value */
    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    /* same round-trip for signed Exp-Golomb codes, centered around 0 */
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        h->h264dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        /* scatter 'zeros' zero bytes through the random bitstream */
        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
04178 
04179 
04180 av_cold void ff_h264_free_context(H264Context *h)
04181 {
04182     int i;
04183 
04184     free_tables(h, 1); //FIXME cleanup init stuff perhaps
04185 
04186     for(i = 0; i < MAX_SPS_COUNT; i++)
04187         av_freep(h->sps_buffers + i);
04188 
04189     for(i = 0; i < MAX_PPS_COUNT; i++)
04190         av_freep(h->pps_buffers + i);
04191 }
04192 
04193 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
04194 {
04195     H264Context *h = avctx->priv_data;
04196     MpegEncContext *s = &h->s;
04197 
04198     ff_h264_free_context(h);
04199 
04200     MPV_common_end(s);
04201 
04202 //    memset(h, 0, sizeof(H264Context));
04203 
04204     return 0;
04205 }
04206 
/* Human-readable names for the FF_PROFILE_H264_* constants, exported via
 * the .profiles field of the codec definitions below; the list is
 * terminated by FF_PROFILE_UNKNOWN. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
    { FF_PROFILE_H264_MAIN,                 "Main"                  },
    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
    { FF_PROFILE_H264_HIGH,                 "High"                  },
    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
    { FF_PROFILE_UNKNOWN },
};
04223 
04224 AVCodec ff_h264_decoder = {
04225     "h264",
04226     AVMEDIA_TYPE_VIDEO,
04227     CODEC_ID_H264,
04228     sizeof(H264Context),
04229     ff_h264_decode_init,
04230     NULL,
04231     ff_h264_decode_end,
04232     decode_frame,
04233     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
04234         CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
04235     .flush= flush_dpb,
04236     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
04237     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
04238     .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
04239     .profiles = NULL_IF_CONFIG_SMALL(profiles),
04240 };
04241 
#if CONFIG_H264_VDPAU_DECODER
/* VDPAU hardware-accelerated H.264 decoder registration. Converted to
 * all-designated initializers for consistency with the designated tail
 * fields already present; the positional NULL was the unused .encode
 * field, implicitly zero-filled now. */
AVCodec ff_h264_vdpau_decoder = {
    .name           = "h264_vdpau",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = ff_h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
    .profiles = NULL_IF_CONFIG_SMALL(profiles),
};
#endif