Libav 0.7.1
|
00001 /* 00002 * H.26L/H.264/AVC/JVT/14496-10/... decoder 00003 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 00004 * 00005 * This file is part of Libav. 00006 * 00007 * Libav is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * Libav is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with Libav; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00028 #include "libavutil/imgutils.h" 00029 #include "internal.h" 00030 #include "dsputil.h" 00031 #include "avcodec.h" 00032 #include "mpegvideo.h" 00033 #include "h264.h" 00034 #include "h264data.h" 00035 #include "h264_mvpred.h" 00036 #include "golomb.h" 00037 #include "mathops.h" 00038 #include "rectangle.h" 00039 #include "thread.h" 00040 #include "vdpau_internal.h" 00041 #include "libavutil/avassert.h" 00042 00043 #include "cabac.h" 00044 00045 //#undef NDEBUG 00046 #include <assert.h> 00047 00048 static const uint8_t rem6[QP_MAX_NUM+1]={ 00049 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 00050 }; 00051 00052 static const uint8_t div6[QP_MAX_NUM+1]={ 00053 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10, 00054 }; 00055 00056 static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = { 00057 PIX_FMT_DXVA2_VLD, 00058 PIX_FMT_VAAPI_VLD, 00059 PIX_FMT_YUVJ420P, 00060 PIX_FMT_NONE 00061 }; 00062 00063 void ff_h264_write_back_intra_pred_mode(H264Context *h){ 00064 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; 00065 00066 AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4); 00067 mode[4]= h->intra4x4_pred_mode_cache[7+8*3]; 00068 mode[5]= h->intra4x4_pred_mode_cache[7+8*2]; 00069 mode[6]= h->intra4x4_pred_mode_cache[7+8*1]; 00070 } 00071 00075 int ff_h264_check_intra4x4_pred_mode(H264Context *h){ 00076 MpegEncContext * const s = &h->s; 00077 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0}; 00078 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED}; 00079 int i; 00080 00081 if(!(h->top_samples_available&0x8000)){ 00082 for(i=0; i<4; i++){ 00083 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ]; 00084 if(status<0){ 00085 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); 00086 return -1; 00087 } else if(status){ 00088 h->intra4x4_pred_mode_cache[scan8[0] + i]= status; 00089 } 00090 } 00091 } 00092 00093 if((h->left_samples_available&0x8888)!=0x8888){ 00094 static const int mask[4]={0x8000,0x2000,0x80,0x20}; 00095 for(i=0; i<4; i++){ 00096 if(!(h->left_samples_available&mask[i])){ 00097 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ]; 00098 if(status<0){ 00099 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); 00100 return -1; 00101 } else if(status){ 00102 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status; 00103 } 00104 } 00105 } 00106 } 00107 00108 return 0; 00109 } //FIXME cleanup like ff_h264_check_intra_pred_mode 00110 00114 int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma){ 00115 MpegEncContext * const s = &h->s; 00116 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1}; 00117 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8}; 00118 00119 if(mode > 6U) { 00120 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y); 00121 return -1; 00122 } 00123 00124 if(!(h->top_samples_available&0x8000)){ 00125 mode= top[ mode ]; 00126 if(mode<0){ 00127 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); 00128 return -1; 00129 } 00130 } 00131 00132 if((h->left_samples_available&0x8080) != 0x8080){ 00133 mode= left[ mode ]; 00134 if(is_chroma && (h->left_samples_available&0x8080)){ //mad cow disease mode, aka MBAFF + constrained_intra_pred 00135 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8); 00136 } 00137 if(mode<0){ 00138 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); 00139 return -1; 00140 } 00141 } 00142 00143 return mode; 00144 } 00145 00146 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){ 00147 int i, si, di; 00148 uint8_t *dst; 00149 int bufidx; 00150 00151 // src[0]&0x80; //forbidden bit 00152 h->nal_ref_idc= src[0]>>5; 00153 h->nal_unit_type= src[0]&0x1F; 00154 00155 src++; length--; 00156 00157 #if HAVE_FAST_UNALIGNED 00158 # if HAVE_FAST_64BIT 00159 # define RS 7 00160 for(i=0; i+1<length; i+=9){ 00161 if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL)) 00162 # else 00163 # define RS 3 00164 for(i=0; i+1<length; i+=5){ 00165 if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U)) 00166 # endif 00167 continue; 00168 if(i>0 && !src[i]) i--; 00169 while(src[i]) i++; 00170 #else 00171 # define RS 0 00172 for(i=0; i+1<length; i+=2){ 00173 if(src[i]) continue; 00174 if(i>0 && src[i-1]==0) i--; 00175 #endif 00176 if(i+2<length && src[i+1]==0 && src[i+2]<=3){ 00177 if(src[i+2]!=3){ 00178 /* startcode, so we must be past the end */ 00179 length=i; 00180 } 00181 break; 00182 } 00183 i-= RS; 00184 } 00185 00186 if(i>=length-1){ //no escaped 0 00187 *dst_length= length; 00188 *consumed= length+1; //+1 for the header 00189 return src; 00190 } 00191 00192 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data 00193 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE); 00194 dst= h->rbsp_buffer[bufidx]; 00195 00196 if (dst == NULL){ 00197 return NULL; 00198 } 00199 00200 //printf("decoding esc\n"); 00201 memcpy(dst, src, i); 00202 si=di=i; 00203 while(si+2<length){ 00204 //remove escapes (very rare 1:2^22) 00205 if(src[si+2]>3){ 00206 dst[di++]= src[si++]; 00207 dst[di++]= src[si++]; 00208 }else if(src[si]==0 && src[si+1]==0){ 00209 if(src[si+2]==3){ //escape 00210 dst[di++]= 0; 00211 dst[di++]= 0; 00212 si+=3; 00213 continue; 00214 }else //next start code 00215 goto nsc; 00216 } 00217 00218 dst[di++]= src[si++]; 00219 } 00220 while(si<length) 00221 dst[di++]= src[si++]; 00222 nsc: 00223 00224 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE); 00225 00226 *dst_length= di; 00227 *consumed= si + 1;//+1 for the header 00228 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding) 00229 return dst; 00230 } 00231 00236 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){ 00237 int v= *src; 00238 int r; 00239 00240 tprintf(h->s.avctx, "rbsp trailing %X\n", v); 00241 00242 for(r=1; r<9; r++){ 00243 if(v&1) return r; 00244 v>>=1; 00245 } 00246 return 0; 00247 } 00248 00249 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height, 00250 int y_offset, int list){ 00251 int raw_my= h->mv_cache[list][ scan8[n] ][1]; 00252 int filter_height= (raw_my&3) ? 2 : 0; 00253 int full_my= (raw_my>>2) + y_offset; 00254 int top = full_my - filter_height, bottom = full_my + height + filter_height; 00255 00256 return FFMAX(abs(top), bottom); 00257 } 00258 00259 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height, 00260 int y_offset, int list0, int list1, int *nrefs){ 00261 MpegEncContext * const s = &h->s; 00262 int my; 00263 00264 y_offset += 16*(s->mb_y >> MB_FIELD); 00265 00266 if(list0){ 00267 int ref_n = h->ref_cache[0][ scan8[n] ]; 00268 Picture *ref= &h->ref_list[0][ref_n]; 00269 00270 // Error resilience puts the current picture in the ref list. 00271 // Don't try to wait on these as it will cause a deadlock. 00272 // Fields can wait on each other, though. 00273 if(ref->thread_opaque != s->current_picture.thread_opaque || 00274 (ref->reference&3) != s->picture_structure) { 00275 my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0); 00276 if (refs[0][ref_n] < 0) nrefs[0] += 1; 00277 refs[0][ref_n] = FFMAX(refs[0][ref_n], my); 00278 } 00279 } 00280 00281 if(list1){ 00282 int ref_n = h->ref_cache[1][ scan8[n] ]; 00283 Picture *ref= &h->ref_list[1][ref_n]; 00284 00285 if(ref->thread_opaque != s->current_picture.thread_opaque || 00286 (ref->reference&3) != s->picture_structure) { 00287 my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1); 00288 if (refs[1][ref_n] < 0) nrefs[1] += 1; 00289 refs[1][ref_n] = FFMAX(refs[1][ref_n], my); 00290 } 00291 } 00292 } 00293 00299 static void await_references(H264Context *h){ 00300 MpegEncContext * const s = &h->s; 00301 const int mb_xy= h->mb_xy; 00302 const int mb_type= s->current_picture.mb_type[mb_xy]; 00303 int refs[2][48]; 00304 int nrefs[2] = {0}; 00305 int ref, list; 00306 00307 memset(refs, -1, sizeof(refs)); 00308 00309 if(IS_16X16(mb_type)){ 00310 get_lowest_part_y(h, refs, 0, 16, 0, 00311 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); 00312 }else if(IS_16X8(mb_type)){ 00313 get_lowest_part_y(h, refs, 0, 8, 0, 00314 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); 00315 get_lowest_part_y(h, refs, 8, 8, 8, 00316 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); 00317 }else if(IS_8X16(mb_type)){ 00318 get_lowest_part_y(h, refs, 0, 16, 0, 00319 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); 00320 get_lowest_part_y(h, refs, 4, 16, 0, 00321 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); 00322 }else{ 00323 int i; 00324 00325 assert(IS_8X8(mb_type)); 00326 00327 for(i=0; i<4; i++){ 00328 const int sub_mb_type= h->sub_mb_type[i]; 00329 const int n= 4*i; 00330 int y_offset= (i&2)<<2; 00331 00332 if(IS_SUB_8X8(sub_mb_type)){ 00333 get_lowest_part_y(h, refs, n , 8, y_offset, 00334 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); 00335 }else if(IS_SUB_8X4(sub_mb_type)){ 00336 get_lowest_part_y(h, refs, n , 4, y_offset, 00337 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); 00338 get_lowest_part_y(h, refs, n+2, 4, y_offset+4, 00339 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); 00340 }else if(IS_SUB_4X8(sub_mb_type)){ 00341 get_lowest_part_y(h, refs, n , 8, y_offset, 00342 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); 00343 get_lowest_part_y(h, refs, n+1, 8, y_offset, 00344 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); 00345 }else{ 00346 int j; 00347 assert(IS_SUB_4X4(sub_mb_type)); 00348 for(j=0; j<4; j++){ 00349 int sub_y_offset= y_offset + 2*(j&2); 00350 get_lowest_part_y(h, refs, n+j, 4, sub_y_offset, 00351 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs); 00352 } 00353 } 00354 } 00355 } 00356 00357 for(list=h->list_count-1; list>=0; list--){ 00358 for(ref=0; ref<48 && nrefs[list]; ref++){ 00359 int row = refs[list][ref]; 00360 if(row >= 0){ 00361 Picture *ref_pic = &h->ref_list[list][ref]; 00362 int ref_field = ref_pic->reference - 1; 00363 int ref_field_picture = ref_pic->field_picture; 00364 int pic_height = 16*s->mb_height >> ref_field_picture; 00365 00366 row <<= MB_MBAFF; 00367 nrefs[list]--; 00368 00369 if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields 00370 ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1); 00371 ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) , pic_height-1), 0); 00372 }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame 00373 ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field , pic_height-1), 0); 00374 }else if(FIELD_PICTURE){ 00375 ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field); 00376 }else{ 00377 ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0); 00378 } 00379 } 00380 } 00381 } 00382 } 00383 00384 #if 0 00385 00389 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){ 00390 // const int qmul= dequant_coeff[qp][0]; 00391 int i; 00392 int temp[16]; //FIXME check if this is a good idea 00393 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; 00394 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; 00395 00396 for(i=0; i<4; i++){ 00397 const int offset= y_offset[i]; 00398 const int z0= block[offset+stride*0] + block[offset+stride*4]; 00399 const int z1= block[offset+stride*0] - block[offset+stride*4]; 00400 const int z2= block[offset+stride*1] - block[offset+stride*5]; 00401 const int z3= block[offset+stride*1] + block[offset+stride*5]; 00402 00403 temp[4*i+0]= z0+z3; 00404 temp[4*i+1]= z1+z2; 00405 temp[4*i+2]= z1-z2; 00406 temp[4*i+3]= z0-z3; 00407 } 00408 00409 for(i=0; i<4; i++){ 00410 const int offset= x_offset[i]; 00411 const int z0= temp[4*0+i] + temp[4*2+i]; 00412 const int z1= temp[4*0+i] - temp[4*2+i]; 00413 const int z2= temp[4*1+i] - temp[4*3+i]; 00414 const int z3= temp[4*1+i] + temp[4*3+i]; 00415 00416 block[stride*0 +offset]= (z0 + z3)>>1; 00417 block[stride*2 +offset]= (z1 + z2)>>1; 00418 block[stride*8 +offset]= (z1 - z2)>>1; 00419 block[stride*10+offset]= (z0 - z3)>>1; 00420 } 00421 } 00422 #endif 00423 00424 #undef xStride 00425 #undef stride 00426 00427 #if 0 00428 static void chroma_dc_dct_c(DCTELEM *block){ 00429 const int stride= 16*2; 00430 const int xStride= 16; 00431 int a,b,c,d,e; 00432 00433 a= block[stride*0 + xStride*0]; 00434 b= block[stride*0 + xStride*1]; 00435 c= block[stride*1 + xStride*0]; 00436 d= block[stride*1 + xStride*1]; 00437 00438 e= a-b; 00439 a= a+b; 00440 b= c-d; 00441 c= c+d; 00442 00443 block[stride*0 + xStride*0]= (a+c); 00444 block[stride*0 + xStride*1]= (e+b); 00445 block[stride*1 + xStride*0]= (a-c); 00446 block[stride*1 + xStride*1]= (e-b); 00447 } 00448 #endif 00449 00450 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, 00451 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 00452 int src_x_offset, int src_y_offset, 00453 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op, 00454 int pixel_shift, int chroma444){ 00455 MpegEncContext * const s = &h->s; 00456 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; 00457 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; 00458 const int luma_xy= (mx&3) + ((my&3)<<2); 00459 int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize; 00460 uint8_t * src_y = pic->data[0] + offset; 00461 uint8_t * src_cb, * src_cr; 00462 int extra_width= h->emu_edge_width; 00463 int extra_height= h->emu_edge_height; 00464 int emu=0; 00465 const int full_mx= mx>>2; 00466 const int full_my= my>>2; 00467 const int pic_width = 16*s->mb_width; 00468 const int pic_height = 16*s->mb_height >> MB_FIELD; 00469 00470 if(mx&7) extra_width -= 3; 00471 if(my&7) extra_height -= 3; 00472 00473 if( full_mx < 0-extra_width 00474 || full_my < 0-extra_height 00475 || full_mx + 16/*FIXME*/ > pic_width + extra_width 00476 || full_my + 16/*FIXME*/ > pic_height + extra_height){ 00477 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); 00478 src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; 00479 emu=1; 00480 } 00481 00482 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps? 00483 if(!square){ 00484 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); 00485 } 00486 00487 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return; 00488 00489 if(chroma444){ 00490 src_cb = pic->data[1] + offset; 00491 if(emu){ 00492 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 00493 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); 00494 src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; 00495 } 00496 qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps? 00497 if(!square){ 00498 qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize); 00499 } 00500 00501 src_cr = pic->data[2] + offset; 00502 if(emu){ 00503 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 00504 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); 00505 src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; 00506 } 00507 qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps? 00508 if(!square){ 00509 qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize); 00510 } 00511 return; 00512 } 00513 00514 if(MB_FIELD){ 00515 // chroma offset when predicting from a field of opposite parity 00516 my += 2 * ((s->mb_y & 1) - (pic->reference - 1)); 00517 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); 00518 } 00519 src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize; 00520 src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize; 00521 00522 if(emu){ 00523 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); 00524 src_cb= s->edge_emu_buffer; 00525 } 00526 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); 00527 00528 if(emu){ 00529 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); 00530 src_cr= s->edge_emu_buffer; 00531 } 00532 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7); 00533 } 00534 00535 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, 00536 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 00537 int x_offset, int y_offset, 00538 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, 00539 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, 00540 int list0, int list1, int pixel_shift, int chroma444){ 00541 MpegEncContext * const s = &h->s; 00542 qpel_mc_func *qpix_op= qpix_put; 00543 h264_chroma_mc_func chroma_op= chroma_put; 00544 00545 dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; 00546 if(chroma444){ 00547 dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; 00548 dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; 00549 }else{ 00550 dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; 00551 dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; 00552 } 00553 x_offset += 8*s->mb_x; 00554 y_offset += 8*(s->mb_y >> MB_FIELD); 00555 00556 if(list0){ 00557 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; 00558 mc_dir_part(h, ref, n, square, chroma_height, delta, 0, 00559 dest_y, dest_cb, dest_cr, x_offset, y_offset, 00560 qpix_op, chroma_op, pixel_shift, chroma444); 00561 00562 qpix_op= qpix_avg; 00563 chroma_op= chroma_avg; 00564 } 00565 00566 if(list1){ 00567 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; 00568 mc_dir_part(h, ref, n, square, chroma_height, delta, 1, 00569 dest_y, dest_cb, dest_cr, x_offset, y_offset, 00570 qpix_op, chroma_op, pixel_shift, chroma444); 00571 } 00572 } 00573 00574 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta, 00575 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 00576 int x_offset, int y_offset, 00577 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, 00578 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, 00579 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, 00580 int list0, int list1, int pixel_shift, int chroma444){ 00581 MpegEncContext * const s = &h->s; 00582 00583 dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; 00584 if(chroma444){ 00585 chroma_weight_avg = luma_weight_avg; 00586 chroma_weight_op = luma_weight_op; 00587 dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; 00588 dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; 00589 }else{ 00590 dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; 00591 dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; 00592 } 00593 x_offset += 8*s->mb_x; 00594 y_offset += 8*(s->mb_y >> MB_FIELD); 00595 00596 if(list0 && list1){ 00597 /* don't optimize for luma-only case, since B-frames usually 00598 * use implicit weights => chroma too. */ 00599 uint8_t *tmp_cb = s->obmc_scratchpad; 00600 uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift); 00601 uint8_t *tmp_y = s->obmc_scratchpad + 16*h->mb_uvlinesize; 00602 int refn0 = h->ref_cache[0][ scan8[n] ]; 00603 int refn1 = h->ref_cache[1][ scan8[n] ]; 00604 00605 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, 00606 dest_y, dest_cb, dest_cr, 00607 x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444); 00608 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, 00609 tmp_y, tmp_cb, tmp_cr, 00610 x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444); 00611 00612 if(h->use_weight == 2){ 00613 int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1]; 00614 int weight1 = 64 - weight0; 00615 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0); 00616 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0); 00617 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0); 00618 }else{ 00619 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, 00620 h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0], 00621 h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]); 00622 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, 00623 h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0], 00624 h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]); 00625 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, 00626 h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], 00627 h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); 00628 } 00629 }else{ 00630 int list = list1 ? 1 : 0; 00631 int refn = h->ref_cache[list][ scan8[n] ]; 00632 Picture *ref= &h->ref_list[list][refn]; 00633 mc_dir_part(h, ref, n, square, chroma_height, delta, list, 00634 dest_y, dest_cb, dest_cr, x_offset, y_offset, 00635 qpix_put, chroma_put, pixel_shift, chroma444); 00636 00637 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, 00638 h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); 00639 if(h->use_weight_chroma){ 00640 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, 00641 h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); 00642 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, 00643 h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); 00644 } 00645 } 00646 } 00647 00648 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, 00649 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 00650 int x_offset, int y_offset, 00651 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, 00652 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, 00653 h264_weight_func *weight_op, h264_biweight_func *weight_avg, 00654 int list0, int list1, int pixel_shift, int chroma444){ 00655 if((h->use_weight==2 && list0 && list1 00656 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32)) 00657 || h->use_weight==1) 00658 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, 00659 x_offset, y_offset, qpix_put, chroma_put, 00660 weight_op[0], weight_op[3], weight_avg[0], 00661 weight_avg[3], list0, list1, pixel_shift, chroma444); 00662 else 00663 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, 00664 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, 00665 chroma_avg, list0, list1, pixel_shift, chroma444); 00666 } 00667 00668 static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){ 00669 /* fetch pixels for estimated mv 4 macroblocks ahead 00670 * optimized for 64byte cache lines */ 00671 MpegEncContext * const s = &h->s; 00672 const int refn = h->ref_cache[list][scan8[0]]; 00673 if(refn >= 0){ 00674 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; 00675 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; 00676 uint8_t **src= h->ref_list[list][refn].data; 00677 int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift); 00678 s->dsp.prefetch(src[0]+off, s->linesize, 4); 00679 if(chroma444){ 00680 s->dsp.prefetch(src[1]+off, s->linesize, 4); 00681 s->dsp.prefetch(src[2]+off, s->linesize, 4); 00682 }else{ 00683 off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift); 00684 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); 00685 } 00686 } 00687 } 00688 00689 static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 00690 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), 00691 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), 00692 h264_weight_func *weight_op, h264_biweight_func *weight_avg, 00693 int pixel_shift, int chroma444){ 00694 MpegEncContext * const s = &h->s; 00695 const int mb_xy= h->mb_xy; 00696 const int mb_type= s->current_picture.mb_type[mb_xy]; 00697 00698 assert(IS_INTER(mb_type)); 00699 00700 if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME)) 00701 await_references(h); 00702 prefetch_motion(h, 0, pixel_shift, chroma444); 00703 00704 if(IS_16X16(mb_type)){ 00705 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, 00706 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], 00707 weight_op, weight_avg, 00708 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), 00709 pixel_shift, chroma444); 00710 }else if(IS_16X8(mb_type)){ 00711 mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0, 00712 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], 00713 &weight_op[1], &weight_avg[1], 00714 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), 00715 pixel_shift, chroma444); 00716 mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4, 00717 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], 00718 &weight_op[1], &weight_avg[1], 00719 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), 00720 pixel_shift, chroma444); 00721 }else if(IS_8X16(mb_type)){ 00722 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, 00723 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], 00724 &weight_op[2], &weight_avg[2], 00725 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), 00726 pixel_shift, chroma444); 00727 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, 00728 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], 00729 &weight_op[2], &weight_avg[2], 00730 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), 00731 pixel_shift, chroma444); 00732 }else{ 00733 int i; 00734 00735 assert(IS_8X8(mb_type)); 00736 00737 for(i=0; i<4; i++){ 00738 const int sub_mb_type= h->sub_mb_type[i]; 00739 const int n= 4*i; 00740 int x_offset= (i&1)<<2; 00741 int y_offset= (i&2)<<1; 00742 00743 if(IS_SUB_8X8(sub_mb_type)){ 00744 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, 00745 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], 00746 &weight_op[3], &weight_avg[3], 00747 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), 00748 pixel_shift, chroma444); 00749 }else if(IS_SUB_8X4(sub_mb_type)){ 00750 mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset, 00751 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], 00752 &weight_op[4], &weight_avg[4], 00753 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), 00754 pixel_shift, chroma444); 00755 mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, 00756 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], 00757 &weight_op[4], &weight_avg[4], 00758 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), 00759 pixel_shift, chroma444); 00760 }else if(IS_SUB_4X8(sub_mb_type)){ 00761 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, 00762 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], 00763 &weight_op[5], &weight_avg[5], 00764 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), 00765 pixel_shift, chroma444); 00766 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, 00767 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], 00768 &weight_op[5], &weight_avg[5], 00769 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), 00770 pixel_shift, chroma444); 00771 }else{ 00772 int j; 00773 assert(IS_SUB_4X4(sub_mb_type)); 00774 for(j=0; j<4; j++){ 00775 int sub_x_offset= x_offset + 2*(j&1); 00776 int sub_y_offset= y_offset + (j&2); 00777 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, 00778 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], 00779 &weight_op[6], &weight_avg[6], 00780 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), 00781 pixel_shift, chroma444); 00782 } 00783 } 00784 } 00785 } 00786 00787 prefetch_motion(h, 1, pixel_shift, chroma444); 00788 } 00789 00790 #define hl_motion_fn(sh, bits) \ 00791 static av_always_inline void hl_motion_ ## bits(H264Context *h, \ 00792 uint8_t *dest_y, \ 00793 uint8_t *dest_cb, uint8_t *dest_cr, \ 00794 qpel_mc_func (*qpix_put)[16], \ 00795 h264_chroma_mc_func (*chroma_put), \ 00796 qpel_mc_func (*qpix_avg)[16], \ 00797 h264_chroma_mc_func (*chroma_avg), \ 00798 h264_weight_func *weight_op, \ 00799 h264_biweight_func *weight_avg, \ 00800 int chroma444) \ 00801 { \ 00802 hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \ 00803 qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \ 00804 } 00805 hl_motion_fn(0, 8); 00806 hl_motion_fn(1, 16); 00807 00808 static void free_tables(H264Context *h, int free_rbsp){ 00809 int i; 00810 H264Context *hx; 00811 00812 av_freep(&h->intra4x4_pred_mode); 00813 av_freep(&h->chroma_pred_mode_table); 00814 av_freep(&h->cbp_table); 00815 av_freep(&h->mvd_table[0]); 00816 av_freep(&h->mvd_table[1]); 00817 av_freep(&h->direct_table); 00818 av_freep(&h->non_zero_count); 00819 av_freep(&h->slice_table_base); 00820 h->slice_table= NULL; 00821 av_freep(&h->list_counts); 00822 00823 av_freep(&h->mb2b_xy); 00824 av_freep(&h->mb2br_xy); 00825 00826 for(i = 0; i < MAX_THREADS; i++) { 00827 hx = h->thread_context[i]; 00828 if(!hx) continue; 00829 av_freep(&hx->top_borders[1]); 00830 av_freep(&hx->top_borders[0]); 00831 av_freep(&hx->s.obmc_scratchpad); 00832 if (free_rbsp){ 00833 av_freep(&hx->rbsp_buffer[1]); 00834 av_freep(&hx->rbsp_buffer[0]); 00835 hx->rbsp_buffer_size[0] = 0; 00836 hx->rbsp_buffer_size[1] = 0; 00837 } 00838 if (i) av_freep(&h->thread_context[i]); 00839 } 00840 } 00841 00842 static void init_dequant8_coeff_table(H264Context *h){ 00843 int i,j,q,x; 00844 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8); 00845 00846 for(i=0; i<6; i++ ){ 00847 h->dequant8_coeff[i] = h->dequant8_buffer[i]; 00848 for(j=0; j<i; j++){ 00849 if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){ 00850 h->dequant8_coeff[i] = h->dequant8_buffer[j]; 00851 break; 00852 } 00853 } 00854 if(j<i) 00855 continue; 00856 00857 for(q=0; q<max_qp+1; q++){ 00858 int shift = div6[q]; 00859 int idx = rem6[q]; 00860 for(x=0; x<64; x++) 00861 h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] = 00862 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * 00863 h->pps.scaling_matrix8[i][x]) << shift; 00864 } 00865 } 00866 } 00867 00868 static void init_dequant4_coeff_table(H264Context *h){ 00869 int i,j,q,x; 00870 const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8); 00871 for(i=0; i<6; i++ ){ 00872 h->dequant4_coeff[i] = h->dequant4_buffer[i]; 00873 for(j=0; j<i; j++){ 00874 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){ 00875 h->dequant4_coeff[i] = h->dequant4_buffer[j]; 00876 break; 00877 } 00878 } 00879 if(j<i) 00880 continue; 00881 00882 for(q=0; q<max_qp+1; q++){ 00883 int shift = div6[q] + 2; 00884 int idx = rem6[q]; 00885 for(x=0; x<16; x++) 00886 h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] = 00887 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * 00888 h->pps.scaling_matrix4[i][x]) << shift; 00889 } 00890 } 00891 } 00892 00893 static void init_dequant_tables(H264Context *h){ 00894 int i,x; 00895 init_dequant4_coeff_table(h); 00896 if(h->pps.transform_8x8_mode) 00897 init_dequant8_coeff_table(h); 00898 if(h->sps.transform_bypass){ 00899 for(i=0; i<6; i++) 00900 for(x=0; x<16; x++) 00901 h->dequant4_coeff[i][0][x] = 1<<6; 00902 if(h->pps.transform_8x8_mode) 00903 for(i=0; i<6; i++) 00904 for(x=0; x<64; x++) 00905 h->dequant8_coeff[i][0][x] = 1<<6; 00906 } 00907 } 00908 00909 00910 int ff_h264_alloc_tables(H264Context *h){ 00911 MpegEncContext * const s = &h->s; 00912 const int big_mb_num= s->mb_stride * (s->mb_height+1); 00913 const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count; 00914 int x,y; 00915 00916 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail) 00917 00918 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 48 * sizeof(uint8_t), fail) 00919 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail) 00920 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail) 00921 00922 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail) 00923 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail); 00924 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail); 00925 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail); 00926 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail) 00927 00928 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base)); 00929 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; 00930 00931 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail); 00932 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail); 00933 for(y=0; y<s->mb_height; y++){ 00934 for(x=0; x<s->mb_width; x++){ 00935 const int mb_xy= x + y*s->mb_stride; 00936 const int b_xy = 4*x + 4*y*h->b_stride; 00937 00938 h->mb2b_xy [mb_xy]= b_xy; 00939 h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride))); 00940 } 00941 } 00942 00943 s->obmc_scratchpad = NULL; 00944 00945 if(!h->dequant4_coeff[0]) 00946 init_dequant_tables(h); 00947 00948 return 0; 00949 fail: 00950 free_tables(h, 1); 00951 return -1; 00952 } 00953 00957 static void clone_tables(H264Context *dst, H264Context *src, int i){ 00958 MpegEncContext * const s = &src->s; 00959 dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride; 00960 dst->non_zero_count = src->non_zero_count; 00961 dst->slice_table = src->slice_table; 00962 dst->cbp_table = src->cbp_table; 00963 dst->mb2b_xy = src->mb2b_xy; 00964 dst->mb2br_xy = src->mb2br_xy; 00965 dst->chroma_pred_mode_table = src->chroma_pred_mode_table; 00966 dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride; 00967 dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride; 00968 dst->direct_table = src->direct_table; 00969 dst->list_counts = src->list_counts; 00970 00971 dst->s.obmc_scratchpad = NULL; 00972 ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma); 00973 } 00974 00979 static int context_init(H264Context *h){ 00980 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail) 00981 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail) 00982 00983 h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] = 00984 h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE; 00985 00986 return 0; 00987 fail: 00988 return -1; // free_tables will clean up for us 00989 } 00990 00991 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size); 00992 00993 static av_cold void common_init(H264Context *h){ 00994 MpegEncContext * const s = &h->s; 00995 00996 s->width = s->avctx->width; 00997 s->height = s->avctx->height; 00998 s->codec_id= s->avctx->codec->id; 00999 01000 ff_h264dsp_init(&h->h264dsp, 8); 01001 ff_h264_pred_init(&h->hpc, s->codec_id, 8); 01002 01003 h->dequant_coeff_pps= -1; 01004 s->unrestricted_mv=1; 01005 s->decode=1; //FIXME 01006 01007 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early 01008 01009 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t)); 01010 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t)); 01011 } 01012 01013 int ff_h264_decode_extradata(H264Context *h) 01014 { 01015 AVCodecContext *avctx = h->s.avctx; 01016 01017 if(avctx->extradata[0] == 1){ 01018 int i, cnt, nalsize; 01019 unsigned char *p = avctx->extradata; 01020 01021 h->is_avc = 1; 01022 01023 if(avctx->extradata_size < 7) { 01024 av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); 01025 return -1; 01026 } 01027 /* sps and pps in the avcC always have length coded with 2 bytes, 01028 so put a fake nal_length_size = 2 while parsing them */ 01029 h->nal_length_size = 2; 01030 // Decode sps from avcC 01031 cnt = *(p+5) & 0x1f; // Number of sps 01032 p += 6; 01033 for (i = 0; i < cnt; i++) { 01034 nalsize = AV_RB16(p) + 2; 01035 if (p - avctx->extradata + nalsize > avctx->extradata_size) 01036 return -1; 01037 if(decode_nal_units(h, p, nalsize) < 0) { 01038 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i); 01039 return -1; 01040 } 01041 p += nalsize; 01042 } 01043 // Decode pps from avcC 01044 cnt = *(p++); // Number of pps 01045 for (i = 0; i < cnt; i++) { 01046 nalsize = AV_RB16(p) + 2; 01047 if (p - avctx->extradata + nalsize > avctx->extradata_size) 01048 return -1; 01049 if (decode_nal_units(h, p, nalsize) < 0) { 01050 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i); 01051 return -1; 01052 } 01053 p += nalsize; 01054 } 01055 // Now store right nal length size, that will be use to parse all other nals 01056 h->nal_length_size = (avctx->extradata[4] & 0x03) + 1; 01057 } else { 01058 h->is_avc = 0; 01059 if(decode_nal_units(h, avctx->extradata, avctx->extradata_size) < 0) 01060 return -1; 01061 } 01062 return 0; 01063 } 01064 01065 av_cold int ff_h264_decode_init(AVCodecContext *avctx){ 01066 H264Context *h= avctx->priv_data; 01067 MpegEncContext * const s = &h->s; 01068 01069 MPV_decode_defaults(s); 01070 01071 s->avctx = avctx; 01072 common_init(h); 01073 01074 s->out_format = FMT_H264; 01075 s->workaround_bugs= avctx->workaround_bugs; 01076 01077 // set defaults 01078 // s->decode_mb= ff_h263_decode_mb; 01079 s->quarter_sample = 1; 01080 if(!avctx->has_b_frames) 01081 s->low_delay= 1; 01082 01083 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; 01084 01085 ff_h264_decode_init_vlc(); 01086 01087 h->pixel_shift = 0; 01088 h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; 01089 01090 h->thread_context[0] = h; 01091 h->outputed_poc = h->next_outputed_poc = INT_MIN; 01092 h->prev_poc_msb= 1<<16; 01093 h->x264_build = -1; 01094 ff_h264_reset_sei(h); 01095 if(avctx->codec_id == CODEC_ID_H264){ 01096 if(avctx->ticks_per_frame == 1){ 01097 s->avctx->time_base.den *=2; 01098 } 01099 avctx->ticks_per_frame = 2; 01100 } 01101 01102 if(avctx->extradata_size > 0 && avctx->extradata && 01103 ff_h264_decode_extradata(h)) 01104 return -1; 01105 01106 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){ 01107 s->avctx->has_b_frames = h->sps.num_reorder_frames; 01108 s->low_delay = 0; 01109 } 01110 01111 return 0; 01112 } 01113 01114 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size)))) 01115 static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base) 01116 { 01117 int i; 01118 01119 for (i=0; i<count; i++){ 01120 assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) || 01121 IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) || 01122 !from[i])); 01123 to[i] = REBASE_PICTURE(from[i], new_base, old_base); 01124 } 01125 } 01126 01127 static void copy_parameter_set(void **to, void **from, int count, int size) 01128 { 01129 int i; 01130 01131 for (i=0; i<count; i++){ 01132 if (to[i] && !from[i]) av_freep(&to[i]); 01133 else if (from[i] && !to[i]) to[i] = av_malloc(size); 01134 01135 if (from[i]) memcpy(to[i], from[i], size); 01136 } 01137 } 01138 01139 static int decode_init_thread_copy(AVCodecContext *avctx){ 01140 H264Context *h= avctx->priv_data; 01141 01142 if (!avctx->is_copy) return 0; 01143 memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); 01144 memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); 01145 01146 return 0; 01147 } 01148 01149 #define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field) 01150 static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){ 01151 H264Context *h= dst->priv_data, *h1= src->priv_data; 01152 MpegEncContext * const s = &h->s, * const s1 = &h1->s; 01153 int inited = s->context_initialized, err; 01154 int i; 01155 01156 if(dst == src || !s1->context_initialized) return 0; 01157 01158 err = ff_mpeg_update_thread_context(dst, src); 01159 if(err) return err; 01160 01161 //FIXME handle width/height changing 01162 if(!inited){ 01163 for(i = 0; i < MAX_SPS_COUNT; i++) 01164 av_freep(h->sps_buffers + i); 01165 01166 for(i = 0; i < MAX_PPS_COUNT; i++) 01167 av_freep(h->pps_buffers + i); 01168 01169 memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc 01170 memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); 01171 memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); 01172 if (ff_h264_alloc_tables(h) < 0) { 01173 av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n"); 01174 return AVERROR(ENOMEM); 01175 } 01176 context_init(h); 01177 01178 for(i=0; i<2; i++){ 01179 h->rbsp_buffer[i] = NULL; 01180 h->rbsp_buffer_size[i] = 0; 01181 } 01182 01183 h->thread_context[0] = h; 01184 01185 // frame_start may not be called for the next thread (if it's decoding a bottom field) 01186 // so this has to be allocated here 01187 h->s.obmc_scratchpad = av_malloc(16*6*s->linesize); 01188 01189 s->dsp.clear_blocks(h->mb); 01190 s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift)); 01191 } 01192 01193 //extradata/NAL handling 01194 h->is_avc = h1->is_avc; 01195 01196 //SPS/PPS 01197 copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS)); 01198 h->sps = h1->sps; 01199 copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS)); 01200 h->pps = h1->pps; 01201 01202 //Dequantization matrices 01203 //FIXME these are big - can they be only copied when PPS changes? 01204 copy_fields(h, h1, dequant4_buffer, dequant4_coeff); 01205 01206 for(i=0; i<6; i++) 01207 h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]); 01208 01209 for(i=0; i<6; i++) 01210 h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]); 01211 01212 h->dequant_coeff_pps = h1->dequant_coeff_pps; 01213 01214 //POC timing 01215 copy_fields(h, h1, poc_lsb, redundant_pic_count); 01216 01217 //reference lists 01218 copy_fields(h, h1, ref_count, list_count); 01219 copy_fields(h, h1, ref_list, intra_gb); 01220 copy_fields(h, h1, short_ref, cabac_init_idc); 01221 01222 copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1); 01223 copy_picture_range(h->long_ref, h1->long_ref, 32, s, s1); 01224 copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1); 01225 01226 h->last_slice_type = h1->last_slice_type; 01227 01228 if(!s->current_picture_ptr) return 0; 01229 01230 if(!s->dropable) { 01231 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); 01232 h->prev_poc_msb = h->poc_msb; 01233 h->prev_poc_lsb = h->poc_lsb; 01234 } 01235 h->prev_frame_num_offset= h->frame_num_offset; 01236 h->prev_frame_num = h->frame_num; 01237 h->outputed_poc = h->next_outputed_poc; 01238 01239 return 0; 01240 } 01241 01242 int ff_h264_frame_start(H264Context *h){ 01243 MpegEncContext * const s = &h->s; 01244 int i; 01245 const int pixel_shift = h->pixel_shift; 01246 int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1; 01247 01248 if(MPV_frame_start(s, s->avctx) < 0) 01249 return -1; 01250 ff_er_frame_start(s); 01251 /* 01252 * MPV_frame_start uses pict_type to derive key_frame. 01253 * This is incorrect for H.264; IDR markings must be used. 01254 * Zero here; IDR markings per slice in frame or fields are ORed in later. 01255 * See decode_nal_units(). 01256 */ 01257 s->current_picture_ptr->key_frame= 0; 01258 s->current_picture_ptr->mmco_reset= 0; 01259 01260 assert(s->linesize && s->uvlinesize); 01261 01262 for(i=0; i<16; i++){ 01263 h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3); 01264 h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3); 01265 } 01266 for(i=0; i<16; i++){ 01267 h->block_offset[16+i]= 01268 h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); 01269 h->block_offset[48+16+i]= 01270 h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); 01271 } 01272 01273 /* can't be in alloc_tables because linesize isn't known there. 01274 * FIXME: redo bipred weight to not require extra buffer? */ 01275 for(i = 0; i < thread_count; i++) 01276 if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad) 01277 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize); 01278 01279 /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/ 01280 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table)); 01281 01282 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; 01283 01284 // We mark the current picture as non-reference after allocating it, so 01285 // that if we break out due to an error it can be released automatically 01286 // in the next MPV_frame_start(). 01287 // SVQ3 as well as most other codecs have only last/next/current and thus 01288 // get released even with set reference, besides SVQ3 and others do not 01289 // mark frames as reference later "naturally". 01290 if(s->codec_id != CODEC_ID_SVQ3) 01291 s->current_picture_ptr->reference= 0; 01292 01293 s->current_picture_ptr->field_poc[0]= 01294 s->current_picture_ptr->field_poc[1]= INT_MAX; 01295 01296 h->next_output_pic = NULL; 01297 01298 assert(s->current_picture_ptr->long_ref==0); 01299 01300 return 0; 01301 } 01302 01311 static void decode_postinit(H264Context *h, int setup_finished){ 01312 MpegEncContext * const s = &h->s; 01313 Picture *out = s->current_picture_ptr; 01314 Picture *cur = s->current_picture_ptr; 01315 int i, pics, out_of_order, out_idx; 01316 01317 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264; 01318 s->current_picture_ptr->pict_type= s->pict_type; 01319 01320 if (h->next_output_pic) return; 01321 01322 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { 01323 //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here. 01324 //If we have one field per packet, we can. The check in decode_nal_units() is not good enough 01325 //to find this yet, so we assume the worst for now. 01326 //if (setup_finished) 01327 // ff_thread_finish_setup(s->avctx); 01328 return; 01329 } 01330 01331 cur->interlaced_frame = 0; 01332 cur->repeat_pict = 0; 01333 01334 /* Signal interlacing information externally. */ 01335 /* Prioritize picture timing SEI information over used decoding process if it exists. */ 01336 01337 if(h->sps.pic_struct_present_flag){ 01338 switch (h->sei_pic_struct) 01339 { 01340 case SEI_PIC_STRUCT_FRAME: 01341 break; 01342 case SEI_PIC_STRUCT_TOP_FIELD: 01343 case SEI_PIC_STRUCT_BOTTOM_FIELD: 01344 cur->interlaced_frame = 1; 01345 break; 01346 case SEI_PIC_STRUCT_TOP_BOTTOM: 01347 case SEI_PIC_STRUCT_BOTTOM_TOP: 01348 if (FIELD_OR_MBAFF_PICTURE) 01349 cur->interlaced_frame = 1; 01350 else 01351 // try to flag soft telecine progressive 01352 cur->interlaced_frame = h->prev_interlaced_frame; 01353 break; 01354 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP: 01355 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM: 01356 // Signal the possibility of telecined film externally (pic_struct 5,6) 01357 // From these hints, let the applications decide if they apply deinterlacing. 01358 cur->repeat_pict = 1; 01359 break; 01360 case SEI_PIC_STRUCT_FRAME_DOUBLING: 01361 // Force progressive here, as doubling interlaced frame is a bad idea. 01362 cur->repeat_pict = 2; 01363 break; 01364 case SEI_PIC_STRUCT_FRAME_TRIPLING: 01365 cur->repeat_pict = 4; 01366 break; 01367 } 01368 01369 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP) 01370 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0; 01371 }else{ 01372 /* Derive interlacing flag from used decoding process. */ 01373 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE; 01374 } 01375 h->prev_interlaced_frame = cur->interlaced_frame; 01376 01377 if (cur->field_poc[0] != cur->field_poc[1]){ 01378 /* Derive top_field_first from field pocs. */ 01379 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1]; 01380 }else{ 01381 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){ 01382 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */ 01383 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM 01384 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP) 01385 cur->top_field_first = 1; 01386 else 01387 cur->top_field_first = 0; 01388 }else{ 01389 /* Most likely progressive */ 01390 cur->top_field_first = 0; 01391 } 01392 } 01393 01394 //FIXME do something with unavailable reference frames 01395 01396 /* Sort B-frames into display order */ 01397 01398 if(h->sps.bitstream_restriction_flag 01399 && s->avctx->has_b_frames < h->sps.num_reorder_frames){ 01400 s->avctx->has_b_frames = h->sps.num_reorder_frames; 01401 s->low_delay = 0; 01402 } 01403 01404 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT 01405 && !h->sps.bitstream_restriction_flag){ 01406 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT; 01407 s->low_delay= 0; 01408 } 01409 01410 pics = 0; 01411 while(h->delayed_pic[pics]) pics++; 01412 01413 assert(pics <= MAX_DELAYED_PIC_COUNT); 01414 01415 h->delayed_pic[pics++] = cur; 01416 if(cur->reference == 0) 01417 cur->reference = DELAYED_PIC_REF; 01418 01419 out = h->delayed_pic[0]; 01420 out_idx = 0; 01421 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++) 01422 if(h->delayed_pic[i]->poc < out->poc){ 01423 out = h->delayed_pic[i]; 01424 out_idx = i; 01425 } 01426 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) 01427 h->next_outputed_poc= INT_MIN; 01428 out_of_order = out->poc < h->next_outputed_poc; 01429 01430 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames) 01431 { } 01432 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) 01433 || (s->low_delay && 01434 ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2) 01435 || cur->pict_type == AV_PICTURE_TYPE_B))) 01436 { 01437 s->low_delay = 0; 01438 s->avctx->has_b_frames++; 01439 } 01440 01441 if(out_of_order || pics > s->avctx->has_b_frames){ 01442 out->reference &= ~DELAYED_PIC_REF; 01443 out->owner2 = s; // for frame threading, the owner must be the second field's thread 01444 // or else the first thread can release the picture and reuse it unsafely 01445 for(i=out_idx; h->delayed_pic[i]; i++) 01446 h->delayed_pic[i] = h->delayed_pic[i+1]; 01447 } 01448 if(!out_of_order && pics > s->avctx->has_b_frames){ 01449 h->next_output_pic = out; 01450 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) { 01451 h->next_outputed_poc = INT_MIN; 01452 } else 01453 h->next_outputed_poc = out->poc; 01454 }else{ 01455 av_log(s->avctx, AV_LOG_DEBUG, "no picture\n"); 01456 } 01457 01458 if (setup_finished) 01459 ff_thread_finish_setup(s->avctx); 01460 } 01461 01462 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){ 01463 MpegEncContext * const s = &h->s; 01464 uint8_t *top_border; 01465 int top_idx = 1; 01466 const int pixel_shift = h->pixel_shift; 01467 01468 src_y -= linesize; 01469 src_cb -= uvlinesize; 01470 src_cr -= uvlinesize; 01471 01472 if(!simple && FRAME_MBAFF){ 01473 if(s->mb_y&1){ 01474 if(!MB_MBAFF){ 01475 top_border = h->top_borders[0][s->mb_x]; 01476 AV_COPY128(top_border, src_y + 15*linesize); 01477 if (pixel_shift) 01478 AV_COPY128(top_border+16, src_y+15*linesize+16); 01479 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 01480 if(chroma444){ 01481 if (pixel_shift){ 01482 AV_COPY128(top_border+32, src_cb + 15*uvlinesize); 01483 AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16); 01484 AV_COPY128(top_border+64, src_cr + 15*uvlinesize); 01485 AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16); 01486 } else { 01487 AV_COPY128(top_border+16, src_cb + 15*uvlinesize); 01488 AV_COPY128(top_border+32, src_cr + 15*uvlinesize); 01489 } 01490 } else { 01491 if (pixel_shift) { 01492 AV_COPY128(top_border+32, src_cb+7*uvlinesize); 01493 AV_COPY128(top_border+48, src_cr+7*uvlinesize); 01494 } else { 01495 AV_COPY64(top_border+16, src_cb+7*uvlinesize); 01496 AV_COPY64(top_border+24, src_cr+7*uvlinesize); 01497 } 01498 } 01499 } 01500 } 01501 }else if(MB_MBAFF){ 01502 top_idx = 0; 01503 }else 01504 return; 01505 } 01506 01507 top_border = h->top_borders[top_idx][s->mb_x]; 01508 // There are two lines saved, the line above the the top macroblock of a pair, 01509 // and the line above the bottom macroblock 01510 AV_COPY128(top_border, src_y + 16*linesize); 01511 if (pixel_shift) 01512 AV_COPY128(top_border+16, src_y+16*linesize+16); 01513 01514 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 01515 if(chroma444){ 01516 if (pixel_shift){ 01517 AV_COPY128(top_border+32, src_cb + 16*linesize); 01518 AV_COPY128(top_border+48, src_cb + 16*linesize+16); 01519 AV_COPY128(top_border+64, src_cr + 16*linesize); 01520 AV_COPY128(top_border+80, src_cr + 16*linesize+16); 01521 } else { 01522 AV_COPY128(top_border+16, src_cb + 16*linesize); 01523 AV_COPY128(top_border+32, src_cr + 16*linesize); 01524 } 01525 } else { 01526 if (pixel_shift) { 01527 AV_COPY128(top_border+32, src_cb+8*uvlinesize); 01528 AV_COPY128(top_border+48, src_cr+8*uvlinesize); 01529 } else { 01530 AV_COPY64(top_border+16, src_cb+8*uvlinesize); 01531 AV_COPY64(top_border+24, src_cr+8*uvlinesize); 01532 } 01533 } 01534 } 01535 } 01536 01537 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, 01538 uint8_t *src_cb, uint8_t *src_cr, 01539 int linesize, int uvlinesize, 01540 int xchg, int chroma444, 01541 int simple, int pixel_shift){ 01542 MpegEncContext * const s = &h->s; 01543 int deblock_topleft; 01544 int deblock_top; 01545 int top_idx = 1; 01546 uint8_t *top_border_m1; 01547 uint8_t *top_border; 01548 01549 if(!simple && FRAME_MBAFF){ 01550 if(s->mb_y&1){ 01551 if(!MB_MBAFF) 01552 return; 01553 }else{ 01554 top_idx = MB_MBAFF ? 0 : 1; 01555 } 01556 } 01557 01558 if(h->deblocking_filter == 2) { 01559 deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num; 01560 deblock_top = h->top_type; 01561 } else { 01562 deblock_topleft = (s->mb_x > 0); 01563 deblock_top = (s->mb_y > !!MB_FIELD); 01564 } 01565 01566 src_y -= linesize + 1 + pixel_shift; 01567 src_cb -= uvlinesize + 1 + pixel_shift; 01568 src_cr -= uvlinesize + 1 + pixel_shift; 01569 01570 top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; 01571 top_border = h->top_borders[top_idx][s->mb_x]; 01572 01573 #define XCHG(a,b,xchg)\ 01574 if (pixel_shift) {\ 01575 if (xchg) {\ 01576 AV_SWAP64(b+0,a+0);\ 01577 AV_SWAP64(b+8,a+8);\ 01578 } else {\ 01579 AV_COPY128(b,a); \ 01580 }\ 01581 } else \ 01582 if (xchg) AV_SWAP64(b,a);\ 01583 else AV_COPY64(b,a); 01584 01585 if(deblock_top){ 01586 if(deblock_topleft){ 01587 XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1); 01588 } 01589 XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg); 01590 XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1); 01591 if(s->mb_x+1 < s->mb_width){ 01592 XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1); 01593 } 01594 } 01595 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 01596 if(chroma444){ 01597 if(deblock_topleft){ 01598 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1); 01599 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1); 01600 } 01601 XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg); 01602 XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1); 01603 XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg); 01604 XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1); 01605 if(s->mb_x+1 < s->mb_width){ 01606 XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1); 01607 XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1); 01608 } 01609 } else { 01610 if(deblock_top){ 01611 if(deblock_topleft){ 01612 XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1); 01613 XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1); 01614 } 01615 XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1); 01616 XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1); 01617 } 01618 } 01619 } 01620 } 01621 01622 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) { 01623 if (high_bit_depth) { 01624 return AV_RN32A(((int32_t*)mb) + index); 01625 } else 01626 return AV_RN16A(mb + index); 01627 } 01628 01629 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) { 01630 if (high_bit_depth) { 01631 AV_WN32A(((int32_t*)mb) + index, value); 01632 } else 01633 AV_WN16A(mb + index, value); 01634 } 01635 01636 static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass, 01637 int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p) 01638 { 01639 MpegEncContext * const s = &h->s; 01640 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); 01641 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); 01642 int i; 01643 int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1]; 01644 block_offset += 16*p; 01645 if(IS_INTRA4x4(mb_type)){ 01646 if(simple || !s->encoding){ 01647 if(IS_8x8DCT(mb_type)){ 01648 if(transform_bypass){ 01649 idct_dc_add = 01650 idct_add = s->dsp.add_pixels8; 01651 }else{ 01652 idct_dc_add = h->h264dsp.h264_idct8_dc_add; 01653 idct_add = h->h264dsp.h264_idct8_add; 01654 } 01655 for(i=0; i<16; i+=4){ 01656 uint8_t * const ptr= dest_y + block_offset[i]; 01657 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; 01658 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ 01659 h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); 01660 }else{ 01661 const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ]; 01662 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, 01663 (h->topright_samples_available<<i)&0x4000, linesize); 01664 if(nnz){ 01665 if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256)) 01666 idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); 01667 else 01668 idct_add (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); 01669 } 01670 } 01671 } 01672 }else{ 01673 if(transform_bypass){ 01674 idct_dc_add = 01675 idct_add = s->dsp.add_pixels4; 01676 }else{ 01677 idct_dc_add = h->h264dsp.h264_idct_dc_add; 01678 idct_add = h->h264dsp.h264_idct_add; 01679 } 01680 for(i=0; i<16; i++){ 01681 uint8_t * const ptr= dest_y + block_offset[i]; 01682 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; 01683 01684 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ 01685 h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); 01686 }else{ 01687 uint8_t *topright; 01688 int nnz, tr; 01689 uint64_t tr_high; 01690 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ 01691 const int topright_avail= (h->topright_samples_available<<i)&0x8000; 01692 assert(s->mb_y || linesize <= block_offset[i]); 01693 if(!topright_avail){ 01694 if (pixel_shift) { 01695 tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL; 01696 topright= (uint8_t*) &tr_high; 01697 } else { 01698 tr= ptr[3 - linesize]*0x01010101; 01699 topright= (uint8_t*) &tr; 01700 } 01701 }else 01702 topright= ptr + (4 << pixel_shift) - linesize; 01703 }else 01704 topright= NULL; 01705 01706 h->hpc.pred4x4[ dir ](ptr, topright, linesize); 01707 nnz = h->non_zero_count_cache[ scan8[i+p*16] ]; 01708 if(nnz){ 01709 if(is_h264){ 01710 if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256)) 01711 idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); 01712 else 01713 idct_add (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize); 01714 }else 01715 ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0); 01716 } 01717 } 01718 } 01719 } 01720 } 01721 }else{ 01722 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); 01723 if(is_h264){ 01724 if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){ 01725 if(!transform_bypass) 01726 h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]); 01727 else{ 01728 static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16, 01729 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16}; 01730 for(i = 0; i < 16; i++) 01731 dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i)); 01732 } 01733 } 01734 }else 01735 ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale); 01736 } 01737 } 01738 01739 static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass, 01740 int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p) 01741 { 01742 MpegEncContext * const s = &h->s; 01743 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); 01744 int i; 01745 block_offset += 16*p; 01746 if(!IS_INTRA4x4(mb_type)){ 01747 if(is_h264){ 01748 if(IS_INTRA16x16(mb_type)){ 01749 if(transform_bypass){ 01750 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ 01751 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize); 01752 }else{ 01753 for(i=0; i<16; i++){ 01754 if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256)) 01755 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize); 01756 } 01757 } 01758 }else{ 01759 h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8); 01760 } 01761 }else if(h->cbp&15){ 01762 if(transform_bypass){ 01763 const int di = IS_8x8DCT(mb_type) ? 4 : 1; 01764 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; 01765 for(i=0; i<16; i+=di){ 01766 if(h->non_zero_count_cache[ scan8[i+p*16] ]){ 01767 idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize); 01768 } 01769 } 01770 }else{ 01771 if(IS_8x8DCT(mb_type)){ 01772 h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8); 01773 }else{ 01774 h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8); 01775 } 01776 } 01777 } 01778 }else{ 01779 for(i=0; i<16; i++){ 01780 if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below 01781 uint8_t * const ptr= dest_y + block_offset[i]; 01782 ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0); 01783 } 01784 } 01785 } 01786 } 01787 } 01788 01789 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){ 01790 MpegEncContext * const s = &h->s; 01791 const int mb_x= s->mb_x; 01792 const int mb_y= s->mb_y; 01793 const int mb_xy= h->mb_xy; 01794 const int mb_type= s->current_picture.mb_type[mb_xy]; 01795 uint8_t *dest_y, *dest_cb, *dest_cr; 01796 int linesize, uvlinesize /*dct_offset*/; 01797 int i, j; 01798 int *block_offset = &h->block_offset[0]; 01799 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass); 01800 /* is_h264 should always be true if SVQ3 is disabled. */ 01801 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; 01802 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); 01803 01804 dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; 01805 dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; 01806 dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; 01807 01808 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4); 01809 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2); 01810 01811 h->list_counts[mb_xy]= h->list_count; 01812 01813 if (!simple && MB_FIELD) { 01814 linesize = h->mb_linesize = s->linesize * 2; 01815 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; 01816 block_offset = &h->block_offset[48]; 01817 if(mb_y&1){ //FIXME move out of this function? 01818 dest_y -= s->linesize*15; 01819 dest_cb-= s->uvlinesize*7; 01820 dest_cr-= s->uvlinesize*7; 01821 } 01822 if(FRAME_MBAFF) { 01823 int list; 01824 for(list=0; list<h->list_count; list++){ 01825 if(!USES_LIST(mb_type, list)) 01826 continue; 01827 if(IS_16X16(mb_type)){ 01828 int8_t *ref = &h->ref_cache[list][scan8[0]]; 01829 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1); 01830 }else{ 01831 for(i=0; i<16; i+=4){ 01832 int ref = h->ref_cache[list][scan8[i]]; 01833 if(ref >= 0) 01834 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1); 01835 } 01836 } 01837 } 01838 } 01839 } else { 01840 linesize = h->mb_linesize = s->linesize; 01841 uvlinesize = h->mb_uvlinesize = s->uvlinesize; 01842 // dct_offset = s->linesize * 16; 01843 } 01844 01845 if (!simple && IS_INTRA_PCM(mb_type)) { 01846 if (pixel_shift) { 01847 const int bit_depth = h->sps.bit_depth_luma; 01848 int j; 01849 GetBitContext gb; 01850 init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth); 01851 01852 for (i = 0; i < 16; i++) { 01853 uint16_t *tmp_y = (uint16_t*)(dest_y + i*linesize); 01854 for (j = 0; j < 16; j++) 01855 tmp_y[j] = get_bits(&gb, bit_depth); 01856 } 01857 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 01858 if (!h->sps.chroma_format_idc) { 01859 for (i = 0; i < 8; i++) { 01860 uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); 01861 for (j = 0; j < 8; j++) { 01862 tmp_cb[j] = 1 << (bit_depth - 1); 01863 } 01864 } 01865 for (i = 0; i < 8; i++) { 01866 uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); 01867 for (j = 0; j < 8; j++) { 01868 tmp_cr[j] = 1 << (bit_depth - 1); 01869 } 01870 } 01871 } else { 01872 for (i = 0; i < 8; i++) { 01873 uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); 01874 for (j = 0; j < 8; j++) 01875 tmp_cb[j] = get_bits(&gb, bit_depth); 01876 } 01877 for (i = 0; i < 8; i++) { 01878 uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); 01879 for (j = 0; j < 8; j++) 01880 tmp_cr[j] = get_bits(&gb, bit_depth); 01881 } 01882 } 01883 } 01884 } else { 01885 for (i=0; i<16; i++) { 01886 memcpy(dest_y + i* linesize, h->mb + i*8, 16); 01887 } 01888 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 01889 if (!h->sps.chroma_format_idc) { 01890 for (i = 0; i < 8; i++) { 01891 memset(dest_cb + i*uvlinesize, 128, 8); 01892 memset(dest_cr + i*uvlinesize, 128, 8); 01893 } 01894 } else { 01895 for (i = 0; i < 8; i++) { 01896 memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, 8); 01897 memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, 8); 01898 } 01899 } 01900 } 01901 } 01902 } else { 01903 if(IS_INTRA(mb_type)){ 01904 if(h->deblocking_filter) 01905 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift); 01906 01907 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 01908 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); 01909 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize); 01910 } 01911 01912 hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0); 01913 01914 if(h->deblocking_filter) 01915 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift); 01916 }else if(is_h264){ 01917 if (pixel_shift) { 01918 hl_motion_16(h, dest_y, dest_cb, dest_cr, 01919 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, 01920 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, 01921 h->h264dsp.weight_h264_pixels_tab, 01922 h->h264dsp.biweight_h264_pixels_tab, 0); 01923 } else 01924 hl_motion_8(h, dest_y, dest_cb, dest_cr, 01925 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, 01926 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, 01927 h->h264dsp.weight_h264_pixels_tab, 01928 h->h264dsp.biweight_h264_pixels_tab, 0); 01929 } 01930 01931 hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0); 01932 01933 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){ 01934 uint8_t *dest[2] = {dest_cb, dest_cr}; 01935 if(transform_bypass){ 01936 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ 01937 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize); 01938 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize); 01939 }else{ 01940 idct_add = s->dsp.add_pixels4; 01941 for(j=1; j<3; j++){ 01942 for(i=j*16; i<j*16+4; i++){ 01943 if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) 01944 idct_add (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize); 01945 } 01946 } 01947 } 01948 }else{ 01949 if(is_h264){ 01950 if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) 01951 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); 01952 if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) 01953 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); 01954 h->h264dsp.h264_idct_add8(dest, block_offset, 01955 h->mb, uvlinesize, 01956 h->non_zero_count_cache); 01957 }else{ 01958 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); 01959 h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); 01960 for(j=1; j<3; j++){ 01961 for(i=j*16; i<j*16+4; i++){ 01962 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ 01963 uint8_t * const ptr= dest[j-1] + block_offset[i]; 01964 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2); 01965 } 01966 } 01967 } 01968 } 01969 } 01970 } 01971 } 01972 if(h->cbp || IS_INTRA(mb_type)) 01973 { 01974 s->dsp.clear_blocks(h->mb); 01975 s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift)); 01976 } 01977 } 01978 01979 static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){ 01980 MpegEncContext * const s = &h->s; 01981 const int mb_x= s->mb_x; 01982 const int mb_y= s->mb_y; 01983 const int mb_xy= h->mb_xy; 01984 const int mb_type= s->current_picture.mb_type[mb_xy]; 01985 uint8_t *dest[3]; 01986 int linesize; 01987 int i, j, p; 01988 int *block_offset = &h->block_offset[0]; 01989 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass); 01990 const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1; 01991 01992 for (p = 0; p < plane_count; p++) 01993 { 01994 dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16; 01995 s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4); 01996 } 01997 01998 h->list_counts[mb_xy]= h->list_count; 01999 02000 if (!simple && MB_FIELD) { 02001 linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2; 02002 block_offset = &h->block_offset[48]; 02003 if(mb_y&1) //FIXME move out of this function? 02004 for (p = 0; p < 3; p++) 02005 dest[p] -= s->linesize*15; 02006 if(FRAME_MBAFF) { 02007 int list; 02008 for(list=0; list<h->list_count; list++){ 02009 if(!USES_LIST(mb_type, list)) 02010 continue; 02011 if(IS_16X16(mb_type)){ 02012 int8_t *ref = &h->ref_cache[list][scan8[0]]; 02013 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1); 02014 }else{ 02015 for(i=0; i<16; i+=4){ 02016 int ref = h->ref_cache[list][scan8[i]]; 02017 if(ref >= 0) 02018 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1); 02019 } 02020 } 02021 } 02022 } 02023 } else { 02024 linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize; 02025 } 02026 02027 if (!simple && IS_INTRA_PCM(mb_type)) { 02028 if (pixel_shift) { 02029 const int bit_depth = h->sps.bit_depth_luma; 02030 GetBitContext gb; 02031 init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth); 02032 02033 for (p = 0; p < plane_count; p++) { 02034 for (i = 0; i < 16; i++) { 02035 uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize); 02036 for (j = 0; j < 16; j++) 02037 tmp[j] = get_bits(&gb, bit_depth); 02038 } 02039 } 02040 } else { 02041 for (p = 0; p < plane_count; p++) { 02042 for (i = 0; i < 16; i++) { 02043 memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16); 02044 } 02045 } 02046 } 02047 } else { 02048 if(IS_INTRA(mb_type)){ 02049 if(h->deblocking_filter) 02050 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift); 02051 02052 for (p = 0; p < plane_count; p++) 02053 hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p); 02054 02055 if(h->deblocking_filter) 02056 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift); 02057 }else{ 02058 if (pixel_shift) { 02059 hl_motion_16(h, dest[0], dest[1], dest[2], 02060 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, 02061 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, 02062 h->h264dsp.weight_h264_pixels_tab, 02063 h->h264dsp.biweight_h264_pixels_tab, 1); 02064 } else 02065 hl_motion_8(h, dest[0], dest[1], dest[2], 02066 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, 02067 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, 02068 h->h264dsp.weight_h264_pixels_tab, 02069 h->h264dsp.biweight_h264_pixels_tab, 1); 02070 } 02071 02072 for (p = 0; p < plane_count; p++) 02073 hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p); 02074 } 02075 if(h->cbp || IS_INTRA(mb_type)) 02076 { 02077 s->dsp.clear_blocks(h->mb); 02078 s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift)); 02079 } 02080 } 02081 02085 #define hl_decode_mb_simple(sh, bits) \ 02086 static void hl_decode_mb_simple_ ## bits(H264Context *h){ \ 02087 hl_decode_mb_internal(h, 1, sh); \ 02088 } 02089 hl_decode_mb_simple(0, 8); 02090 hl_decode_mb_simple(1, 16); 02091 02095 static void av_noinline hl_decode_mb_complex(H264Context *h){ 02096 hl_decode_mb_internal(h, 0, h->pixel_shift); 02097 } 02098 02099 static void av_noinline hl_decode_mb_444_complex(H264Context *h){ 02100 hl_decode_mb_444_internal(h, 0, h->pixel_shift); 02101 } 02102 02103 static void av_noinline hl_decode_mb_444_simple(H264Context *h){ 02104 hl_decode_mb_444_internal(h, 1, 0); 02105 } 02106 02107 void ff_h264_hl_decode_mb(H264Context *h){ 02108 MpegEncContext * const s = &h->s; 02109 const int mb_xy= h->mb_xy; 02110 const int mb_type= s->current_picture.mb_type[mb_xy]; 02111 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; 02112 02113 if (CHROMA444) { 02114 if(is_complex || h->pixel_shift) 02115 hl_decode_mb_444_complex(h); 02116 else 02117 hl_decode_mb_444_simple(h); 02118 } else if (is_complex) { 02119 hl_decode_mb_complex(h); 02120 } else if (h->pixel_shift) { 02121 hl_decode_mb_simple_16(h); 02122 } else 02123 hl_decode_mb_simple_8(h); 02124 } 02125 02126 static int pred_weight_table(H264Context *h){ 02127 MpegEncContext * const s = &h->s; 02128 int list, i; 02129 int luma_def, chroma_def; 02130 02131 h->use_weight= 0; 02132 h->use_weight_chroma= 0; 02133 h->luma_log2_weight_denom= get_ue_golomb(&s->gb); 02134 if(h->sps.chroma_format_idc) 02135 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb); 02136 luma_def = 1<<h->luma_log2_weight_denom; 02137 chroma_def = 1<<h->chroma_log2_weight_denom; 02138 02139 for(list=0; list<2; list++){ 02140 h->luma_weight_flag[list] = 0; 02141 h->chroma_weight_flag[list] = 0; 02142 for(i=0; i<h->ref_count[list]; i++){ 02143 int luma_weight_flag, chroma_weight_flag; 02144 02145 luma_weight_flag= get_bits1(&s->gb); 02146 if(luma_weight_flag){ 02147 h->luma_weight[i][list][0]= get_se_golomb(&s->gb); 02148 h->luma_weight[i][list][1]= get_se_golomb(&s->gb); 02149 if( h->luma_weight[i][list][0] != luma_def 02150 || h->luma_weight[i][list][1] != 0) { 02151 h->use_weight= 1; 02152 h->luma_weight_flag[list]= 1; 02153 } 02154 }else{ 02155 h->luma_weight[i][list][0]= luma_def; 02156 h->luma_weight[i][list][1]= 0; 02157 } 02158 02159 if(h->sps.chroma_format_idc){ 02160 chroma_weight_flag= get_bits1(&s->gb); 02161 if(chroma_weight_flag){ 02162 int j; 02163 for(j=0; j<2; j++){ 02164 h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb); 02165 h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb); 02166 if( h->chroma_weight[i][list][j][0] != chroma_def 02167 || h->chroma_weight[i][list][j][1] != 0) { 02168 h->use_weight_chroma= 1; 02169 h->chroma_weight_flag[list]= 1; 02170 } 02171 } 02172 }else{ 02173 int j; 02174 for(j=0; j<2; j++){ 02175 h->chroma_weight[i][list][j][0]= chroma_def; 02176 h->chroma_weight[i][list][j][1]= 0; 02177 } 02178 } 02179 } 02180 } 02181 if(h->slice_type_nos != AV_PICTURE_TYPE_B) break; 02182 } 02183 h->use_weight= h->use_weight || h->use_weight_chroma; 02184 return 0; 02185 } 02186 02192 static void implicit_weight_table(H264Context *h, int field){ 02193 MpegEncContext * const s = &h->s; 02194 int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1; 02195 02196 for (i = 0; i < 2; i++) { 02197 h->luma_weight_flag[i] = 0; 02198 h->chroma_weight_flag[i] = 0; 02199 } 02200 02201 if(field < 0){ 02202 if (s->picture_structure == PICT_FRAME) { 02203 cur_poc = s->current_picture_ptr->poc; 02204 } else { 02205 cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1]; 02206 } 02207 if( h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF 02208 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){ 02209 h->use_weight= 0; 02210 h->use_weight_chroma= 0; 02211 return; 02212 } 02213 ref_start= 0; 02214 ref_count0= h->ref_count[0]; 02215 ref_count1= h->ref_count[1]; 02216 }else{ 02217 cur_poc = s->current_picture_ptr->field_poc[field]; 02218 ref_start= 16; 02219 ref_count0= 16+2*h->ref_count[0]; 02220 ref_count1= 16+2*h->ref_count[1]; 02221 } 02222 02223 h->use_weight= 2; 02224 h->use_weight_chroma= 2; 02225 h->luma_log2_weight_denom= 5; 02226 h->chroma_log2_weight_denom= 5; 02227 02228 for(ref0=ref_start; ref0 < ref_count0; ref0++){ 02229 int poc0 = h->ref_list[0][ref0].poc; 02230 for(ref1=ref_start; ref1 < ref_count1; ref1++){ 02231 int w = 32; 02232 if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) { 02233 int poc1 = h->ref_list[1][ref1].poc; 02234 int td = av_clip(poc1 - poc0, -128, 127); 02235 if(td){ 02236 int tb = av_clip(cur_poc - poc0, -128, 127); 02237 int tx = (16384 + (FFABS(td) >> 1)) / td; 02238 int dist_scale_factor = (tb*tx + 32) >> 8; 02239 if(dist_scale_factor >= -64 && dist_scale_factor <= 128) 02240 w = 64 - dist_scale_factor; 02241 } 02242 } 02243 if(field<0){ 02244 h->implicit_weight[ref0][ref1][0]= 02245 h->implicit_weight[ref0][ref1][1]= w; 02246 }else{ 02247 h->implicit_weight[ref0][ref1][field]=w; 02248 } 02249 } 02250 } 02251 } 02252 02256 static void idr(H264Context *h){ 02257 ff_h264_remove_all_refs(h); 02258 h->prev_frame_num= 0; 02259 h->prev_frame_num_offset= 0; 02260 h->prev_poc_msb= 02261 h->prev_poc_lsb= 0; 02262 } 02263 02264 /* forget old pics after a seek */ 02265 static void flush_dpb(AVCodecContext *avctx){ 02266 H264Context *h= avctx->priv_data; 02267 int i; 02268 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) { 02269 if(h->delayed_pic[i]) 02270 h->delayed_pic[i]->reference= 0; 02271 h->delayed_pic[i]= NULL; 02272 } 02273 h->outputed_poc=h->next_outputed_poc= INT_MIN; 02274 h->prev_interlaced_frame = 1; 02275 idr(h); 02276 if(h->s.current_picture_ptr) 02277 h->s.current_picture_ptr->reference= 0; 02278 h->s.first_field= 0; 02279 ff_h264_reset_sei(h); 02280 ff_mpeg_flush(avctx); 02281 } 02282 02283 static int init_poc(H264Context *h){ 02284 MpegEncContext * const s = &h->s; 02285 const int max_frame_num= 1<<h->sps.log2_max_frame_num; 02286 int field_poc[2]; 02287 Picture *cur = s->current_picture_ptr; 02288 02289 h->frame_num_offset= h->prev_frame_num_offset; 02290 if(h->frame_num < h->prev_frame_num) 02291 h->frame_num_offset += max_frame_num; 02292 02293 if(h->sps.poc_type==0){ 02294 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb; 02295 02296 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2) 02297 h->poc_msb = h->prev_poc_msb + max_poc_lsb; 02298 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2) 02299 h->poc_msb = h->prev_poc_msb - max_poc_lsb; 02300 else 02301 h->poc_msb = h->prev_poc_msb; 02302 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb); 02303 field_poc[0] = 02304 field_poc[1] = h->poc_msb + h->poc_lsb; 02305 if(s->picture_structure == PICT_FRAME) 02306 field_poc[1] += h->delta_poc_bottom; 02307 }else if(h->sps.poc_type==1){ 02308 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc; 02309 int i; 02310 02311 if(h->sps.poc_cycle_length != 0) 02312 abs_frame_num = h->frame_num_offset + h->frame_num; 02313 else 02314 abs_frame_num = 0; 02315 02316 if(h->nal_ref_idc==0 && abs_frame_num > 0) 02317 abs_frame_num--; 02318 02319 expected_delta_per_poc_cycle = 0; 02320 for(i=0; i < h->sps.poc_cycle_length; i++) 02321 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse 02322 02323 if(abs_frame_num > 0){ 02324 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length; 02325 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length; 02326 02327 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle; 02328 for(i = 0; i <= frame_num_in_poc_cycle; i++) 02329 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ]; 02330 } else 02331 expectedpoc = 0; 02332 02333 if(h->nal_ref_idc == 0) 02334 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic; 02335 02336 field_poc[0] = expectedpoc + h->delta_poc[0]; 02337 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field; 02338 02339 if(s->picture_structure == PICT_FRAME) 02340 field_poc[1] += h->delta_poc[1]; 02341 }else{ 02342 int poc= 2*(h->frame_num_offset + h->frame_num); 02343 02344 if(!h->nal_ref_idc) 02345 poc--; 02346 02347 field_poc[0]= poc; 02348 field_poc[1]= poc; 02349 } 02350 02351 if(s->picture_structure != PICT_BOTTOM_FIELD) 02352 s->current_picture_ptr->field_poc[0]= field_poc[0]; 02353 if(s->picture_structure != PICT_TOP_FIELD) 02354 s->current_picture_ptr->field_poc[1]= field_poc[1]; 02355 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]); 02356 02357 return 0; 02358 } 02359 02360 02364 static void init_scan_tables(H264Context *h){ 02365 int i; 02366 for(i=0; i<16; i++){ 02367 #define T(x) (x>>2) | ((x<<2) & 0xF) 02368 h->zigzag_scan[i] = T(zigzag_scan[i]); 02369 h-> field_scan[i] = T( field_scan[i]); 02370 #undef T 02371 } 02372 for(i=0; i<64; i++){ 02373 #define T(x) (x>>3) | ((x&7)<<3) 02374 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); 02375 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); 02376 h->field_scan8x8[i] = T(field_scan8x8[i]); 02377 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); 02378 #undef T 02379 } 02380 if(h->sps.transform_bypass){ //FIXME same ugly 02381 h->zigzag_scan_q0 = zigzag_scan; 02382 h->zigzag_scan8x8_q0 = ff_zigzag_direct; 02383 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; 02384 h->field_scan_q0 = field_scan; 02385 h->field_scan8x8_q0 = field_scan8x8; 02386 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc; 02387 }else{ 02388 h->zigzag_scan_q0 = h->zigzag_scan; 02389 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; 02390 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; 02391 h->field_scan_q0 = h->field_scan; 02392 h->field_scan8x8_q0 = h->field_scan8x8; 02393 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; 02394 } 02395 } 02396 02397 static void field_end(H264Context *h, int in_setup){ 02398 MpegEncContext * const s = &h->s; 02399 AVCodecContext * const avctx= s->avctx; 02400 s->mb_y= 0; 02401 02402 if (!in_setup && !s->dropable) 02403 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1, 02404 s->picture_structure==PICT_BOTTOM_FIELD); 02405 02406 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 02407 ff_vdpau_h264_set_reference_frames(s); 02408 02409 if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){ 02410 if(!s->dropable) { 02411 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); 02412 h->prev_poc_msb= h->poc_msb; 02413 h->prev_poc_lsb= h->poc_lsb; 02414 } 02415 h->prev_frame_num_offset= h->frame_num_offset; 02416 h->prev_frame_num= h->frame_num; 02417 h->outputed_poc = h->next_outputed_poc; 02418 } 02419 02420 if (avctx->hwaccel) { 02421 if (avctx->hwaccel->end_frame(avctx) < 0) 02422 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n"); 02423 } 02424 02425 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 02426 ff_vdpau_h264_picture_complete(s); 02427 02428 /* 02429 * FIXME: Error handling code does not seem to support interlaced 02430 * when slices span multiple rows 02431 * The ff_er_add_slice calls don't work right for bottom 02432 * fields; they cause massive erroneous error concealing 02433 * Error marking covers both fields (top and bottom). 02434 * This causes a mismatched s->error_count 02435 * and a bad error table. Further, the error count goes to 02436 * INT_MAX when called for bottom field, because mb_y is 02437 * past end by one (callers fault) and resync_mb_y != 0 02438 * causes problems for the first MB line, too. 02439 */ 02440 if (!FIELD_PICTURE) 02441 ff_er_frame_end(s); 02442 02443 MPV_frame_end(s); 02444 02445 h->current_slice=0; 02446 } 02447 02451 static void clone_slice(H264Context *dst, H264Context *src) 02452 { 02453 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset)); 02454 dst->s.current_picture_ptr = src->s.current_picture_ptr; 02455 dst->s.current_picture = src->s.current_picture; 02456 dst->s.linesize = src->s.linesize; 02457 dst->s.uvlinesize = src->s.uvlinesize; 02458 dst->s.first_field = src->s.first_field; 02459 02460 dst->prev_poc_msb = src->prev_poc_msb; 02461 dst->prev_poc_lsb = src->prev_poc_lsb; 02462 dst->prev_frame_num_offset = src->prev_frame_num_offset; 02463 dst->prev_frame_num = src->prev_frame_num; 02464 dst->short_ref_count = src->short_ref_count; 02465 02466 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref)); 02467 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref)); 02468 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list)); 02469 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list)); 02470 02471 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff)); 02472 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff)); 02473 } 02474 02482 int ff_h264_get_profile(SPS *sps) 02483 { 02484 int profile = sps->profile_idc; 02485 02486 switch(sps->profile_idc) { 02487 case FF_PROFILE_H264_BASELINE: 02488 // constraint_set1_flag set to 1 02489 profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0; 02490 break; 02491 case FF_PROFILE_H264_HIGH_10: 02492 case FF_PROFILE_H264_HIGH_422: 02493 case FF_PROFILE_H264_HIGH_444_PREDICTIVE: 02494 // constraint_set3_flag set to 1 02495 profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0; 02496 break; 02497 } 02498 02499 return profile; 02500 } 02501 02511 static int decode_slice_header(H264Context *h, H264Context *h0){ 02512 MpegEncContext * const s = &h->s; 02513 MpegEncContext * const s0 = &h0->s; 02514 unsigned int first_mb_in_slice; 02515 unsigned int pps_id; 02516 int num_ref_idx_active_override_flag; 02517 unsigned int slice_type, tmp, i, j; 02518 int default_ref_list_done = 0; 02519 int last_pic_structure; 02520 02521 s->dropable= h->nal_ref_idc == 0; 02522 02523 /* FIXME: 2tap qpel isn't implemented for high bit depth. */ 02524 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){ 02525 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; 02526 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab; 02527 }else{ 02528 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab; 02529 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab; 02530 } 02531 02532 first_mb_in_slice= get_ue_golomb(&s->gb); 02533 02534 if(first_mb_in_slice == 0){ //FIXME better field boundary detection 02535 if(h0->current_slice && FIELD_PICTURE){ 02536 field_end(h, 1); 02537 } 02538 02539 h0->current_slice = 0; 02540 if (!s0->first_field) 02541 s->current_picture_ptr= NULL; 02542 } 02543 02544 slice_type= get_ue_golomb_31(&s->gb); 02545 if(slice_type > 9){ 02546 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y); 02547 return -1; 02548 } 02549 if(slice_type > 4){ 02550 slice_type -= 5; 02551 h->slice_type_fixed=1; 02552 }else 02553 h->slice_type_fixed=0; 02554 02555 slice_type= golomb_to_pict_type[ slice_type ]; 02556 if (slice_type == AV_PICTURE_TYPE_I 02557 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) { 02558 default_ref_list_done = 1; 02559 } 02560 h->slice_type= slice_type; 02561 h->slice_type_nos= slice_type & 3; 02562 02563 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though 02564 02565 pps_id= get_ue_golomb(&s->gb); 02566 if(pps_id>=MAX_PPS_COUNT){ 02567 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); 02568 return -1; 02569 } 02570 if(!h0->pps_buffers[pps_id]) { 02571 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id); 02572 return -1; 02573 } 02574 h->pps= *h0->pps_buffers[pps_id]; 02575 02576 if(!h0->sps_buffers[h->pps.sps_id]) { 02577 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id); 02578 return -1; 02579 } 02580 h->sps = *h0->sps_buffers[h->pps.sps_id]; 02581 02582 s->avctx->profile = ff_h264_get_profile(&h->sps); 02583 s->avctx->level = h->sps.level_idc; 02584 s->avctx->refs = h->sps.ref_frame_count; 02585 02586 if(h == h0 && h->dequant_coeff_pps != pps_id){ 02587 h->dequant_coeff_pps = pps_id; 02588 init_dequant_tables(h); 02589 } 02590 02591 s->mb_width= h->sps.mb_width; 02592 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); 02593 02594 h->b_stride= s->mb_width*4; 02595 02596 s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1); 02597 if(h->sps.frame_mbs_only_flag) 02598 s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); 02599 else 02600 s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); 02601 02602 if (s->context_initialized 02603 && ( s->width != s->avctx->width || s->height != s->avctx->height 02604 || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) { 02605 if(h != h0 || (HAVE_THREADS && h->s.avctx->active_thread_type & FF_THREAD_FRAME)) { 02606 av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0); 02607 return AVERROR_PATCHWELCOME; // width / height changed during parallelized decoding 02608 } 02609 free_tables(h, 0); 02610 flush_dpb(s->avctx); 02611 MPV_common_end(s); 02612 } 02613 if (!s->context_initialized) { 02614 if (h != h0) { 02615 av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n"); 02616 return -1; 02617 } 02618 02619 avcodec_set_dimensions(s->avctx, s->width, s->height); 02620 s->avctx->sample_aspect_ratio= h->sps.sar; 02621 av_assert0(s->avctx->sample_aspect_ratio.den); 02622 02623 h->s.avctx->coded_width = 16*s->mb_width; 02624 h->s.avctx->coded_height = 16*s->mb_height; 02625 02626 if(h->sps.video_signal_type_present_flag){ 02627 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG; 02628 if(h->sps.colour_description_present_flag){ 02629 s->avctx->color_primaries = h->sps.color_primaries; 02630 s->avctx->color_trc = h->sps.color_trc; 02631 s->avctx->colorspace = h->sps.colorspace; 02632 } 02633 } 02634 02635 if(h->sps.timing_info_present_flag){ 02636 int64_t den= h->sps.time_scale; 02637 if(h->x264_build < 44U) 02638 den *= 2; 02639 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, 02640 h->sps.num_units_in_tick, den, 1<<30); 02641 } 02642 02643 switch (h->sps.bit_depth_luma) { 02644 case 9 : 02645 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9; 02646 break; 02647 case 10 : 02648 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10; 02649 break; 02650 default: 02651 if (CHROMA444){ 02652 s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P; 02653 }else{ 02654 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, 02655 s->avctx->codec->pix_fmts ? 02656 s->avctx->codec->pix_fmts : 02657 s->avctx->color_range == AVCOL_RANGE_JPEG ? 02658 hwaccel_pixfmt_list_h264_jpeg_420 : 02659 ff_hwaccel_pixfmt_list_420); 02660 } 02661 } 02662 02663 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt); 02664 02665 if (MPV_common_init(s) < 0) { 02666 av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n"); 02667 return -1; 02668 } 02669 s->first_field = 0; 02670 h->prev_interlaced_frame = 1; 02671 02672 init_scan_tables(h); 02673 if (ff_h264_alloc_tables(h) < 0) { 02674 av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n"); 02675 return AVERROR(ENOMEM); 02676 } 02677 02678 if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) { 02679 if (context_init(h) < 0) { 02680 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n"); 02681 return -1; 02682 } 02683 } else { 02684 for(i = 1; i < s->avctx->thread_count; i++) { 02685 H264Context *c; 02686 c = h->thread_context[i] = av_malloc(sizeof(H264Context)); 02687 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext)); 02688 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext)); 02689 c->h264dsp = h->h264dsp; 02690 c->sps = h->sps; 02691 c->pps = h->pps; 02692 c->pixel_shift = h->pixel_shift; 02693 init_scan_tables(c); 02694 clone_tables(c, h, i); 02695 } 02696 02697 for(i = 0; i < s->avctx->thread_count; i++) 02698 if (context_init(h->thread_context[i]) < 0) { 02699 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n"); 02700 return -1; 02701 } 02702 } 02703 } 02704 02705 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); 02706 02707 h->mb_mbaff = 0; 02708 h->mb_aff_frame = 0; 02709 last_pic_structure = s0->picture_structure; 02710 if(h->sps.frame_mbs_only_flag){ 02711 s->picture_structure= PICT_FRAME; 02712 }else{ 02713 if(get_bits1(&s->gb)) { //field_pic_flag 02714 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag 02715 } else { 02716 s->picture_structure= PICT_FRAME; 02717 h->mb_aff_frame = h->sps.mb_aff; 02718 } 02719 } 02720 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME; 02721 02722 if(h0->current_slice == 0){ 02723 // Shorten frame num gaps so we don't have to allocate reference frames just to throw them away 02724 if(h->frame_num != h->prev_frame_num) { 02725 int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num; 02726 02727 if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num; 02728 02729 if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) { 02730 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1; 02731 if (unwrap_prev_frame_num < 0) 02732 unwrap_prev_frame_num += max_frame_num; 02733 02734 h->prev_frame_num = unwrap_prev_frame_num; 02735 } 02736 } 02737 02738 while(h->frame_num != h->prev_frame_num && 02739 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){ 02740 Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL; 02741 av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num); 02742 if (ff_h264_frame_start(h) < 0) 02743 return -1; 02744 h->prev_frame_num++; 02745 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num; 02746 s->current_picture_ptr->frame_num= h->prev_frame_num; 02747 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0); 02748 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1); 02749 ff_generate_sliding_window_mmcos(h); 02750 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); 02751 /* Error concealment: if a ref is missing, copy the previous ref in its place. 02752 * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions 02753 * about there being no actual duplicates. 02754 * FIXME: this doesn't copy padding for out-of-frame motion vectors. Given we're 02755 * concealing a lost frame, this probably isn't noticable by comparison, but it should 02756 * be fixed. */ 02757 if (h->short_ref_count) { 02758 if (prev) { 02759 av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize, 02760 (const uint8_t**)prev->data, prev->linesize, 02761 s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16); 02762 h->short_ref[0]->poc = prev->poc+2; 02763 } 02764 h->short_ref[0]->frame_num = h->prev_frame_num; 02765 } 02766 } 02767 02768 /* See if we have a decoded first field looking for a pair... */ 02769 if (s0->first_field) { 02770 assert(s0->current_picture_ptr); 02771 assert(s0->current_picture_ptr->data[0]); 02772 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF); 02773 02774 /* figure out if we have a complementary field pair */ 02775 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) { 02776 /* 02777 * Previous field is unmatched. Don't display it, but let it 02778 * remain for reference if marked as such. 02779 */ 02780 s0->current_picture_ptr = NULL; 02781 s0->first_field = FIELD_PICTURE; 02782 02783 } else { 02784 if (h->nal_ref_idc && 02785 s0->current_picture_ptr->reference && 02786 s0->current_picture_ptr->frame_num != h->frame_num) { 02787 /* 02788 * This and previous field were reference, but had 02789 * different frame_nums. Consider this field first in 02790 * pair. Throw away previous field except for reference 02791 * purposes. 02792 */ 02793 s0->first_field = 1; 02794 s0->current_picture_ptr = NULL; 02795 02796 } else { 02797 /* Second field in complementary pair */ 02798 s0->first_field = 0; 02799 } 02800 } 02801 02802 } else { 02803 /* Frame or first field in a potentially complementary pair */ 02804 assert(!s0->current_picture_ptr); 02805 s0->first_field = FIELD_PICTURE; 02806 } 02807 02808 if(!FIELD_PICTURE || s0->first_field) { 02809 if (ff_h264_frame_start(h) < 0) { 02810 s0->first_field = 0; 02811 return -1; 02812 } 02813 } else { 02814 ff_release_unused_pictures(s, 0); 02815 } 02816 } 02817 if(h != h0) 02818 clone_slice(h, h0); 02819 02820 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup 02821 02822 assert(s->mb_num == s->mb_width * s->mb_height); 02823 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num || 02824 first_mb_in_slice >= s->mb_num){ 02825 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n"); 02826 return -1; 02827 } 02828 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width; 02829 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE; 02830 if (s->picture_structure == PICT_BOTTOM_FIELD) 02831 s->resync_mb_y = s->mb_y = s->mb_y + 1; 02832 assert(s->mb_y < s->mb_height); 02833 02834 if(s->picture_structure==PICT_FRAME){ 02835 h->curr_pic_num= h->frame_num; 02836 h->max_pic_num= 1<< h->sps.log2_max_frame_num; 02837 }else{ 02838 h->curr_pic_num= 2*h->frame_num + 1; 02839 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1); 02840 } 02841 02842 if(h->nal_unit_type == NAL_IDR_SLICE){ 02843 get_ue_golomb(&s->gb); /* idr_pic_id */ 02844 } 02845 02846 if(h->sps.poc_type==0){ 02847 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb); 02848 02849 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){ 02850 h->delta_poc_bottom= get_se_golomb(&s->gb); 02851 } 02852 } 02853 02854 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){ 02855 h->delta_poc[0]= get_se_golomb(&s->gb); 02856 02857 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME) 02858 h->delta_poc[1]= get_se_golomb(&s->gb); 02859 } 02860 02861 init_poc(h); 02862 02863 if(h->pps.redundant_pic_cnt_present){ 02864 h->redundant_pic_count= get_ue_golomb(&s->gb); 02865 } 02866 02867 //set defaults, might be overridden a few lines later 02868 h->ref_count[0]= h->pps.ref_count[0]; 02869 h->ref_count[1]= h->pps.ref_count[1]; 02870 02871 if(h->slice_type_nos != AV_PICTURE_TYPE_I){ 02872 int max_refs = s->picture_structure == PICT_FRAME ? 16 : 32; 02873 02874 if(h->slice_type_nos == AV_PICTURE_TYPE_B){ 02875 h->direct_spatial_mv_pred= get_bits1(&s->gb); 02876 } 02877 num_ref_idx_active_override_flag= get_bits1(&s->gb); 02878 02879 if(num_ref_idx_active_override_flag){ 02880 h->ref_count[0]= get_ue_golomb(&s->gb) + 1; 02881 if(h->slice_type_nos==AV_PICTURE_TYPE_B) 02882 h->ref_count[1]= get_ue_golomb(&s->gb) + 1; 02883 } 02884 02885 if (h->ref_count[0] > max_refs || h->ref_count[1] > max_refs) { 02886 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n"); 02887 h->ref_count[0] = h->ref_count[1] = 1; 02888 return AVERROR_INVALIDDATA; 02889 } 02890 02891 if(h->slice_type_nos == AV_PICTURE_TYPE_B) 02892 h->list_count= 2; 02893 else 02894 h->list_count= 1; 02895 }else 02896 h->list_count= 0; 02897 02898 if(!default_ref_list_done){ 02899 ff_h264_fill_default_ref_list(h); 02900 } 02901 02902 if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) { 02903 h->ref_count[1]= h->ref_count[0]= 0; 02904 return -1; 02905 } 02906 02907 if(h->slice_type_nos!=AV_PICTURE_TYPE_I){ 02908 s->last_picture_ptr= &h->ref_list[0][0]; 02909 ff_copy_picture(&s->last_picture, s->last_picture_ptr); 02910 } 02911 if(h->slice_type_nos==AV_PICTURE_TYPE_B){ 02912 s->next_picture_ptr= &h->ref_list[1][0]; 02913 ff_copy_picture(&s->next_picture, s->next_picture_ptr); 02914 } 02915 02916 if( (h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P ) 02917 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) ) 02918 pred_weight_table(h); 02919 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){ 02920 implicit_weight_table(h, -1); 02921 }else { 02922 h->use_weight = 0; 02923 for (i = 0; i < 2; i++) { 02924 h->luma_weight_flag[i] = 0; 02925 h->chroma_weight_flag[i] = 0; 02926 } 02927 } 02928 02929 if(h->nal_ref_idc) 02930 ff_h264_decode_ref_pic_marking(h0, &s->gb); 02931 02932 if(FRAME_MBAFF){ 02933 ff_h264_fill_mbaff_ref_list(h); 02934 02935 if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){ 02936 implicit_weight_table(h, 0); 02937 implicit_weight_table(h, 1); 02938 } 02939 } 02940 02941 if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred) 02942 ff_h264_direct_dist_scale_factor(h); 02943 ff_h264_direct_ref_list_init(h); 02944 02945 if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){ 02946 tmp = get_ue_golomb_31(&s->gb); 02947 if(tmp > 2){ 02948 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n"); 02949 return -1; 02950 } 02951 h->cabac_init_idc= tmp; 02952 } 02953 02954 h->last_qscale_diff = 0; 02955 tmp = h->pps.init_qp + get_se_golomb(&s->gb); 02956 if(tmp>51+6*(h->sps.bit_depth_luma-8)){ 02957 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp); 02958 return -1; 02959 } 02960 s->qscale= tmp; 02961 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); 02962 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); 02963 //FIXME qscale / qp ... stuff 02964 if(h->slice_type == AV_PICTURE_TYPE_SP){ 02965 get_bits1(&s->gb); /* sp_for_switch_flag */ 02966 } 02967 if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){ 02968 get_se_golomb(&s->gb); /* slice_qs_delta */ 02969 } 02970 02971 h->deblocking_filter = 1; 02972 h->slice_alpha_c0_offset = 52; 02973 h->slice_beta_offset = 52; 02974 if( h->pps.deblocking_filter_parameters_present ) { 02975 tmp= get_ue_golomb_31(&s->gb); 02976 if(tmp > 2){ 02977 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp); 02978 return -1; 02979 } 02980 h->deblocking_filter= tmp; 02981 if(h->deblocking_filter < 2) 02982 h->deblocking_filter^= 1; // 1<->0 02983 02984 if( h->deblocking_filter ) { 02985 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1; 02986 h->slice_beta_offset += get_se_golomb(&s->gb) << 1; 02987 if( h->slice_alpha_c0_offset > 104U 02988 || h->slice_beta_offset > 104U){ 02989 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset); 02990 return -1; 02991 } 02992 } 02993 } 02994 02995 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL 02996 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I) 02997 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == AV_PICTURE_TYPE_B) 02998 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) 02999 h->deblocking_filter= 0; 03000 03001 if(h->deblocking_filter == 1 && h0->max_contexts > 1) { 03002 if(s->avctx->flags2 & CODEC_FLAG2_FAST) { 03003 /* Cheat slightly for speed: 03004 Do not bother to deblock across slices. */ 03005 h->deblocking_filter = 2; 03006 } else { 03007 h0->max_contexts = 1; 03008 if(!h0->single_decode_warning) { 03009 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); 03010 h0->single_decode_warning = 1; 03011 } 03012 if (h != h0) { 03013 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n"); 03014 return 1; 03015 } 03016 } 03017 } 03018 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]); 03019 03020 #if 0 //FMO 03021 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5) 03022 slice_group_change_cycle= get_bits(&s->gb, ?); 03023 #endif 03024 03025 h0->last_slice_type = slice_type; 03026 h->slice_num = ++h0->current_slice; 03027 if(h->slice_num >= MAX_SLICES){ 03028 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n"); 03029 } 03030 03031 for(j=0; j<2; j++){ 03032 int id_list[16]; 03033 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j]; 03034 for(i=0; i<16; i++){ 03035 id_list[i]= 60; 03036 if(h->ref_list[j][i].data[0]){ 03037 int k; 03038 uint8_t *base= h->ref_list[j][i].base[0]; 03039 for(k=0; k<h->short_ref_count; k++) 03040 if(h->short_ref[k]->base[0] == base){ 03041 id_list[i]= k; 03042 break; 03043 } 03044 for(k=0; k<h->long_ref_count; k++) 03045 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){ 03046 id_list[i]= h->short_ref_count + k; 03047 break; 03048 } 03049 } 03050 } 03051 03052 ref2frm[0]= 03053 ref2frm[1]= -1; 03054 for(i=0; i<16; i++) 03055 ref2frm[i+2]= 4*id_list[i] 03056 +(h->ref_list[j][i].reference&3); 03057 ref2frm[18+0]= 03058 ref2frm[18+1]= -1; 03059 for(i=16; i<48; i++) 03060 ref2frm[i+4]= 4*id_list[(i-16)>>1] 03061 +(h->ref_list[j][i].reference&3); 03062 } 03063 03064 //FIXME: fix draw_edges+PAFF+frame threads 03065 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16; 03066 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width; 03067 03068 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ 03069 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n", 03070 h->slice_num, 03071 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"), 03072 first_mb_in_slice, 03073 av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "", 03074 pps_id, h->frame_num, 03075 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1], 03076 h->ref_count[0], h->ref_count[1], 03077 s->qscale, 03078 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26, 03079 h->use_weight, 03080 h->use_weight==1 && h->use_weight_chroma ? "c" : "", 03081 h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "" 03082 ); 03083 } 03084 03085 return 0; 03086 } 03087 03088 int ff_h264_get_slice_type(const H264Context *h) 03089 { 03090 switch (h->slice_type) { 03091 case AV_PICTURE_TYPE_P: return 0; 03092 case AV_PICTURE_TYPE_B: return 1; 03093 case AV_PICTURE_TYPE_I: return 2; 03094 case AV_PICTURE_TYPE_SP: return 3; 03095 case AV_PICTURE_TYPE_SI: return 4; 03096 default: return -1; 03097 } 03098 } 03099 03104 static int fill_filter_caches(H264Context *h, int mb_type){ 03105 MpegEncContext * const s = &h->s; 03106 const int mb_xy= h->mb_xy; 03107 int top_xy, left_xy[2]; 03108 int top_type, left_type[2]; 03109 03110 top_xy = mb_xy - (s->mb_stride << MB_FIELD); 03111 03112 //FIXME deblocking could skip the intra and nnz parts. 03113 03114 /* Wow, what a mess, why didn't they simplify the interlacing & intra 03115 * stuff, I can't imagine that these complex rules are worth it. */ 03116 03117 left_xy[1] = left_xy[0] = mb_xy-1; 03118 if(FRAME_MBAFF){ 03119 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); 03120 const int curr_mb_field_flag = IS_INTERLACED(mb_type); 03121 if(s->mb_y&1){ 03122 if (left_mb_field_flag != curr_mb_field_flag) { 03123 left_xy[0] -= s->mb_stride; 03124 } 03125 }else{ 03126 if(curr_mb_field_flag){ 03127 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); 03128 } 03129 if (left_mb_field_flag != curr_mb_field_flag) { 03130 left_xy[1] += s->mb_stride; 03131 } 03132 } 03133 } 03134 03135 h->top_mb_xy = top_xy; 03136 h->left_mb_xy[0] = left_xy[0]; 03137 h->left_mb_xy[1] = left_xy[1]; 03138 { 03139 //for sufficiently low qp, filtering wouldn't do anything 03140 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp 03141 int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice 03142 int qp = s->current_picture.qscale_table[mb_xy]; 03143 if(qp <= qp_thresh 03144 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh) 03145 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){ 03146 if(!FRAME_MBAFF) 03147 return 1; 03148 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh) 03149 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) 03150 return 1; 03151 } 03152 } 03153 03154 top_type = s->current_picture.mb_type[top_xy] ; 03155 left_type[0] = s->current_picture.mb_type[left_xy[0]]; 03156 left_type[1] = s->current_picture.mb_type[left_xy[1]]; 03157 if(h->deblocking_filter == 2){ 03158 if(h->slice_table[top_xy ] != h->slice_num) top_type= 0; 03159 if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0; 03160 }else{ 03161 if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0; 03162 if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0; 03163 } 03164 h->top_type = top_type ; 03165 h->left_type[0]= left_type[0]; 03166 h->left_type[1]= left_type[1]; 03167 03168 if(IS_INTRA(mb_type)) 03169 return 0; 03170 03171 AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]); 03172 AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]); 03173 AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]); 03174 AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]); 03175 03176 h->cbp= h->cbp_table[mb_xy]; 03177 03178 { 03179 int list; 03180 for(list=0; list<h->list_count; list++){ 03181 int8_t *ref; 03182 int y, b_stride; 03183 int16_t (*mv_dst)[2]; 03184 int16_t (*mv_src)[2]; 03185 03186 if(!USES_LIST(mb_type, list)){ 03187 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); 03188 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 03189 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 03190 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 03191 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 03192 continue; 03193 } 03194 03195 ref = &s->current_picture.ref_index[list][4*mb_xy]; 03196 { 03197 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); 03198 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 03199 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 03200 ref += 2; 03201 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 03202 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 03203 } 03204 03205 b_stride = h->b_stride; 03206 mv_dst = &h->mv_cache[list][scan8[0]]; 03207 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride]; 03208 for(y=0; y<4; y++){ 03209 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride); 03210 } 03211 03212 } 03213 } 03214 03215 03216 /* 03217 0 . T T. T T T T 03218 1 L . .L . . . . 03219 2 L . .L . . . . 03220 3 . T TL . . . . 03221 4 L . .L . . . . 03222 5 L . .. . . . . 03223 */ 03224 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) 03225 if(top_type){ 03226 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]); 03227 } 03228 03229 if(left_type[0]){ 03230 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4]; 03231 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4]; 03232 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4]; 03233 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4]; 03234 } 03235 03236 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs 03237 if(!CABAC && h->pps.transform_8x8_mode){ 03238 if(IS_8x8DCT(top_type)){ 03239 h->non_zero_count_cache[4+8*0]= 03240 h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12; 03241 h->non_zero_count_cache[6+8*0]= 03242 h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12; 03243 } 03244 if(IS_8x8DCT(left_type[0])){ 03245 h->non_zero_count_cache[3+8*1]= 03246 h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF 03247 } 03248 if(IS_8x8DCT(left_type[1])){ 03249 h->non_zero_count_cache[3+8*3]= 03250 h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF 03251 } 03252 03253 if(IS_8x8DCT(mb_type)){ 03254 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= 03255 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= (h->cbp & 0x1000) >> 12; 03256 03257 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= 03258 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12; 03259 03260 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= 03261 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12; 03262 03263 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= 03264 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12; 03265 } 03266 } 03267 03268 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ 03269 int list; 03270 for(list=0; list<h->list_count; list++){ 03271 if(USES_LIST(top_type, list)){ 03272 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; 03273 const int b8_xy= 4*top_xy + 2; 03274 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); 03275 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); 03276 h->ref_cache[list][scan8[0] + 0 - 1*8]= 03277 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; 03278 h->ref_cache[list][scan8[0] + 2 - 1*8]= 03279 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; 03280 }else{ 03281 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); 03282 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); 03283 } 03284 03285 if(!IS_INTERLACED(mb_type^left_type[0])){ 03286 if(USES_LIST(left_type[0], list)){ 03287 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; 03288 const int b8_xy= 4*left_xy[0] + 1; 03289 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); 03290 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]); 03291 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]); 03292 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]); 03293 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]); 03294 h->ref_cache[list][scan8[0] - 1 + 0 ]= 03295 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]]; 03296 h->ref_cache[list][scan8[0] - 1 +16 ]= 03297 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]]; 03298 }else{ 03299 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]); 03300 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]); 03301 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]); 03302 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]); 03303 h->ref_cache[list][scan8[0] - 1 + 0 ]= 03304 h->ref_cache[list][scan8[0] - 1 + 8 ]= 03305 h->ref_cache[list][scan8[0] - 1 + 16 ]= 03306 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; 03307 } 03308 } 03309 } 03310 } 03311 03312 return 0; 03313 } 03314 03315 static void loop_filter(H264Context *h, int start_x, int end_x){ 03316 MpegEncContext * const s = &h->s; 03317 uint8_t *dest_y, *dest_cb, *dest_cr; 03318 int linesize, uvlinesize, mb_x, mb_y; 03319 const int end_mb_y= s->mb_y + FRAME_MBAFF; 03320 const int old_slice_type= h->slice_type; 03321 const int pixel_shift = h->pixel_shift; 03322 03323 if(h->deblocking_filter) { 03324 for(mb_x= start_x; mb_x<end_x; mb_x++){ 03325 for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){ 03326 int mb_xy, mb_type; 03327 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride; 03328 h->slice_num= h->slice_table[mb_xy]; 03329 mb_type= s->current_picture.mb_type[mb_xy]; 03330 h->list_count= h->list_counts[mb_xy]; 03331 03332 if(FRAME_MBAFF) 03333 h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type); 03334 03335 s->mb_x= mb_x; 03336 s->mb_y= mb_y; 03337 dest_y = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; 03338 dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444); 03339 dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444); 03340 //FIXME simplify above 03341 03342 if (MB_FIELD) { 03343 linesize = h->mb_linesize = s->linesize * 2; 03344 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; 03345 if(mb_y&1){ //FIXME move out of this function? 03346 dest_y -= s->linesize*15; 03347 dest_cb-= s->uvlinesize*((8 << CHROMA444)-1); 03348 dest_cr-= s->uvlinesize*((8 << CHROMA444)-1); 03349 } 03350 } else { 03351 linesize = h->mb_linesize = s->linesize; 03352 uvlinesize = h->mb_uvlinesize = s->uvlinesize; 03353 } 03354 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0); 03355 if(fill_filter_caches(h, mb_type)) 03356 continue; 03357 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]); 03358 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]); 03359 03360 if (FRAME_MBAFF) { 03361 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 03362 } else { 03363 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 03364 } 03365 } 03366 } 03367 } 03368 h->slice_type= old_slice_type; 03369 s->mb_x= end_x; 03370 s->mb_y= end_mb_y - FRAME_MBAFF; 03371 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); 03372 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); 03373 } 03374 03375 static void predict_field_decoding_flag(H264Context *h){ 03376 MpegEncContext * const s = &h->s; 03377 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; 03378 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) 03379 ? s->current_picture.mb_type[mb_xy-1] 03380 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) 03381 ? s->current_picture.mb_type[mb_xy-s->mb_stride] 03382 : 0; 03383 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; 03384 } 03385 03389 static void decode_finish_row(H264Context *h){ 03390 MpegEncContext * const s = &h->s; 03391 int top = 16*(s->mb_y >> FIELD_PICTURE); 03392 int height = 16 << FRAME_MBAFF; 03393 int deblock_border = (16 + 4) << FRAME_MBAFF; 03394 int pic_height = 16*s->mb_height >> FIELD_PICTURE; 03395 03396 if (h->deblocking_filter) { 03397 if((top + height) >= pic_height) 03398 height += deblock_border; 03399 03400 top -= deblock_border; 03401 } 03402 03403 if (top >= pic_height || (top + height) < h->emu_edge_height) 03404 return; 03405 03406 height = FFMIN(height, pic_height - top); 03407 if (top < h->emu_edge_height) { 03408 height = top+height; 03409 top = 0; 03410 } 03411 03412 ff_draw_horiz_band(s, top, height); 03413 03414 if (s->dropable) return; 03415 03416 ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1, 03417 s->picture_structure==PICT_BOTTOM_FIELD); 03418 } 03419 03420 static int decode_slice(struct AVCodecContext *avctx, void *arg){ 03421 H264Context *h = *(void**)arg; 03422 MpegEncContext * const s = &h->s; 03423 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F; 03424 int lf_x_start = s->mb_x; 03425 03426 s->mb_skip_run= -1; 03427 03428 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 || 03429 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); 03430 03431 if( h->pps.cabac ) { 03432 /* realign */ 03433 align_get_bits( &s->gb ); 03434 03435 /* init cabac */ 03436 ff_init_cabac_states( &h->cabac); 03437 ff_init_cabac_decoder( &h->cabac, 03438 s->gb.buffer + get_bits_count(&s->gb)/8, 03439 (get_bits_left(&s->gb) + 7)/8); 03440 03441 ff_h264_init_cabac_states(h); 03442 03443 for(;;){ 03444 //START_TIMER 03445 int ret = ff_h264_decode_mb_cabac(h); 03446 int eos; 03447 //STOP_TIMER("decode_mb_cabac") 03448 03449 if(ret>=0) ff_h264_hl_decode_mb(h); 03450 03451 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? 03452 s->mb_y++; 03453 03454 ret = ff_h264_decode_mb_cabac(h); 03455 03456 if(ret>=0) ff_h264_hl_decode_mb(h); 03457 s->mb_y--; 03458 } 03459 eos = get_cabac_terminate( &h->cabac ); 03460 03461 if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){ 03462 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03463 if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1); 03464 return 0; 03465 } 03466 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) { 03467 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream); 03468 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 03469 return -1; 03470 } 03471 03472 if( ++s->mb_x >= s->mb_width ) { 03473 loop_filter(h, lf_x_start, s->mb_x); 03474 s->mb_x = lf_x_start = 0; 03475 decode_finish_row(h); 03476 ++s->mb_y; 03477 if(FIELD_OR_MBAFF_PICTURE) { 03478 ++s->mb_y; 03479 if(FRAME_MBAFF && s->mb_y < s->mb_height) 03480 predict_field_decoding_flag(h); 03481 } 03482 } 03483 03484 if( eos || s->mb_y >= s->mb_height ) { 03485 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 03486 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03487 if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x); 03488 return 0; 03489 } 03490 } 03491 03492 } else { 03493 for(;;){ 03494 int ret = ff_h264_decode_mb_cavlc(h); 03495 03496 if(ret>=0) ff_h264_hl_decode_mb(h); 03497 03498 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ? 03499 s->mb_y++; 03500 ret = ff_h264_decode_mb_cavlc(h); 03501 03502 if(ret>=0) ff_h264_hl_decode_mb(h); 03503 s->mb_y--; 03504 } 03505 03506 if(ret<0){ 03507 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); 03508 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 03509 return -1; 03510 } 03511 03512 if(++s->mb_x >= s->mb_width){ 03513 loop_filter(h, lf_x_start, s->mb_x); 03514 s->mb_x = lf_x_start = 0; 03515 decode_finish_row(h); 03516 ++s->mb_y; 03517 if(FIELD_OR_MBAFF_PICTURE) { 03518 ++s->mb_y; 03519 if(FRAME_MBAFF && s->mb_y < s->mb_height) 03520 predict_field_decoding_flag(h); 03521 } 03522 if(s->mb_y >= s->mb_height){ 03523 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 03524 03525 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) { 03526 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03527 03528 return 0; 03529 }else{ 03530 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03531 03532 return -1; 03533 } 03534 } 03535 } 03536 03537 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){ 03538 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 03539 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){ 03540 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03541 if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x); 03542 03543 return 0; 03544 }else{ 03545 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 03546 03547 return -1; 03548 } 03549 } 03550 } 03551 } 03552 03553 #if 0 03554 for(;s->mb_y < s->mb_height; s->mb_y++){ 03555 for(;s->mb_x < s->mb_width; s->mb_x++){ 03556 int ret= decode_mb(h); 03557 03558 ff_h264_hl_decode_mb(h); 03559 03560 if(ret<0){ 03561 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); 03562 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 03563 03564 return -1; 03565 } 03566 03567 if(++s->mb_x >= s->mb_width){ 03568 s->mb_x=0; 03569 if(++s->mb_y >= s->mb_height){ 03570 if(get_bits_count(s->gb) == s->gb.size_in_bits){ 03571 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03572 03573 return 0; 03574 }else{ 03575 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03576 03577 return -1; 03578 } 03579 } 03580 } 03581 03582 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){ 03583 if(get_bits_count(s->gb) == s->gb.size_in_bits){ 03584 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 03585 03586 return 0; 03587 }else{ 03588 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 03589 03590 return -1; 03591 } 03592 } 03593 } 03594 s->mb_x=0; 03595 ff_draw_horiz_band(s, 16*s->mb_y, 16); 03596 } 03597 #endif 03598 return -1; //not reached 03599 } 03600 03607 static void execute_decode_slices(H264Context *h, int context_count){ 03608 MpegEncContext * const s = &h->s; 03609 AVCodecContext * const avctx= s->avctx; 03610 H264Context *hx; 03611 int i; 03612 03613 if (s->avctx->hwaccel) 03614 return; 03615 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 03616 return; 03617 if(context_count == 1) { 03618 decode_slice(avctx, &h); 03619 } else { 03620 for(i = 1; i < context_count; i++) { 03621 hx = h->thread_context[i]; 03622 hx->s.error_recognition = avctx->error_recognition; 03623 hx->s.error_count = 0; 03624 } 03625 03626 avctx->execute(avctx, (void *)decode_slice, 03627 h->thread_context, NULL, context_count, sizeof(void*)); 03628 03629 /* pull back stuff from slices to master context */ 03630 hx = h->thread_context[context_count - 1]; 03631 s->mb_x = hx->s.mb_x; 03632 s->mb_y = hx->s.mb_y; 03633 s->dropable = hx->s.dropable; 03634 s->picture_structure = hx->s.picture_structure; 03635 for(i = 1; i < context_count; i++) 03636 h->s.error_count += h->thread_context[i]->s.error_count; 03637 } 03638 } 03639 03640 03641 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ 03642 MpegEncContext * const s = &h->s; 03643 AVCodecContext * const avctx= s->avctx; 03644 H264Context *hx; 03645 int buf_index; 03646 int context_count; 03647 int next_avc; 03648 int pass = !(avctx->active_thread_type & FF_THREAD_FRAME); 03649 int nals_needed=0; 03650 int nal_index; 03651 03652 h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1; 03653 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){ 03654 h->current_slice = 0; 03655 if (!s->first_field) 03656 s->current_picture_ptr= NULL; 03657 ff_h264_reset_sei(h); 03658 } 03659 03660 for(;pass <= 1;pass++){ 03661 buf_index = 0; 03662 context_count = 0; 03663 next_avc = h->is_avc ? 0 : buf_size; 03664 nal_index = 0; 03665 for(;;){ 03666 int consumed; 03667 int dst_length; 03668 int bit_length; 03669 const uint8_t *ptr; 03670 int i, nalsize = 0; 03671 int err; 03672 03673 if(buf_index >= next_avc) { 03674 if(buf_index >= buf_size) break; 03675 nalsize = 0; 03676 for(i = 0; i < h->nal_length_size; i++) 03677 nalsize = (nalsize << 8) | buf[buf_index++]; 03678 if(nalsize <= 0 || nalsize > buf_size - buf_index){ 03679 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize); 03680 break; 03681 } 03682 next_avc= buf_index + nalsize; 03683 } else { 03684 // start code prefix search 03685 for(; buf_index + 3 < next_avc; buf_index++){ 03686 // This should always succeed in the first iteration. 03687 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1) 03688 break; 03689 } 03690 03691 if(buf_index+3 >= buf_size) break; 03692 03693 buf_index+=3; 03694 if(buf_index >= next_avc) continue; 03695 } 03696 03697 hx = h->thread_context[context_count]; 03698 03699 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index); 03700 if (ptr==NULL || dst_length < 0){ 03701 return -1; 03702 } 03703 i= buf_index + consumed; 03704 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc && 03705 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0) 03706 s->workaround_bugs |= FF_BUG_TRUNCATED; 03707 03708 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){ 03709 while(ptr[dst_length - 1] == 0 && dst_length > 0) 03710 dst_length--; 03711 } 03712 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1)); 03713 03714 if(s->avctx->debug&FF_DEBUG_STARTCODE){ 03715 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length); 03716 } 03717 03718 if (h->is_avc && (nalsize != consumed) && nalsize){ 03719 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize); 03720 } 03721 03722 buf_index += consumed; 03723 nal_index++; 03724 03725 if(pass == 0) { 03726 // packets can sometimes contain multiple PPS/SPS 03727 // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely 03728 // if so, when frame threading we can't start the next thread until we've read all of them 03729 switch (hx->nal_unit_type) { 03730 case NAL_SPS: 03731 case NAL_PPS: 03732 nals_needed = nal_index; 03733 break; 03734 case NAL_IDR_SLICE: 03735 case NAL_SLICE: 03736 init_get_bits(&hx->s.gb, ptr, bit_length); 03737 if (!get_ue_golomb(&hx->s.gb)) 03738 nals_needed = nal_index; 03739 } 03740 continue; 03741 } 03742 03743 //FIXME do not discard SEI id 03744 if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0) 03745 continue; 03746 03747 again: 03748 err = 0; 03749 switch(hx->nal_unit_type){ 03750 case NAL_IDR_SLICE: 03751 if (h->nal_unit_type != NAL_IDR_SLICE) { 03752 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices"); 03753 return -1; 03754 } 03755 idr(h); //FIXME ensure we don't loose some frames if there is reordering 03756 case NAL_SLICE: 03757 init_get_bits(&hx->s.gb, ptr, bit_length); 03758 hx->intra_gb_ptr= 03759 hx->inter_gb_ptr= &hx->s.gb; 03760 hx->s.data_partitioning = 0; 03761 03762 if((err = decode_slice_header(hx, h))) 03763 break; 03764 03765 s->current_picture_ptr->key_frame |= 03766 (hx->nal_unit_type == NAL_IDR_SLICE) || 03767 (h->sei_recovery_frame_cnt >= 0); 03768 03769 if (h->current_slice == 1) { 03770 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) { 03771 decode_postinit(h, nal_index >= nals_needed); 03772 } 03773 03774 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0) 03775 return -1; 03776 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 03777 ff_vdpau_h264_picture_start(s); 03778 } 03779 03780 if(hx->redundant_pic_count==0 03781 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) 03782 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=AV_PICTURE_TYPE_B) 03783 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I) 03784 && avctx->skip_frame < AVDISCARD_ALL){ 03785 if(avctx->hwaccel) { 03786 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0) 03787 return -1; 03788 }else 03789 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){ 03790 static const uint8_t start_code[] = {0x00, 0x00, 0x01}; 03791 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code)); 03792 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed ); 03793 }else 03794 context_count++; 03795 } 03796 break; 03797 case NAL_DPA: 03798 init_get_bits(&hx->s.gb, ptr, bit_length); 03799 hx->intra_gb_ptr= 03800 hx->inter_gb_ptr= NULL; 03801 03802 if ((err = decode_slice_header(hx, h)) < 0) 03803 break; 03804 03805 hx->s.data_partitioning = 1; 03806 03807 break; 03808 case NAL_DPB: 03809 init_get_bits(&hx->intra_gb, ptr, bit_length); 03810 hx->intra_gb_ptr= &hx->intra_gb; 03811 break; 03812 case NAL_DPC: 03813 init_get_bits(&hx->inter_gb, ptr, bit_length); 03814 hx->inter_gb_ptr= &hx->inter_gb; 03815 03816 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning 03817 && s->context_initialized 03818 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) 03819 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=AV_PICTURE_TYPE_B) 03820 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I) 03821 && avctx->skip_frame < AVDISCARD_ALL) 03822 context_count++; 03823 break; 03824 case NAL_SEI: 03825 init_get_bits(&s->gb, ptr, bit_length); 03826 ff_h264_decode_sei(h); 03827 break; 03828 case NAL_SPS: 03829 init_get_bits(&s->gb, ptr, bit_length); 03830 ff_h264_decode_seq_parameter_set(h); 03831 03832 if (s->flags& CODEC_FLAG_LOW_DELAY || 03833 (h->sps.bitstream_restriction_flag && !h->sps.num_reorder_frames)) 03834 s->low_delay=1; 03835 03836 if(avctx->has_b_frames < 2) 03837 avctx->has_b_frames= !s->low_delay; 03838 03839 if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) { 03840 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) { 03841 avctx->bits_per_raw_sample = h->sps.bit_depth_luma; 03842 h->pixel_shift = h->sps.bit_depth_luma > 8; 03843 03844 ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); 03845 ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); 03846 dsputil_init(&s->dsp, s->avctx); 03847 } else { 03848 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); 03849 return -1; 03850 } 03851 } 03852 break; 03853 case NAL_PPS: 03854 init_get_bits(&s->gb, ptr, bit_length); 03855 03856 ff_h264_decode_picture_parameter_set(h, bit_length); 03857 03858 break; 03859 case NAL_AUD: 03860 case NAL_END_SEQUENCE: 03861 case NAL_END_STREAM: 03862 case NAL_FILLER_DATA: 03863 case NAL_SPS_EXT: 03864 case NAL_AUXILIARY_SLICE: 03865 break; 03866 default: 03867 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length); 03868 } 03869 03870 if(context_count == h->max_contexts) { 03871 execute_decode_slices(h, context_count); 03872 context_count = 0; 03873 } 03874 03875 if (err < 0) 03876 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); 03877 else if(err == 1) { 03878 /* Slice could not be decoded in parallel mode, copy down 03879 * NAL unit stuff to context 0 and restart. Note that 03880 * rbsp_buffer is not transferred, but since we no longer 03881 * run in parallel mode this should not be an issue. */ 03882 h->nal_unit_type = hx->nal_unit_type; 03883 h->nal_ref_idc = hx->nal_ref_idc; 03884 hx = h; 03885 goto again; 03886 } 03887 } 03888 } 03889 if(context_count) 03890 execute_decode_slices(h, context_count); 03891 return buf_index; 03892 } 03893 03897 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){ 03898 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...) 03899 if(pos+10>buf_size) pos=buf_size; // oops ;) 03900 03901 return pos; 03902 } 03903 03904 static int decode_frame(AVCodecContext *avctx, 03905 void *data, int *data_size, 03906 AVPacket *avpkt) 03907 { 03908 const uint8_t *buf = avpkt->data; 03909 int buf_size = avpkt->size; 03910 H264Context *h = avctx->priv_data; 03911 MpegEncContext *s = &h->s; 03912 AVFrame *pict = data; 03913 int buf_index; 03914 03915 s->flags= avctx->flags; 03916 s->flags2= avctx->flags2; 03917 03918 /* end of stream, output what is still in the buffers */ 03919 out: 03920 if (buf_size == 0) { 03921 Picture *out; 03922 int i, out_idx; 03923 03924 s->current_picture_ptr = NULL; 03925 03926 //FIXME factorize this with the output code below 03927 out = h->delayed_pic[0]; 03928 out_idx = 0; 03929 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++) 03930 if(h->delayed_pic[i]->poc < out->poc){ 03931 out = h->delayed_pic[i]; 03932 out_idx = i; 03933 } 03934 03935 for(i=out_idx; h->delayed_pic[i]; i++) 03936 h->delayed_pic[i] = h->delayed_pic[i+1]; 03937 03938 if(out){ 03939 *data_size = sizeof(AVFrame); 03940 *pict= *(AVFrame*)out; 03941 } 03942 03943 return 0; 03944 } 03945 03946 buf_index=decode_nal_units(h, buf, buf_size); 03947 if(buf_index < 0) 03948 return -1; 03949 03950 if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) { 03951 buf_size = 0; 03952 goto out; 03953 } 03954 03955 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){ 03956 if (avctx->skip_frame >= AVDISCARD_NONREF) 03957 return 0; 03958 av_log(avctx, AV_LOG_ERROR, "no frame!\n"); 03959 return -1; 03960 } 03961 03962 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ 03963 03964 if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1); 03965 03966 field_end(h, 0); 03967 03968 if (!h->next_output_pic) { 03969 /* Wait for second field. */ 03970 *data_size = 0; 03971 03972 } else { 03973 *data_size = sizeof(AVFrame); 03974 *pict = *(AVFrame*)h->next_output_pic; 03975 } 03976 } 03977 03978 assert(pict->data[0] || !*data_size); 03979 ff_print_debug_info(s, pict); 03980 //printf("out %d\n", (int)pict->data[0]); 03981 03982 return get_consumed_bytes(s, buf_index, buf_size); 03983 } 03984 #if 0 03985 static inline void fill_mb_avail(H264Context *h){ 03986 MpegEncContext * const s = &h->s; 03987 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; 03988 03989 if(s->mb_y){ 03990 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num; 03991 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num; 03992 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num; 03993 }else{ 03994 h->mb_avail[0]= 03995 h->mb_avail[1]= 03996 h->mb_avail[2]= 0; 03997 } 03998 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num; 03999 h->mb_avail[4]= 1; //FIXME move out 04000 h->mb_avail[5]= 0; //FIXME move out 04001 } 04002 #endif 04003 04004 #ifdef TEST 04005 #undef printf 04006 #undef random 04007 #define COUNT 8000 04008 #define SIZE (COUNT*40) 04009 int main(void){ 04010 int i; 04011 uint8_t temp[SIZE]; 04012 PutBitContext pb; 04013 GetBitContext gb; 04014 // int int_temp[10000]; 04015 DSPContext dsp; 04016 AVCodecContext avctx; 04017 04018 dsputil_init(&dsp, &avctx); 04019 04020 init_put_bits(&pb, temp, SIZE); 04021 printf("testing unsigned exp golomb\n"); 04022 for(i=0; i<COUNT; i++){ 04023 START_TIMER 04024 set_ue_golomb(&pb, i); 04025 STOP_TIMER("set_ue_golomb"); 04026 } 04027 flush_put_bits(&pb); 04028 04029 init_get_bits(&gb, temp, 8*SIZE); 04030 for(i=0; i<COUNT; i++){ 04031 int j, s; 04032 04033 s= show_bits(&gb, 24); 04034 04035 START_TIMER 04036 j= get_ue_golomb(&gb); 04037 if(j != i){ 04038 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s); 04039 // return -1; 04040 } 04041 STOP_TIMER("get_ue_golomb"); 04042 } 04043 04044 04045 init_put_bits(&pb, temp, SIZE); 04046 printf("testing signed exp golomb\n"); 04047 for(i=0; i<COUNT; i++){ 04048 START_TIMER 04049 set_se_golomb(&pb, i - COUNT/2); 04050 STOP_TIMER("set_se_golomb"); 04051 } 04052 flush_put_bits(&pb); 04053 04054 init_get_bits(&gb, temp, 8*SIZE); 04055 for(i=0; i<COUNT; i++){ 04056 int j, s; 04057 04058 s= show_bits(&gb, 24); 04059 04060 START_TIMER 04061 j= get_se_golomb(&gb); 04062 if(j != i - COUNT/2){ 04063 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s); 04064 // return -1; 04065 } 04066 STOP_TIMER("get_se_golomb"); 04067 } 04068 04069 #if 0 04070 printf("testing 4x4 (I)DCT\n"); 04071 04072 DCTELEM block[16]; 04073 uint8_t src[16], ref[16]; 04074 uint64_t error= 0, max_error=0; 04075 04076 for(i=0; i<COUNT; i++){ 04077 int j; 04078 // printf("%d %d %d\n", r1, r2, (r2-r1)*16); 04079 for(j=0; j<16; j++){ 04080 ref[j]= random()%255; 04081 src[j]= random()%255; 04082 } 04083 04084 h264_diff_dct_c(block, src, ref, 4); 04085 04086 //normalize 04087 for(j=0; j<16; j++){ 04088 // printf("%d ", block[j]); 04089 block[j]= block[j]*4; 04090 if(j&1) block[j]= (block[j]*4 + 2)/5; 04091 if(j&4) block[j]= (block[j]*4 + 2)/5; 04092 } 04093 // printf("\n"); 04094 04095 h->h264dsp.h264_idct_add(ref, block, 4); 04096 /* for(j=0; j<16; j++){ 04097 printf("%d ", ref[j]); 04098 } 04099 printf("\n");*/ 04100 04101 for(j=0; j<16; j++){ 04102 int diff= FFABS(src[j] - ref[j]); 04103 04104 error+= diff*diff; 04105 max_error= FFMAX(max_error, diff); 04106 } 04107 } 04108 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error ); 04109 printf("testing quantizer\n"); 04110 for(qp=0; qp<52; qp++){ 04111 for(i=0; i<16; i++) 04112 src1_block[i]= src2_block[i]= random()%255; 04113 04114 } 04115 printf("Testing NAL layer\n"); 04116 04117 uint8_t bitstream[COUNT]; 04118 uint8_t nal[COUNT*2]; 04119 H264Context h; 04120 memset(&h, 0, sizeof(H264Context)); 04121 04122 for(i=0; i<COUNT; i++){ 04123 int zeros= i; 04124 int nal_length; 04125 int consumed; 04126 int out_length; 04127 uint8_t *out; 04128 int j; 04129 04130 for(j=0; j<COUNT; j++){ 04131 bitstream[j]= (random() % 255) + 1; 04132 } 04133 04134 for(j=0; j<zeros; j++){ 04135 int pos= random() % COUNT; 04136 while(bitstream[pos] == 0){ 04137 pos++; 04138 pos %= COUNT; 04139 } 04140 bitstream[pos]=0; 04141 } 04142 04143 START_TIMER 04144 04145 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2); 04146 if(nal_length<0){ 04147 printf("encoding failed\n"); 04148 return -1; 04149 } 04150 04151 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length); 04152 04153 STOP_TIMER("NAL") 04154 04155 if(out_length != COUNT){ 04156 printf("incorrect length %d %d\n", out_length, COUNT); 04157 return -1; 04158 } 04159 04160 if(consumed != nal_length){ 04161 printf("incorrect consumed length %d %d\n", nal_length, consumed); 04162 return -1; 04163 } 04164 04165 if(memcmp(bitstream, out, COUNT)){ 04166 printf("mismatch\n"); 04167 return -1; 04168 } 04169 } 04170 #endif 04171 04172 printf("Testing RBSP\n"); 04173 04174 04175 return 0; 04176 } 04177 #endif /* TEST */ 04178 04179 04180 av_cold void ff_h264_free_context(H264Context *h) 04181 { 04182 int i; 04183 04184 free_tables(h, 1); //FIXME cleanup init stuff perhaps 04185 04186 for(i = 0; i < MAX_SPS_COUNT; i++) 04187 av_freep(h->sps_buffers + i); 04188 04189 for(i = 0; i < MAX_PPS_COUNT; i++) 04190 av_freep(h->pps_buffers + i); 04191 } 04192 04193 av_cold int ff_h264_decode_end(AVCodecContext *avctx) 04194 { 04195 H264Context *h = avctx->priv_data; 04196 MpegEncContext *s = &h->s; 04197 04198 ff_h264_free_context(h); 04199 04200 MPV_common_end(s); 04201 04202 // memset(h, 0, sizeof(H264Context)); 04203 04204 return 0; 04205 } 04206 04207 static const AVProfile profiles[] = { 04208 { FF_PROFILE_H264_BASELINE, "Baseline" }, 04209 { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline" }, 04210 { FF_PROFILE_H264_MAIN, "Main" }, 04211 { FF_PROFILE_H264_EXTENDED, "Extended" }, 04212 { FF_PROFILE_H264_HIGH, "High" }, 04213 { FF_PROFILE_H264_HIGH_10, "High 10" }, 04214 { FF_PROFILE_H264_HIGH_10_INTRA, "High 10 Intra" }, 04215 { FF_PROFILE_H264_HIGH_422, "High 4:2:2" }, 04216 { FF_PROFILE_H264_HIGH_422_INTRA, "High 4:2:2 Intra" }, 04217 { FF_PROFILE_H264_HIGH_444, "High 4:4:4" }, 04218 { FF_PROFILE_H264_HIGH_444_PREDICTIVE, "High 4:4:4 Predictive" }, 04219 { FF_PROFILE_H264_HIGH_444_INTRA, "High 4:4:4 Intra" }, 04220 { FF_PROFILE_H264_CAVLC_444, "CAVLC 4:4:4" }, 04221 { FF_PROFILE_UNKNOWN }, 04222 }; 04223 04224 AVCodec ff_h264_decoder = { 04225 "h264", 04226 AVMEDIA_TYPE_VIDEO, 04227 CODEC_ID_H264, 04228 sizeof(H264Context), 04229 ff_h264_decode_init, 04230 NULL, 04231 ff_h264_decode_end, 04232 decode_frame, 04233 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY | 04234 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS, 04235 .flush= flush_dpb, 04236 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"), 04237 .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy), 04238 .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context), 04239 .profiles = NULL_IF_CONFIG_SMALL(profiles), 04240 }; 04241 04242 #if CONFIG_H264_VDPAU_DECODER 04243 AVCodec ff_h264_vdpau_decoder = { 04244 "h264_vdpau", 04245 AVMEDIA_TYPE_VIDEO, 04246 CODEC_ID_H264, 04247 sizeof(H264Context), 04248 ff_h264_decode_init, 04249 NULL, 04250 ff_h264_decode_end, 04251 decode_frame, 04252 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU, 04253 .flush= flush_dpb, 04254 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"), 04255 .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE}, 04256 .profiles = NULL_IF_CONFIG_SMALL(profiles), 04257 }; 04258 #endif