Libav
|
00001 /* 00002 * DXVA2 H264 HW acceleration. 00003 * 00004 * copyright (c) 2009 Laurent Aimar 00005 * 00006 * This file is part of FFmpeg. 00007 * 00008 * FFmpeg is free software; you can redistribute it and/or 00009 * modify it under the terms of the GNU Lesser General Public 00010 * License as published by the Free Software Foundation; either 00011 * version 2.1 of the License, or (at your option) any later version. 00012 * 00013 * FFmpeg is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 * Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public 00019 * License along with FFmpeg; if not, write to the Free Software 00020 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00021 */ 00022 00023 #include "dxva2_internal.h" 00024 #include "h264.h" 00025 #include "h264data.h" 00026 00027 struct dxva2_picture_context { 00028 DXVA_PicParams_H264 pp; 00029 DXVA_Qmatrix_H264 qm; 00030 unsigned slice_count; 00031 DXVA_Slice_H264_Short slice_short[MAX_SLICES]; 00032 DXVA_Slice_H264_Long slice_long[MAX_SLICES]; 00033 const uint8_t *bitstream; 00034 unsigned bitstream_size; 00035 }; 00036 00037 static void fill_picture_entry(DXVA_PicEntry_H264 *pic, 00038 unsigned index, unsigned flag) 00039 { 00040 assert((index&0x7f) == index && (flag&0x01) == flag); 00041 pic->bPicEntry = index | (flag << 7); 00042 } 00043 00044 static void fill_picture_parameters(struct dxva_context *ctx, const H264Context *h, 00045 DXVA_PicParams_H264 *pp) 00046 { 00047 const MpegEncContext *s = &h->s; 00048 const Picture *current_picture = s->current_picture_ptr; 00049 int i; 00050 00051 memset(pp, 0, sizeof(*pp)); 00052 /* Configure current picture */ 00053 fill_picture_entry(&pp->CurrPic, 00054 ff_dxva2_get_surface_index(ctx, current_picture), 00055 s->picture_structure == PICT_BOTTOM_FIELD); 00056 /* Configure the set of references */ 00057 pp->UsedForReferenceFlags = 0; 00058 pp->NonExistingFrameFlags = 0; 00059 for (i = 0; i < FF_ARRAY_ELEMS(pp->RefFrameList); i++) { 00060 if (i < h->short_ref_count + h->long_ref_count) { 00061 const Picture *r; 00062 if (i < h->short_ref_count) { 00063 r = h->short_ref[i]; 00064 assert(!r->long_ref); 00065 } else { 00066 r = h->long_ref[i - h->short_ref_count]; 00067 assert(r->long_ref); 00068 } 00069 fill_picture_entry(&pp->RefFrameList[i], 00070 ff_dxva2_get_surface_index(ctx, r), 00071 r->long_ref != 0); 00072 00073 if ((r->reference & PICT_TOP_FIELD) && r->field_poc[0] != INT_MAX) 00074 pp->FieldOrderCntList[i][0] = r->field_poc[0]; 00075 if ((r->reference & PICT_BOTTOM_FIELD) && r->field_poc[1] != INT_MAX) 00076 pp->FieldOrderCntList[i][1] = r->field_poc[1]; 00077 00078 pp->FrameNumList[i] = r->long_ref ? r->pic_id : r->frame_num; 00079 if (r->reference & PICT_TOP_FIELD) 00080 pp->UsedForReferenceFlags |= 1 << (2*i + 0); 00081 if (r->reference & PICT_BOTTOM_FIELD) 00082 pp->UsedForReferenceFlags |= 1 << (2*i + 1); 00083 } else { 00084 pp->RefFrameList[i].bPicEntry = 0xff; 00085 pp->FieldOrderCntList[i][0] = 0; 00086 pp->FieldOrderCntList[i][1] = 0; 00087 pp->FrameNumList[i] = 0; 00088 } 00089 } 00090 00091 pp->wFrameWidthInMbsMinus1 = s->mb_width - 1; 00092 pp->wFrameHeightInMbsMinus1 = s->mb_height - 1; 00093 pp->num_ref_frames = h->sps.ref_frame_count; 00094 00095 pp->wBitFields = ((s->picture_structure != PICT_FRAME) << 0) | 00096 (h->sps.mb_aff << 1) | 00097 (h->sps.residual_color_transform_flag << 2) | 00098 /* sp_for_switch_flag (not implemented by FFmpeg) */ 00099 (0 << 3) | 00100 (h->sps.chroma_format_idc << 4) | 00101 ((h->nal_ref_idc != 0) << 6) | 00102 (h->pps.constrained_intra_pred << 7) | 00103 (h->pps.weighted_pred << 8) | 00104 (h->pps.weighted_bipred_idc << 9) | 00105 /* MbsConsecutiveFlag */ 00106 (1 << 11) | 00107 (h->sps.frame_mbs_only_flag << 12) | 00108 (h->pps.transform_8x8_mode << 13) | 00109 ((h->sps.level_idc >= 31) << 14) | 00110 /* IntraPicFlag (Modified if we detect a non 00111 * intra slice in decode_slice) */ 00112 (1 << 15); 00113 00114 pp->bit_depth_luma_minus8 = h->sps.bit_depth_luma - 8; 00115 pp->bit_depth_chroma_minus8 = h->sps.bit_depth_chroma - 8; 00116 pp->Reserved16Bits = 3; /* FIXME is there a way to detect the right mode ? */ 00117 pp->StatusReportFeedbackNumber = 1 + ctx->report_id++; 00118 pp->CurrFieldOrderCnt[0] = 0; 00119 if ((s->picture_structure & PICT_TOP_FIELD) && 00120 current_picture->field_poc[0] != INT_MAX) 00121 pp->CurrFieldOrderCnt[0] = current_picture->field_poc[0]; 00122 pp->CurrFieldOrderCnt[1] = 0; 00123 if ((s->picture_structure & PICT_BOTTOM_FIELD) && 00124 current_picture->field_poc[1] != INT_MAX) 00125 pp->CurrFieldOrderCnt[1] = current_picture->field_poc[1]; 00126 pp->pic_init_qs_minus26 = h->pps.init_qs - 26; 00127 pp->chroma_qp_index_offset = h->pps.chroma_qp_index_offset[0]; 00128 pp->second_chroma_qp_index_offset = h->pps.chroma_qp_index_offset[1]; 00129 pp->ContinuationFlag = 1; 00130 pp->pic_init_qp_minus26 = h->pps.init_qp - 26; 00131 pp->num_ref_idx_l0_active_minus1 = h->pps.ref_count[0] - 1; 00132 pp->num_ref_idx_l1_active_minus1 = h->pps.ref_count[1] - 1; 00133 pp->Reserved8BitsA = 0; 00134 pp->frame_num = h->frame_num; 00135 pp->log2_max_frame_num_minus4 = h->sps.log2_max_frame_num - 4; 00136 pp->pic_order_cnt_type = h->sps.poc_type; 00137 if (h->sps.poc_type == 0) 00138 pp->log2_max_pic_order_cnt_lsb_minus4 = h->sps.log2_max_poc_lsb - 4; 00139 else if (h->sps.poc_type == 1) 00140 pp->delta_pic_order_always_zero_flag = h->sps.delta_pic_order_always_zero_flag; 00141 pp->direct_8x8_inference_flag = h->sps.direct_8x8_inference_flag; 00142 pp->entropy_coding_mode_flag = h->pps.cabac; 00143 pp->pic_order_present_flag = h->pps.pic_order_present; 00144 pp->num_slice_groups_minus1 = h->pps.slice_group_count - 1; 00145 pp->slice_group_map_type = h->pps.mb_slice_group_map_type; 00146 pp->deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present; 00147 pp->redundant_pic_cnt_present_flag= h->pps.redundant_pic_cnt_present; 00148 pp->Reserved8BitsB = 0; 00149 pp->slice_group_change_rate_minus1= 0; /* XXX not implemented by FFmpeg */ 00150 //pp->SliceGroupMap[810]; /* XXX not implemented by FFmpeg */ 00151 } 00152 00153 static void fill_scaling_lists(const H264Context *h, DXVA_Qmatrix_H264 *qm) 00154 { 00155 unsigned i, j; 00156 memset(qm, 0, sizeof(*qm)); 00157 for (i = 0; i < 6; i++) 00158 for (j = 0; j < 16; j++) 00159 qm->bScalingLists4x4[i][j] = h->pps.scaling_matrix4[i][zigzag_scan[j]]; 00160 00161 for (i = 0; i < 2; i++) 00162 for (j = 0; j < 64; j++) 00163 qm->bScalingLists8x8[i][j] = h->pps.scaling_matrix8[i][ff_zigzag_direct[j]]; 00164 } 00165 00166 static int is_slice_short(struct dxva_context *ctx) 00167 { 00168 assert(ctx->cfg->ConfigBitstreamRaw == 1 || 00169 ctx->cfg->ConfigBitstreamRaw == 2); 00170 return ctx->cfg->ConfigBitstreamRaw == 2; 00171 } 00172 00173 static void fill_slice_short(DXVA_Slice_H264_Short *slice, 00174 unsigned position, unsigned size) 00175 { 00176 memset(slice, 0, sizeof(*slice)); 00177 slice->BSNALunitDataLocation = position; 00178 slice->SliceBytesInBuffer = size; 00179 slice->wBadSliceChopping = 0; 00180 } 00181 00182 static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, 00183 unsigned position, unsigned size) 00184 { 00185 const H264Context *h = avctx->priv_data; 00186 struct dxva_context *ctx = avctx->hwaccel_context; 00187 const MpegEncContext *s = &h->s; 00188 unsigned list; 00189 00190 memset(slice, 0, sizeof(*slice)); 00191 slice->BSNALunitDataLocation = position; 00192 slice->SliceBytesInBuffer = size; 00193 slice->wBadSliceChopping = 0; 00194 00195 slice->first_mb_in_slice = (s->mb_y >> FIELD_OR_MBAFF_PICTURE) * s->mb_width + s->mb_x; 00196 slice->NumMbsForSlice = 0; /* XXX it is set once we have all slices */ 00197 slice->BitOffsetToSliceData = get_bits_count(&s->gb) + 8; 00198 slice->slice_type = ff_h264_get_slice_type(h); 00199 if (h->slice_type_fixed) 00200 slice->slice_type += 5; 00201 slice->luma_log2_weight_denom = h->luma_log2_weight_denom; 00202 slice->chroma_log2_weight_denom = h->chroma_log2_weight_denom; 00203 if (h->list_count > 0) 00204 slice->num_ref_idx_l0_active_minus1 = h->ref_count[0] - 1; 00205 if (h->list_count > 1) 00206 slice->num_ref_idx_l1_active_minus1 = h->ref_count[1] - 1; 00207 slice->slice_alpha_c0_offset_div2 = h->slice_alpha_c0_offset / 2 - 26; 00208 slice->slice_beta_offset_div2 = h->slice_beta_offset / 2 - 26; 00209 slice->Reserved8Bits = 0; 00210 00211 for (list = 0; list < 2; list++) { 00212 unsigned i; 00213 for (i = 0; i < FF_ARRAY_ELEMS(slice->RefPicList[list]); i++) { 00214 if (list < h->list_count && i < h->ref_count[list]) { 00215 const Picture *r = &h->ref_list[list][i]; 00216 unsigned plane; 00217 fill_picture_entry(&slice->RefPicList[list][i], 00218 ff_dxva2_get_surface_index(ctx, r), 00219 r->reference == PICT_BOTTOM_FIELD); 00220 for (plane = 0; plane < 3; plane++) { 00221 int w, o; 00222 if (plane == 0 && h->luma_weight_flag[list]) { 00223 w = h->luma_weight[i][list][0]; 00224 o = h->luma_weight[i][list][1]; 00225 } else if (plane >= 1 && h->chroma_weight_flag[list]) { 00226 w = h->chroma_weight[i][list][plane-1][0]; 00227 o = h->chroma_weight[i][list][plane-1][1]; 00228 } else { 00229 w = 1 << (plane == 0 ? h->luma_log2_weight_denom : 00230 h->chroma_log2_weight_denom); 00231 o = 0; 00232 } 00233 slice->Weights[list][i][plane][0] = w; 00234 slice->Weights[list][i][plane][1] = o; 00235 } 00236 } else { 00237 unsigned plane; 00238 slice->RefPicList[list][i].bPicEntry = 0xff; 00239 for (plane = 0; plane < 3; plane++) { 00240 slice->Weights[list][i][plane][0] = 0; 00241 slice->Weights[list][i][plane][1] = 0; 00242 } 00243 } 00244 } 00245 } 00246 slice->slice_qs_delta = 0; /* XXX not implemented by FFmpeg */ 00247 slice->slice_qp_delta = s->qscale - h->pps.init_qp; 00248 slice->redundant_pic_cnt = h->redundant_pic_count; 00249 if (h->slice_type == FF_B_TYPE) 00250 slice->direct_spatial_mv_pred_flag = h->direct_spatial_mv_pred; 00251 slice->cabac_init_idc = h->pps.cabac ? h->cabac_init_idc : 0; 00252 if (h->deblocking_filter < 2) 00253 slice->disable_deblocking_filter_idc = 1 - h->deblocking_filter; 00254 else 00255 slice->disable_deblocking_filter_idc = h->deblocking_filter; 00256 slice->slice_id = h->current_slice - 1; 00257 } 00258 00259 static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx, 00260 DXVA2_DecodeBufferDesc *bs, 00261 DXVA2_DecodeBufferDesc *sc) 00262 { 00263 const H264Context *h = avctx->priv_data; 00264 const MpegEncContext *s = &h->s; 00265 const unsigned mb_count = s->mb_width * s->mb_height; 00266 struct dxva_context *ctx = avctx->hwaccel_context; 00267 const Picture *current_picture = h->s.current_picture_ptr; 00268 struct dxva2_picture_context *ctx_pic = current_picture->hwaccel_picture_private; 00269 DXVA_Slice_H264_Short *slice = NULL; 00270 uint8_t *dxva_data, *current, *end; 00271 unsigned dxva_size; 00272 void *slice_data; 00273 unsigned slice_size; 00274 unsigned padding; 00275 unsigned i; 00276 00277 /* Create an annex B bitstream buffer with only slice NAL and finalize slice */ 00278 if (FAILED(IDirectXVideoDecoder_GetBuffer(ctx->decoder, 00279 DXVA2_BitStreamDateBufferType, 00280 &dxva_data, &dxva_size))) 00281 return -1; 00282 current = dxva_data; 00283 end = dxva_data + dxva_size; 00284 00285 for (i = 0; i < ctx_pic->slice_count; i++) { 00286 static const uint8_t start_code[] = { 0, 0, 1 }; 00287 static const unsigned start_code_size = sizeof(start_code); 00288 unsigned position, size; 00289 00290 assert(offsetof(DXVA_Slice_H264_Short, BSNALunitDataLocation) == 00291 offsetof(DXVA_Slice_H264_Long, BSNALunitDataLocation)); 00292 assert(offsetof(DXVA_Slice_H264_Short, SliceBytesInBuffer) == 00293 offsetof(DXVA_Slice_H264_Long, SliceBytesInBuffer)); 00294 00295 if (is_slice_short(ctx)) 00296 slice = &ctx_pic->slice_short[i]; 00297 else 00298 slice = (DXVA_Slice_H264_Short*)&ctx_pic->slice_long[i]; 00299 00300 position = slice->BSNALunitDataLocation; 00301 size = slice->SliceBytesInBuffer; 00302 if (start_code_size + size > end - current) { 00303 av_log(avctx, AV_LOG_ERROR, "Failed to build bitstream"); 00304 break; 00305 } 00306 00307 slice->BSNALunitDataLocation = current - dxva_data; 00308 slice->SliceBytesInBuffer = start_code_size + size; 00309 00310 if (!is_slice_short(ctx)) { 00311 DXVA_Slice_H264_Long *slice_long = (DXVA_Slice_H264_Long*)slice; 00312 if (i < ctx_pic->slice_count - 1) 00313 slice_long->NumMbsForSlice = 00314 slice_long[1].first_mb_in_slice - slice_long[0].first_mb_in_slice; 00315 else 00316 slice_long->NumMbsForSlice = mb_count - slice_long->first_mb_in_slice; 00317 } 00318 00319 memcpy(current, start_code, start_code_size); 00320 current += start_code_size; 00321 00322 memcpy(current, &ctx_pic->bitstream[position], size); 00323 current += size; 00324 } 00325 padding = FFMIN(128 - ((current - dxva_data) & 127), end - current); 00326 if (slice && padding > 0) { 00327 memset(current, 0, padding); 00328 current += padding; 00329 00330 slice->SliceBytesInBuffer += padding; 00331 } 00332 if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(ctx->decoder, 00333 DXVA2_BitStreamDateBufferType))) 00334 return -1; 00335 if (i < ctx_pic->slice_count) 00336 return -1; 00337 00338 memset(bs, 0, sizeof(*bs)); 00339 bs->CompressedBufferType = DXVA2_BitStreamDateBufferType; 00340 bs->DataSize = current - dxva_data; 00341 bs->NumMBsInBuffer = mb_count; 00342 00343 if (is_slice_short(ctx)) { 00344 slice_data = ctx_pic->slice_short; 00345 slice_size = ctx_pic->slice_count * sizeof(*ctx_pic->slice_short); 00346 } else { 00347 slice_data = ctx_pic->slice_long; 00348 slice_size = ctx_pic->slice_count * sizeof(*ctx_pic->slice_long); 00349 } 00350 assert((bs->DataSize & 127) == 0); 00351 return ff_dxva2_commit_buffer(avctx, ctx, sc, 00352 DXVA2_SliceControlBufferType, 00353 slice_data, slice_size, mb_count); 00354 } 00355 00356 00357 static int start_frame(AVCodecContext *avctx, 00358 av_unused const uint8_t *buffer, 00359 av_unused uint32_t size) 00360 { 00361 const H264Context *h = avctx->priv_data; 00362 struct dxva_context *ctx = avctx->hwaccel_context; 00363 struct dxva2_picture_context *ctx_pic = h->s.current_picture_ptr->hwaccel_picture_private; 00364 00365 if (!ctx->decoder || !ctx->cfg || ctx->surface_count <= 0) 00366 return -1; 00367 assert(ctx_pic); 00368 00369 /* Fill up DXVA_PicParams_H264 */ 00370 fill_picture_parameters(ctx, h, &ctx_pic->pp); 00371 00372 /* Fill up DXVA_Qmatrix_H264 */ 00373 fill_scaling_lists(h, &ctx_pic->qm); 00374 00375 ctx_pic->slice_count = 0; 00376 ctx_pic->bitstream_size = 0; 00377 ctx_pic->bitstream = NULL; 00378 return 0; 00379 } 00380 00381 static int decode_slice(AVCodecContext *avctx, 00382 const uint8_t *buffer, uint32_t size) 00383 { 00384 const H264Context *h = avctx->priv_data; 00385 struct dxva_context *ctx = avctx->hwaccel_context; 00386 const Picture *current_picture = h->s.current_picture_ptr; 00387 struct dxva2_picture_context *ctx_pic = current_picture->hwaccel_picture_private; 00388 unsigned position; 00389 00390 if (ctx_pic->slice_count >= MAX_SLICES) 00391 return -1; 00392 00393 if (!ctx_pic->bitstream) 00394 ctx_pic->bitstream = buffer; 00395 ctx_pic->bitstream_size += size; 00396 00397 position = buffer - ctx_pic->bitstream; 00398 if (is_slice_short(ctx)) 00399 fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count], 00400 position, size); 00401 else 00402 fill_slice_long(avctx, &ctx_pic->slice_long[ctx_pic->slice_count], 00403 position, size); 00404 ctx_pic->slice_count++; 00405 00406 if (h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE) 00407 ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */ 00408 return 0; 00409 } 00410 00411 static int end_frame(AVCodecContext *avctx) 00412 { 00413 H264Context *h = avctx->priv_data; 00414 MpegEncContext *s = &h->s; 00415 struct dxva2_picture_context *ctx_pic = 00416 h->s.current_picture_ptr->hwaccel_picture_private; 00417 00418 if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0) 00419 return -1; 00420 return ff_dxva2_common_end_frame(avctx, s, 00421 &ctx_pic->pp, sizeof(ctx_pic->pp), 00422 &ctx_pic->qm, sizeof(ctx_pic->qm), 00423 commit_bitstream_and_slice_buffer); 00424 } 00425 00426 AVHWAccel h264_dxva2_hwaccel = { 00427 .name = "h264_dxva2", 00428 .type = AVMEDIA_TYPE_VIDEO, 00429 .id = CODEC_ID_H264, 00430 .pix_fmt = PIX_FMT_DXVA2_VLD, 00431 .capabilities = 0, 00432 .start_frame = start_frame, 00433 .decode_slice = decode_slice, 00434 .end_frame = end_frame, 00435 .priv_data_size = sizeof(struct dxva2_picture_context), 00436 }; 00437