• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/dnxhdenc.c

Go to the documentation of this file.
00001 /*
00002  * VC3/DNxHD encoder
00003  * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
00004  *
00005  * VC-3 encoder funded by the British Broadcasting Corporation
00006  *
00007  * This file is part of FFmpeg.
00008  *
00009  * FFmpeg is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU Lesser General Public
00011  * License as published by the Free Software Foundation; either
00012  * version 2.1 of the License, or (at your option) any later version.
00013  *
00014  * FFmpeg is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017  * Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with FFmpeg; if not, write to the Free Software
00021  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00022  */
00023 
00024 //#define DEBUG
00025 #define RC_VARIANCE 1 // use variance or ssd for fast rc
00026 
00027 #include "avcodec.h"
00028 #include "dsputil.h"
00029 #include "mpegvideo.h"
00030 #include "dnxhdenc.h"
00031 
00032 int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
00033 
00034 #define LAMBDA_FRAC_BITS 10
00035 
00036 static av_always_inline void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00037 {
00038     int i;
00039     for (i = 0; i < 4; i++) {
00040         block[0] = pixels[0]; block[1] = pixels[1];
00041         block[2] = pixels[2]; block[3] = pixels[3];
00042         block[4] = pixels[4]; block[5] = pixels[5];
00043         block[6] = pixels[6]; block[7] = pixels[7];
00044         pixels += line_size;
00045         block += 8;
00046     }
00047     memcpy(block   , block- 8, sizeof(*block)*8);
00048     memcpy(block+ 8, block-16, sizeof(*block)*8);
00049     memcpy(block+16, block-24, sizeof(*block)*8);
00050     memcpy(block+24, block-32, sizeof(*block)*8);
00051 }
00052 
00053 static int dnxhd_init_vlc(DNXHDEncContext *ctx)
00054 {
00055     int i, j, level, run;
00056     int max_level = 1<<(ctx->cid_table->bit_depth+2);
00057 
00058     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_codes, max_level*4*sizeof(*ctx->vlc_codes), fail);
00059     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_bits , max_level*4*sizeof(*ctx->vlc_bits ), fail);
00060     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_codes, 63*2                               , fail);
00061     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_bits , 63                                 , fail);
00062 
00063     ctx->vlc_codes += max_level*2;
00064     ctx->vlc_bits  += max_level*2;
00065     for (level = -max_level; level < max_level; level++) {
00066         for (run = 0; run < 2; run++) {
00067             int index = (level<<1)|run;
00068             int sign, offset = 0, alevel = level;
00069 
00070             MASK_ABS(sign, alevel);
00071             if (alevel > 64) {
00072                 offset = (alevel-1)>>6;
00073                 alevel -= offset<<6;
00074             }
00075             for (j = 0; j < 257; j++) {
00076                 if (ctx->cid_table->ac_level[j] == alevel &&
00077                     (!offset || (ctx->cid_table->ac_index_flag[j] && offset)) &&
00078                     (!run    || (ctx->cid_table->ac_run_flag  [j] && run))) {
00079                     assert(!ctx->vlc_codes[index]);
00080                     if (alevel) {
00081                         ctx->vlc_codes[index] = (ctx->cid_table->ac_codes[j]<<1)|(sign&1);
00082                         ctx->vlc_bits [index] = ctx->cid_table->ac_bits[j]+1;
00083                     } else {
00084                         ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j];
00085                         ctx->vlc_bits [index] = ctx->cid_table->ac_bits [j];
00086                     }
00087                     break;
00088                 }
00089             }
00090             assert(!alevel || j < 257);
00091             if (offset) {
00092                 ctx->vlc_codes[index] = (ctx->vlc_codes[index]<<ctx->cid_table->index_bits)|offset;
00093                 ctx->vlc_bits [index]+= ctx->cid_table->index_bits;
00094             }
00095         }
00096     }
00097     for (i = 0; i < 62; i++) {
00098         int run = ctx->cid_table->run[i];
00099         assert(run < 63);
00100         ctx->run_codes[run] = ctx->cid_table->run_codes[i];
00101         ctx->run_bits [run] = ctx->cid_table->run_bits[i];
00102     }
00103     return 0;
00104  fail:
00105     return -1;
00106 }
00107 
00108 static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
00109 {
00110     // init first elem to 1 to avoid div by 0 in convert_matrix
00111     uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t*
00112     int qscale, i;
00113 
00114     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l,   (ctx->m.avctx->qmax+1) * 64 *     sizeof(int)     , fail);
00115     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c,   (ctx->m.avctx->qmax+1) * 64 *     sizeof(int)     , fail);
00116     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
00117     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
00118 
00119     for (i = 1; i < 64; i++) {
00120         int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
00121         weight_matrix[j] = ctx->cid_table->luma_weight[i];
00122     }
00123     ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
00124                       ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
00125     for (i = 1; i < 64; i++) {
00126         int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
00127         weight_matrix[j] = ctx->cid_table->chroma_weight[i];
00128     }
00129     ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
00130                       ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
00131     for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
00132         for (i = 0; i < 64; i++) {
00133             ctx->qmatrix_l  [qscale]   [i] <<= 2; ctx->qmatrix_c  [qscale]   [i] <<= 2;
00134             ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
00135             ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
00136         }
00137     }
00138     return 0;
00139  fail:
00140     return -1;
00141 }
00142 
00143 static int dnxhd_init_rc(DNXHDEncContext *ctx)
00144 {
00145     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_rc, 8160*ctx->m.avctx->qmax*sizeof(RCEntry), fail);
00146     if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD)
00147         FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_cmp, ctx->m.mb_num*sizeof(RCCMPEntry), fail);
00148 
00149     ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4) * 8;
00150     ctx->qscale = 1;
00151     ctx->lambda = 2<<LAMBDA_FRAC_BITS; // qscale 2
00152     return 0;
00153  fail:
00154     return -1;
00155 }
00156 
00157 static int dnxhd_encode_init(AVCodecContext *avctx)
00158 {
00159     DNXHDEncContext *ctx = avctx->priv_data;
00160     int i, index;
00161 
00162     ctx->cid = ff_dnxhd_find_cid(avctx);
00163     if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) {
00164         av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
00165         return -1;
00166     }
00167     av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);
00168 
00169     index = ff_dnxhd_get_cid_table(ctx->cid);
00170     ctx->cid_table = &ff_dnxhd_cid_table[index];
00171 
00172     ctx->m.avctx = avctx;
00173     ctx->m.mb_intra = 1;
00174     ctx->m.h263_aic = 1;
00175 
00176     ctx->get_pixels_8x4_sym = dnxhd_get_pixels_8x4;
00177 
00178     dsputil_init(&ctx->m.dsp, avctx);
00179     ff_dct_common_init(&ctx->m);
00180 #if HAVE_MMX
00181     ff_dnxhd_init_mmx(ctx);
00182 #endif
00183     if (!ctx->m.dct_quantize)
00184         ctx->m.dct_quantize = dct_quantize_c;
00185 
00186     ctx->m.mb_height = (avctx->height + 15) / 16;
00187     ctx->m.mb_width  = (avctx->width  + 15) / 16;
00188 
00189     if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
00190         ctx->interlaced = 1;
00191         ctx->m.mb_height /= 2;
00192     }
00193 
00194     ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;
00195 
00196     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
00197         ctx->m.intra_quant_bias = avctx->intra_quant_bias;
00198     if (dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0) < 0) // XXX tune lbias/cbias
00199         return -1;
00200 
00201     if (dnxhd_init_vlc(ctx) < 0)
00202         return -1;
00203     if (dnxhd_init_rc(ctx) < 0)
00204         return -1;
00205 
00206     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t), fail);
00207     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_offs, ctx->m.mb_height*sizeof(uint32_t), fail);
00208     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_bits,    ctx->m.mb_num   *sizeof(uint16_t), fail);
00209     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_qscale,  ctx->m.mb_num   *sizeof(uint8_t) , fail);
00210 
00211     ctx->frame.key_frame = 1;
00212     ctx->frame.pict_type = FF_I_TYPE;
00213     ctx->m.avctx->coded_frame = &ctx->frame;
00214 
00215     if (avctx->thread_count > MAX_THREADS) {
00216         av_log(avctx, AV_LOG_ERROR, "too many threads\n");
00217         return -1;
00218     }
00219 
00220     ctx->thread[0] = ctx;
00221     for (i = 1; i < avctx->thread_count; i++) {
00222         ctx->thread[i] =  av_malloc(sizeof(DNXHDEncContext));
00223         memcpy(ctx->thread[i], ctx, sizeof(DNXHDEncContext));
00224     }
00225 
00226     return 0;
00227  fail: //for FF_ALLOCZ_OR_GOTO
00228     return -1;
00229 }
00230 
00231 static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
00232 {
00233     DNXHDEncContext *ctx = avctx->priv_data;
00234     const uint8_t header_prefix[5] = { 0x00,0x00,0x02,0x80,0x01 };
00235 
00236     memset(buf, 0, 640);
00237 
00238     memcpy(buf, header_prefix, 5);
00239     buf[5] = ctx->interlaced ? ctx->cur_field+2 : 0x01;
00240     buf[6] = 0x80; // crc flag off
00241     buf[7] = 0xa0; // reserved
00242     AV_WB16(buf + 0x18, avctx->height); // ALPF
00243     AV_WB16(buf + 0x1a, avctx->width);  // SPL
00244     AV_WB16(buf + 0x1d, avctx->height); // NAL
00245 
00246     buf[0x21] = 0x38; // FIXME 8 bit per comp
00247     buf[0x22] = 0x88 + (ctx->frame.interlaced_frame<<2);
00248     AV_WB32(buf + 0x28, ctx->cid); // CID
00249     buf[0x2c] = ctx->interlaced ? 0 : 0x80;
00250 
00251     buf[0x5f] = 0x01; // UDL
00252 
00253     buf[0x167] = 0x02; // reserved
00254     AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS
00255     buf[0x16d] = ctx->m.mb_height; // Ns
00256     buf[0x16f] = 0x10; // reserved
00257 
00258     ctx->msip = buf + 0x170;
00259     return 0;
00260 }
00261 
00262 static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
00263 {
00264     int nbits;
00265     if (diff < 0) {
00266         nbits = av_log2_16bit(-2*diff);
00267         diff--;
00268     } else {
00269         nbits = av_log2_16bit(2*diff);
00270     }
00271     put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
00272              (ctx->cid_table->dc_codes[nbits]<<nbits) + (diff & ((1 << nbits) - 1)));
00273 }
00274 
00275 static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, DCTELEM *block, int last_index, int n)
00276 {
00277     int last_non_zero = 0;
00278     int slevel, i, j;
00279 
00280     dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
00281     ctx->m.last_dc[n] = block[0];
00282 
00283     for (i = 1; i <= last_index; i++) {
00284         j = ctx->m.intra_scantable.permutated[i];
00285         slevel = block[j];
00286         if (slevel) {
00287             int run_level = i - last_non_zero - 1;
00288             int rlevel = (slevel<<1)|!!run_level;
00289             put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]);
00290             if (run_level)
00291                 put_bits(&ctx->m.pb, ctx->run_bits[run_level], ctx->run_codes[run_level]);
00292             last_non_zero = i;
00293         }
00294     }
00295     put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]); // EOB
00296 }
00297 
00298 static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *block, int n, int qscale, int last_index)
00299 {
00300     const uint8_t *weight_matrix;
00301     int level;
00302     int i;
00303 
00304     weight_matrix = (n&2) ? ctx->cid_table->chroma_weight : ctx->cid_table->luma_weight;
00305 
00306     for (i = 1; i <= last_index; i++) {
00307         int j = ctx->m.intra_scantable.permutated[i];
00308         level = block[j];
00309         if (level) {
00310             if (level < 0) {
00311                 level = (1-2*level) * qscale * weight_matrix[i];
00312                 if (weight_matrix[i] != 32)
00313                     level += 32;
00314                 level >>= 6;
00315                 level = -level;
00316             } else {
00317                 level = (2*level+1) * qscale * weight_matrix[i];
00318                 if (weight_matrix[i] != 32)
00319                     level += 32;
00320                 level >>= 6;
00321             }
00322             block[j] = level;
00323         }
00324     }
00325 }
00326 
00327 static av_always_inline int dnxhd_ssd_block(DCTELEM *qblock, DCTELEM *block)
00328 {
00329     int score = 0;
00330     int i;
00331     for (i = 0; i < 64; i++)
00332         score += (block[i]-qblock[i])*(block[i]-qblock[i]);
00333     return score;
00334 }
00335 
00336 static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *block, int last_index)
00337 {
00338     int last_non_zero = 0;
00339     int bits = 0;
00340     int i, j, level;
00341     for (i = 1; i <= last_index; i++) {
00342         j = ctx->m.intra_scantable.permutated[i];
00343         level = block[j];
00344         if (level) {
00345             int run_level = i - last_non_zero - 1;
00346             bits += ctx->vlc_bits[(level<<1)|!!run_level]+ctx->run_bits[run_level];
00347             last_non_zero = i;
00348         }
00349     }
00350     return bits;
00351 }
00352 
00353 static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
00354 {
00355     const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize)   + (mb_x << 4);
00356     const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
00357     const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
00358     DSPContext *dsp = &ctx->m.dsp;
00359 
00360     dsp->get_pixels(ctx->blocks[0], ptr_y    , ctx->m.linesize);
00361     dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
00362     dsp->get_pixels(ctx->blocks[2], ptr_u    , ctx->m.uvlinesize);
00363     dsp->get_pixels(ctx->blocks[3], ptr_v    , ctx->m.uvlinesize);
00364 
00365     if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
00366         if (ctx->interlaced) {
00367             ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset    , ctx->m.linesize);
00368             ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
00369             ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset   , ctx->m.uvlinesize);
00370             ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset   , ctx->m.uvlinesize);
00371         } else {
00372             dsp->clear_block(ctx->blocks[4]); dsp->clear_block(ctx->blocks[5]);
00373             dsp->clear_block(ctx->blocks[6]); dsp->clear_block(ctx->blocks[7]);
00374         }
00375     } else {
00376         dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset    , ctx->m.linesize);
00377         dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
00378         dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset   , ctx->m.uvlinesize);
00379         dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset   , ctx->m.uvlinesize);
00380     }
00381 }
00382 
00383 static av_always_inline int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
00384 {
00385     if (i&2) {
00386         ctx->m.q_intra_matrix16 = ctx->qmatrix_c16;
00387         ctx->m.q_intra_matrix   = ctx->qmatrix_c;
00388         return 1 + (i&1);
00389     } else {
00390         ctx->m.q_intra_matrix16 = ctx->qmatrix_l16;
00391         ctx->m.q_intra_matrix   = ctx->qmatrix_l;
00392         return 0;
00393     }
00394 }
00395 
00396 static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
00397 {
00398     DNXHDEncContext *ctx = avctx->priv_data;
00399     int mb_y = jobnr, mb_x;
00400     int qscale = ctx->qscale;
00401     LOCAL_ALIGNED_16(DCTELEM, block, [64]);
00402     ctx = ctx->thread[threadnr];
00403 
00404     ctx->m.last_dc[0] =
00405     ctx->m.last_dc[1] =
00406     ctx->m.last_dc[2] = 1024;
00407 
00408     for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00409         unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00410         int ssd     = 0;
00411         int ac_bits = 0;
00412         int dc_bits = 0;
00413         int i;
00414 
00415         dnxhd_get_blocks(ctx, mb_x, mb_y);
00416 
00417         for (i = 0; i < 8; i++) {
00418             DCTELEM *src_block = ctx->blocks[i];
00419             int overflow, nbits, diff, last_index;
00420             int n = dnxhd_switch_matrix(ctx, i);
00421 
00422             memcpy(block, src_block, 64*sizeof(*block));
00423             last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
00424             ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index);
00425 
00426             diff = block[0] - ctx->m.last_dc[n];
00427             if (diff < 0) nbits = av_log2_16bit(-2*diff);
00428             else          nbits = av_log2_16bit( 2*diff);
00429             dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
00430 
00431             ctx->m.last_dc[n] = block[0];
00432 
00433             if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) {
00434                 dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
00435                 ctx->m.dsp.idct(block);
00436                 ssd += dnxhd_ssd_block(block, src_block);
00437             }
00438         }
00439         ctx->mb_rc[qscale][mb].ssd = ssd;
00440         ctx->mb_rc[qscale][mb].bits = ac_bits+dc_bits+12+8*ctx->vlc_bits[0];
00441     }
00442     return 0;
00443 }
00444 
00445 static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
00446 {
00447     DNXHDEncContext *ctx = avctx->priv_data;
00448     int mb_y = jobnr, mb_x;
00449     ctx = ctx->thread[threadnr];
00450     init_put_bits(&ctx->m.pb, (uint8_t *)arg + 640 + ctx->slice_offs[jobnr], ctx->slice_size[jobnr]);
00451 
00452     ctx->m.last_dc[0] =
00453     ctx->m.last_dc[1] =
00454     ctx->m.last_dc[2] = 1024;
00455     for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00456         unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00457         int qscale = ctx->mb_qscale[mb];
00458         int i;
00459 
00460         put_bits(&ctx->m.pb, 12, qscale<<1);
00461 
00462         dnxhd_get_blocks(ctx, mb_x, mb_y);
00463 
00464         for (i = 0; i < 8; i++) {
00465             DCTELEM *block = ctx->blocks[i];
00466             int last_index, overflow;
00467             int n = dnxhd_switch_matrix(ctx, i);
00468             last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
00469             //START_TIMER;
00470             dnxhd_encode_block(ctx, block, last_index, n);
00471             //STOP_TIMER("encode_block");
00472         }
00473     }
00474     if (put_bits_count(&ctx->m.pb)&31)
00475         put_bits(&ctx->m.pb, 32-(put_bits_count(&ctx->m.pb)&31), 0);
00476     flush_put_bits(&ctx->m.pb);
00477     return 0;
00478 }
00479 
00480 static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx)
00481 {
00482     int mb_y, mb_x;
00483     int offset = 0;
00484     for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) {
00485         int thread_size;
00486         ctx->slice_offs[mb_y] = offset;
00487             ctx->slice_size[mb_y] = 0;
00488             for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00489                 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00490                 ctx->slice_size[mb_y] += ctx->mb_bits[mb];
00491             }
00492             ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31;
00493             ctx->slice_size[mb_y] >>= 3;
00494             thread_size = ctx->slice_size[mb_y];
00495         offset += thread_size;
00496     }
00497 }
00498 
00499 static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
00500 {
00501     DNXHDEncContext *ctx = avctx->priv_data;
00502     int mb_y = jobnr, mb_x;
00503     ctx = ctx->thread[threadnr];
00504     for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00505         unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
00506         uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
00507         int sum      = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
00508         int varc     = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
00509         ctx->mb_cmp[mb].value = varc;
00510         ctx->mb_cmp[mb].mb = mb;
00511     }
00512     return 0;
00513 }
00514 
00515 static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
00516 {
00517     int lambda, up_step, down_step;
00518     int last_lower = INT_MAX, last_higher = 0;
00519     int x, y, q;
00520 
00521     for (q = 1; q < avctx->qmax; q++) {
00522         ctx->qscale = q;
00523         avctx->execute2(avctx, dnxhd_calc_bits_thread, NULL, NULL, ctx->m.mb_height);
00524     }
00525     up_step = down_step = 2<<LAMBDA_FRAC_BITS;
00526     lambda = ctx->lambda;
00527 
00528     for (;;) {
00529         int bits = 0;
00530         int end = 0;
00531         if (lambda == last_higher) {
00532             lambda++;
00533             end = 1; // need to set final qscales/bits
00534         }
00535         for (y = 0; y < ctx->m.mb_height; y++) {
00536             for (x = 0; x < ctx->m.mb_width; x++) {
00537                 unsigned min = UINT_MAX;
00538                 int qscale = 1;
00539                 int mb = y*ctx->m.mb_width+x;
00540                 for (q = 1; q < avctx->qmax; q++) {
00541                     unsigned score = ctx->mb_rc[q][mb].bits*lambda+(ctx->mb_rc[q][mb].ssd<<LAMBDA_FRAC_BITS);
00542                     if (score < min) {
00543                         min = score;
00544                         qscale = q;
00545                     }
00546                 }
00547                 bits += ctx->mb_rc[qscale][mb].bits;
00548                 ctx->mb_qscale[mb] = qscale;
00549                 ctx->mb_bits[mb] = ctx->mb_rc[qscale][mb].bits;
00550             }
00551             bits = (bits+31)&~31; // padding
00552             if (bits > ctx->frame_bits)
00553                 break;
00554         }
00555         //dprintf(ctx->m.avctx, "lambda %d, up %u, down %u, bits %d, frame %d\n",
00556         //        lambda, last_higher, last_lower, bits, ctx->frame_bits);
00557         if (end) {
00558             if (bits > ctx->frame_bits)
00559                 return -1;
00560             break;
00561         }
00562         if (bits < ctx->frame_bits) {
00563             last_lower = FFMIN(lambda, last_lower);
00564             if (last_higher != 0)
00565                 lambda = (lambda+last_higher)>>1;
00566             else
00567                 lambda -= down_step;
00568             down_step *= 5; // XXX tune ?
00569             up_step = 1<<LAMBDA_FRAC_BITS;
00570             lambda = FFMAX(1, lambda);
00571             if (lambda == last_lower)
00572                 break;
00573         } else {
00574             last_higher = FFMAX(lambda, last_higher);
00575             if (last_lower != INT_MAX)
00576                 lambda = (lambda+last_lower)>>1;
00577             else if ((int64_t)lambda + up_step > INT_MAX)
00578                 return -1;
00579             else
00580                 lambda += up_step;
00581             up_step = FFMIN((int64_t)up_step*5, INT_MAX);
00582             down_step = 1<<LAMBDA_FRAC_BITS;
00583         }
00584     }
00585     //dprintf(ctx->m.avctx, "out lambda %d\n", lambda);
00586     ctx->lambda = lambda;
00587     return 0;
00588 }
00589 
00590 static int dnxhd_find_qscale(DNXHDEncContext *ctx)
00591 {
00592     int bits = 0;
00593     int up_step = 1;
00594     int down_step = 1;
00595     int last_higher = 0;
00596     int last_lower = INT_MAX;
00597     int qscale;
00598     int x, y;
00599 
00600     qscale = ctx->qscale;
00601     for (;;) {
00602         bits = 0;
00603         ctx->qscale = qscale;
00604         // XXX avoid recalculating bits
00605         ctx->m.avctx->execute2(ctx->m.avctx, dnxhd_calc_bits_thread, NULL, NULL, ctx->m.mb_height);
00606         for (y = 0; y < ctx->m.mb_height; y++) {
00607             for (x = 0; x < ctx->m.mb_width; x++)
00608                 bits += ctx->mb_rc[qscale][y*ctx->m.mb_width+x].bits;
00609             bits = (bits+31)&~31; // padding
00610             if (bits > ctx->frame_bits)
00611                 break;
00612         }
00613         //dprintf(ctx->m.avctx, "%d, qscale %d, bits %d, frame %d, higher %d, lower %d\n",
00614         //        ctx->m.avctx->frame_number, qscale, bits, ctx->frame_bits, last_higher, last_lower);
00615         if (bits < ctx->frame_bits) {
00616             if (qscale == 1)
00617                 return 1;
00618             if (last_higher == qscale - 1) {
00619                 qscale = last_higher;
00620                 break;
00621             }
00622             last_lower = FFMIN(qscale, last_lower);
00623             if (last_higher != 0)
00624                 qscale = (qscale+last_higher)>>1;
00625             else
00626                 qscale -= down_step++;
00627             if (qscale < 1)
00628                 qscale = 1;
00629             up_step = 1;
00630         } else {
00631             if (last_lower == qscale + 1)
00632                 break;
00633             last_higher = FFMAX(qscale, last_higher);
00634             if (last_lower != INT_MAX)
00635                 qscale = (qscale+last_lower)>>1;
00636             else
00637                 qscale += up_step++;
00638             down_step = 1;
00639             if (qscale >= ctx->m.avctx->qmax)
00640                 return -1;
00641         }
00642     }
00643     //dprintf(ctx->m.avctx, "out qscale %d\n", qscale);
00644     ctx->qscale = qscale;
00645     return 0;
00646 }
00647 
#define BUCKET_BITS 8
#define RADIX_PASSES 4
#define NBUCKETS (1 << BUCKET_BITS)

/**
 * Bucket index of value for the radix pass that starts at bit shift.
 *
 * The digit is inverted (NBUCKETS-1 - digit), so larger digits map to
 * smaller bucket numbers.
 */
static inline int get_bucket(int value, int shift)
{
    const int digit = (value >> shift) & (NBUCKETS - 1);

    return NBUCKETS - 1 - digit;
}
00658 
00659 static void radix_count(const RCCMPEntry *data, int size, int buckets[RADIX_PASSES][NBUCKETS])
00660 {
00661     int i, j;
00662     memset(buckets, 0, sizeof(buckets[0][0]) * RADIX_PASSES * NBUCKETS);
00663     for (i = 0; i < size; i++) {
00664         int v = data[i].value;
00665         for (j = 0; j < RADIX_PASSES; j++) {
00666             buckets[j][get_bucket(v, 0)]++;
00667             v >>= BUCKET_BITS;
00668         }
00669         assert(!v);
00670     }
00671     for (j = 0; j < RADIX_PASSES; j++) {
00672         int offset = size;
00673         for (i = NBUCKETS - 1; i >= 0; i--)
00674             buckets[j][i] = offset -= buckets[j][i];
00675         assert(!buckets[j][0]);
00676     }
00677 }
00678 
00679 static void radix_sort_pass(RCCMPEntry *dst, const RCCMPEntry *data, int size, int buckets[NBUCKETS], int pass)
00680 {
00681     int shift = pass * BUCKET_BITS;
00682     int i;
00683     for (i = 0; i < size; i++) {
00684         int v = get_bucket(data[i].value, shift);
00685         int pos = buckets[v]++;
00686         dst[pos] = data[i];
00687     }
00688 }
00689 
00690 static void radix_sort(RCCMPEntry *data, int size)
00691 {
00692     int buckets[RADIX_PASSES][NBUCKETS];
00693     RCCMPEntry *tmp = av_malloc(sizeof(*tmp) * size);
00694     radix_count(data, size, buckets);
00695     radix_sort_pass(tmp, data, size, buckets[0], 0);
00696     radix_sort_pass(data, tmp, size, buckets[1], 1);
00697     if (buckets[2][NBUCKETS - 1] || buckets[3][NBUCKETS - 1]) {
00698         radix_sort_pass(tmp, data, size, buckets[2], 2);
00699         radix_sort_pass(data, tmp, size, buckets[3], 3);
00700     }
00701     av_free(tmp);
00702 }
00703 
00704 static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
00705 {
00706     int max_bits = 0;
00707     int ret, x, y;
00708     if ((ret = dnxhd_find_qscale(ctx)) < 0)
00709         return -1;
00710     for (y = 0; y < ctx->m.mb_height; y++) {
00711         for (x = 0; x < ctx->m.mb_width; x++) {
00712             int mb = y*ctx->m.mb_width+x;
00713             int delta_bits;
00714             ctx->mb_qscale[mb] = ctx->qscale;
00715             ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale][mb].bits;
00716             max_bits += ctx->mb_rc[ctx->qscale][mb].bits;
00717             if (!RC_VARIANCE) {
00718                 delta_bits = ctx->mb_rc[ctx->qscale][mb].bits-ctx->mb_rc[ctx->qscale+1][mb].bits;
00719                 ctx->mb_cmp[mb].mb = mb;
00720                 ctx->mb_cmp[mb].value = delta_bits ?
00721                     ((ctx->mb_rc[ctx->qscale][mb].ssd-ctx->mb_rc[ctx->qscale+1][mb].ssd)*100)/delta_bits
00722                     : INT_MIN; //avoid increasing qscale
00723             }
00724         }
00725         max_bits += 31; //worst padding
00726     }
00727     if (!ret) {
00728         if (RC_VARIANCE)
00729             avctx->execute2(avctx, dnxhd_mb_var_thread, NULL, NULL, ctx->m.mb_height);
00730         radix_sort(ctx->mb_cmp, ctx->m.mb_num);
00731         for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
00732             int mb = ctx->mb_cmp[x].mb;
00733             max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits;
00734             ctx->mb_qscale[mb] = ctx->qscale+1;
00735             ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits;
00736         }
00737     }
00738     return 0;
00739 }
00740 
00741 static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
00742 {
00743     int i;
00744 
00745     for (i = 0; i < 3; i++) {
00746         ctx->frame.data[i]     = frame->data[i];
00747         ctx->frame.linesize[i] = frame->linesize[i];
00748     }
00749 
00750     for (i = 0; i < ctx->m.avctx->thread_count; i++) {
00751         ctx->thread[i]->m.linesize    = ctx->frame.linesize[0]<<ctx->interlaced;
00752         ctx->thread[i]->m.uvlinesize  = ctx->frame.linesize[1]<<ctx->interlaced;
00753         ctx->thread[i]->dct_y_offset  = ctx->m.linesize  *8;
00754         ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;
00755     }
00756 
00757     ctx->frame.interlaced_frame = frame->interlaced_frame;
00758     ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
00759 }
00760 
00761 static int dnxhd_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data)
00762 {
00763     DNXHDEncContext *ctx = avctx->priv_data;
00764     int first_field = 1;
00765     int offset, i, ret;
00766 
00767     if (buf_size < ctx->cid_table->frame_size) {
00768         av_log(avctx, AV_LOG_ERROR, "output buffer is too small to compress picture\n");
00769         return -1;
00770     }
00771 
00772     dnxhd_load_picture(ctx, data);
00773 
00774  encode_coding_unit:
00775     for (i = 0; i < 3; i++) {
00776         ctx->src[i] = ctx->frame.data[i];
00777         if (ctx->interlaced && ctx->cur_field)
00778             ctx->src[i] += ctx->frame.linesize[i];
00779     }
00780 
00781     dnxhd_write_header(avctx, buf);
00782 
00783     if (avctx->mb_decision == FF_MB_DECISION_RD)
00784         ret = dnxhd_encode_rdo(avctx, ctx);
00785     else
00786         ret = dnxhd_encode_fast(avctx, ctx);
00787     if (ret < 0) {
00788         av_log(avctx, AV_LOG_ERROR,
00789                "picture could not fit ratecontrol constraints, increase qmax\n");
00790         return -1;
00791     }
00792 
00793     dnxhd_setup_threads_slices(ctx);
00794 
00795     offset = 0;
00796     for (i = 0; i < ctx->m.mb_height; i++) {
00797         AV_WB32(ctx->msip + i * 4, offset);
00798         offset += ctx->slice_size[i];
00799         assert(!(ctx->slice_size[i] & 3));
00800     }
00801 
00802     avctx->execute2(avctx, dnxhd_encode_thread, buf, NULL, ctx->m.mb_height);
00803 
00804     assert(640 + offset + 4 <= ctx->cid_table->coding_unit_size);
00805     memset(buf + 640 + offset, 0, ctx->cid_table->coding_unit_size - 4 - offset - 640);
00806 
00807     AV_WB32(buf + ctx->cid_table->coding_unit_size - 4, 0x600DC0DE); // EOF
00808 
00809     if (ctx->interlaced && first_field) {
00810         first_field     = 0;
00811         ctx->cur_field ^= 1;
00812         buf      += ctx->cid_table->coding_unit_size;
00813         buf_size -= ctx->cid_table->coding_unit_size;
00814         goto encode_coding_unit;
00815     }
00816 
00817     ctx->frame.quality = ctx->qscale*FF_QP2LAMBDA;
00818 
00819     return ctx->cid_table->frame_size;
00820 }
00821 
00822 static int dnxhd_encode_end(AVCodecContext *avctx)
00823 {
00824     DNXHDEncContext *ctx = avctx->priv_data;
00825     int max_level = 1<<(ctx->cid_table->bit_depth+2);
00826     int i;
00827 
00828     av_free(ctx->vlc_codes-max_level*2);
00829     av_free(ctx->vlc_bits -max_level*2);
00830     av_freep(&ctx->run_codes);
00831     av_freep(&ctx->run_bits);
00832 
00833     av_freep(&ctx->mb_bits);
00834     av_freep(&ctx->mb_qscale);
00835     av_freep(&ctx->mb_rc);
00836     av_freep(&ctx->mb_cmp);
00837     av_freep(&ctx->slice_size);
00838     av_freep(&ctx->slice_offs);
00839 
00840     av_freep(&ctx->qmatrix_c);
00841     av_freep(&ctx->qmatrix_l);
00842     av_freep(&ctx->qmatrix_c16);
00843     av_freep(&ctx->qmatrix_l16);
00844 
00845     for (i = 1; i < avctx->thread_count; i++)
00846         av_freep(&ctx->thread[i]);
00847 
00848     return 0;
00849 }
00850 
00851 AVCodec dnxhd_encoder = {
00852     "dnxhd",
00853     AVMEDIA_TYPE_VIDEO,
00854     CODEC_ID_DNXHD,
00855     sizeof(DNXHDEncContext),
00856     dnxhd_encode_init,
00857     dnxhd_encode_picture,
00858     dnxhd_encode_end,
00859     .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_NONE},
00860     .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
00861 };

Generated on Fri Sep 16 2011 17:17:35 for FFmpeg by  doxygen 1.7.1