libavcodec/vp8.c
Go to the documentation of this file.
00001 
00025 #include "libavutil/imgutils.h"
00026 #include "avcodec.h"
00027 #include "internal.h"
00028 #include "vp8.h"
00029 #include "vp8data.h"
00030 #include "rectangle.h"
00031 #include "thread.h"
00032 
00033 #if ARCH_ARM
00034 #   include "arm/vp8.h"
00035 #endif
00036 
00037 static void free_buffers(VP8Context *s)
00038 {
00039     av_freep(&s->macroblocks_base);
00040     av_freep(&s->filter_strength);
00041     av_freep(&s->intra4x4_pred_mode_top);
00042     av_freep(&s->top_nnz);
00043     av_freep(&s->edge_emu_buffer);
00044     av_freep(&s->top_border);
00045 
00046     s->macroblocks = NULL;
00047 }
00048 
00049 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
00050 {
00051     int ret;
00052     if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
00053         return ret;
00054     if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
00055         f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
00056     } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
00057         ff_thread_release_buffer(s->avctx, f);
00058         return AVERROR(ENOMEM);
00059     }
00060     return 0;
00061 }
00062 
00063 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
00064 {
00065     if (f->ref_index[0]) {
00066         if (prefer_delayed_free) {
00067             /* Upon a size change, we want to free the maps but other threads may still
00068              * be using them, so queue them. Upon a seek, all threads are inactive so
00069              * we want to cache one to prevent re-allocation in the next decoding
00070              * iteration, but the rest we can free directly. */
00071             int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
00072             if (s->num_maps_to_be_freed < max_queued_maps) {
00073                 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
00074             } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
00075                 av_free(f->ref_index[0]);
00076             } /* else: MEMLEAK (should never happen, but better that than crash) */
00077             f->ref_index[0] = NULL;
00078         } else /* vp8_decode_free() */ {
00079             av_free(f->ref_index[0]);
00080         }
00081     }
00082     ff_thread_release_buffer(s->avctx, f);
00083 }
00084 
00085 static void vp8_decode_flush_impl(AVCodecContext *avctx,
00086                                   int prefer_delayed_free, int can_direct_free, int free_mem)
00087 {
00088     VP8Context *s = avctx->priv_data;
00089     int i;
00090 
00091     if (!avctx->internal->is_copy) {
00092         for (i = 0; i < 5; i++)
00093             if (s->frames[i].data[0])
00094                 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
00095     }
00096     memset(s->framep, 0, sizeof(s->framep));
00097 
00098     if (free_mem) {
00099         free_buffers(s);
00100         s->maps_are_invalid = 1;
00101     }
00102 }
00103 
00104 static void vp8_decode_flush(AVCodecContext *avctx)
00105 {
00106     vp8_decode_flush_impl(avctx, 1, 1, 0);
00107 }
00108 
00109 static int update_dimensions(VP8Context *s, int width, int height)
00110 {
00111     if (width  != s->avctx->width ||
00112         height != s->avctx->height) {
00113         if (av_image_check_size(width, height, 0, s->avctx))
00114             return AVERROR_INVALIDDATA;
00115 
00116         vp8_decode_flush_impl(s->avctx, 1, 0, 1);
00117 
00118         avcodec_set_dimensions(s->avctx, width, height);
00119     }
00120 
00121     s->mb_width  = (s->avctx->coded_width +15) / 16;
00122     s->mb_height = (s->avctx->coded_height+15) / 16;
00123 
00124     s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
00125     s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
00126     s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
00127     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
00128     s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
00129 
00130     if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
00131         !s->top_nnz || !s->top_border)
00132         return AVERROR(ENOMEM);
00133 
00134     s->macroblocks        = s->macroblocks_base + 1;
00135 
00136     return 0;
00137 }
00138 
00139 static void parse_segment_info(VP8Context *s)
00140 {
00141     VP56RangeCoder *c = &s->c;
00142     int i;
00143 
00144     s->segmentation.update_map = vp8_rac_get(c);
00145 
00146     if (vp8_rac_get(c)) { // update segment feature data
00147         s->segmentation.absolute_vals = vp8_rac_get(c);
00148 
00149         for (i = 0; i < 4; i++)
00150             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
00151 
00152         for (i = 0; i < 4; i++)
00153             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
00154     }
00155     if (s->segmentation.update_map)
00156         for (i = 0; i < 3; i++)
00157             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
00158 }
00159 
00160 static void update_lf_deltas(VP8Context *s)
00161 {
00162     VP56RangeCoder *c = &s->c;
00163     int i;
00164 
00165     for (i = 0; i < 4; i++)
00166         s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);
00167 
00168     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
00169         s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
00170 }
00171 
00172 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
00173 {
00174     const uint8_t *sizes = buf;
00175     int i;
00176 
00177     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
00178 
00179     buf      += 3*(s->num_coeff_partitions-1);
00180     buf_size -= 3*(s->num_coeff_partitions-1);
00181     if (buf_size < 0)
00182         return -1;
00183 
00184     for (i = 0; i < s->num_coeff_partitions-1; i++) {
00185         int size = AV_RL24(sizes + 3*i);
00186         if (buf_size - size < 0)
00187             return -1;
00188 
00189         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
00190         buf      += size;
00191         buf_size -= size;
00192     }
00193     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
00194 
00195     return 0;
00196 }
00197 
00198 static void get_quants(VP8Context *s)
00199 {
00200     VP56RangeCoder *c = &s->c;
00201     int i, base_qi;
00202 
00203     int yac_qi     = vp8_rac_get_uint(c, 7);
00204     int ydc_delta  = vp8_rac_get_sint(c, 4);
00205     int y2dc_delta = vp8_rac_get_sint(c, 4);
00206     int y2ac_delta = vp8_rac_get_sint(c, 4);
00207     int uvdc_delta = vp8_rac_get_sint(c, 4);
00208     int uvac_delta = vp8_rac_get_sint(c, 4);
00209 
00210     for (i = 0; i < 4; i++) {
00211         if (s->segmentation.enabled) {
00212             base_qi = s->segmentation.base_quant[i];
00213             if (!s->segmentation.absolute_vals)
00214                 base_qi += yac_qi;
00215         } else
00216             base_qi = yac_qi;
00217 
00218         s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
00219         s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
00220         s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
00221         s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
00222         s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
00223         s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
00224 
00225         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
00226         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
00227     }
00228 }
00229 
00243 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
00244 {
00245     VP56RangeCoder *c = &s->c;
00246 
00247     if (update)
00248         return VP56_FRAME_CURRENT;
00249 
00250     switch (vp8_rac_get_uint(c, 2)) {
00251     case 1:
00252         return VP56_FRAME_PREVIOUS;
00253     case 2:
00254         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
00255     }
00256     return VP56_FRAME_NONE;
00257 }
00258 
00259 static void update_refs(VP8Context *s)
00260 {
00261     VP56RangeCoder *c = &s->c;
00262 
00263     int update_golden = vp8_rac_get(c);
00264     int update_altref = vp8_rac_get(c);
00265 
00266     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
00267     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
00268 }
00269 
00270 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
00271 {
00272     VP56RangeCoder *c = &s->c;
00273     int header_size, hscale, vscale, i, j, k, l, m, ret;
00274     int width  = s->avctx->width;
00275     int height = s->avctx->height;
00276 
00277     s->keyframe  = !(buf[0] & 1);
00278     s->profile   =  (buf[0]>>1) & 7;
00279     s->invisible = !(buf[0] & 0x10);
00280     header_size  = AV_RL24(buf) >> 5;
00281     buf      += 3;
00282     buf_size -= 3;
00283 
00284     if (s->profile > 3)
00285         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
00286 
00287     if (!s->profile)
00288         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
00289     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
00290         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
00291 
00292     if (header_size > buf_size - 7*s->keyframe) {
00293         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
00294         return AVERROR_INVALIDDATA;
00295     }
00296 
00297     if (s->keyframe) {
00298         if (AV_RL24(buf) != 0x2a019d) {
00299             av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
00300             return AVERROR_INVALIDDATA;
00301         }
00302         width  = AV_RL16(buf+3) & 0x3fff;
00303         height = AV_RL16(buf+5) & 0x3fff;
00304         hscale = buf[4] >> 6;
00305         vscale = buf[6] >> 6;
00306         buf      += 7;
00307         buf_size -= 7;
00308 
00309         if (hscale || vscale)
00310             av_log_missing_feature(s->avctx, "Upscaling", 1);
00311 
00312         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
00313         for (i = 0; i < 4; i++)
00314             for (j = 0; j < 16; j++)
00315                 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
00316                        sizeof(s->prob->token[i][j]));
00317         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
00318         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
00319         memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
00320         memset(&s->segmentation, 0, sizeof(s->segmentation));
00321     }
00322 
00323     if (!s->macroblocks_base || /* first frame */
00324         width != s->avctx->width || height != s->avctx->height) {
00325         if ((ret = update_dimensions(s, width, height)) < 0)
00326             return ret;
00327     }
00328 
00329     ff_vp56_init_range_decoder(c, buf, header_size);
00330     buf      += header_size;
00331     buf_size -= header_size;
00332 
00333     if (s->keyframe) {
00334         if (vp8_rac_get(c))
00335             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
00336         vp8_rac_get(c); // whether we can skip clamping in dsp functions
00337     }
00338 
00339     if ((s->segmentation.enabled = vp8_rac_get(c)))
00340         parse_segment_info(s);
00341     else
00342         s->segmentation.update_map = 0; // FIXME: move this to some init function?
00343 
00344     s->filter.simple    = vp8_rac_get(c);
00345     s->filter.level     = vp8_rac_get_uint(c, 6);
00346     s->filter.sharpness = vp8_rac_get_uint(c, 3);
00347 
00348     if ((s->lf_delta.enabled = vp8_rac_get(c)))
00349         if (vp8_rac_get(c))
00350             update_lf_deltas(s);
00351 
00352     if (setup_partitions(s, buf, buf_size)) {
00353         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
00354         return AVERROR_INVALIDDATA;
00355     }
00356 
00357     get_quants(s);
00358 
00359     if (!s->keyframe) {
00360         update_refs(s);
00361         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
00362         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
00363     }
00364 
00365     // if we aren't saving this frame's probabilities for future frames,
00366     // make a copy of the current probabilities
00367     if (!(s->update_probabilities = vp8_rac_get(c)))
00368         s->prob[1] = s->prob[0];
00369 
00370     s->update_last = s->keyframe || vp8_rac_get(c);
00371 
00372     for (i = 0; i < 4; i++)
00373         for (j = 0; j < 8; j++)
00374             for (k = 0; k < 3; k++)
00375                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
00376                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
00377                         int prob = vp8_rac_get_uint(c, 8);
00378                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
00379                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
00380                     }
00381 
00382     if ((s->mbskip_enabled = vp8_rac_get(c)))
00383         s->prob->mbskip = vp8_rac_get_uint(c, 8);
00384 
00385     if (!s->keyframe) {
00386         s->prob->intra  = vp8_rac_get_uint(c, 8);
00387         s->prob->last   = vp8_rac_get_uint(c, 8);
00388         s->prob->golden = vp8_rac_get_uint(c, 8);
00389 
00390         if (vp8_rac_get(c))
00391             for (i = 0; i < 4; i++)
00392                 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
00393         if (vp8_rac_get(c))
00394             for (i = 0; i < 3; i++)
00395                 s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
00396 
00397         // 17.2 MV probability update
00398         for (i = 0; i < 2; i++)
00399             for (j = 0; j < 19; j++)
00400                 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
00401                     s->prob->mvc[i][j] = vp8_rac_get_nn(c);
00402     }
00403 
00404     return 0;
00405 }
00406 
00407 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
00408 {
00409     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
00410     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
00411 }
00412 
00416 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
00417 {
00418     int bit, x = 0;
00419 
00420     if (vp56_rac_get_prob_branchy(c, p[0])) {
00421         int i;
00422 
00423         for (i = 0; i < 3; i++)
00424             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00425         for (i = 9; i > 3; i--)
00426             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00427         if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
00428             x += 8;
00429     } else {
00430         // small_mvtree
00431         const uint8_t *ps = p+2;
00432         bit = vp56_rac_get_prob(c, *ps);
00433         ps += 1 + 3*bit;
00434         x  += 4*bit;
00435         bit = vp56_rac_get_prob(c, *ps);
00436         ps += 1 + bit;
00437         x  += 2*bit;
00438         x  += vp56_rac_get_prob(c, *ps);
00439     }
00440 
00441     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
00442 }
00443 
00444 static av_always_inline
00445 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
00446 {
00447     if (left == top)
00448         return vp8_submv_prob[4-!!left];
00449     if (!top)
00450         return vp8_submv_prob[2];
00451     return vp8_submv_prob[1-!!left];
00452 }
00453 
/**
 * Decode the split motion vectors of one macroblock.
 *
 * Reads the partitioning type from the range coder, then one sub-vector per
 * partition. Each sub-vector is either copied from the left context, copied
 * from the above context, set to zero, or read explicitly as an offset
 * added to mb->mv.
 *
 * @param s  decoder context (supplies the MV probabilities s->prob->mvc)
 * @param c  range coder to read from
 * @param mb macroblock being decoded; mb[-1] is used as the left neighbour
 *           and mb[2] as the top neighbour
 * @return the number of sub-vectors written to mb->bmv
 */
00458 static av_always_inline
00459 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
00460 {
00461     int part_idx;
00462     int n, num;
00463     VP8Macroblock *top_mb  = &mb[2];
00464     VP8Macroblock *left_mb = &mb[-1];
00465     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
00466                   *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
00467                   *mbsplits_cur, *firstidx;
00468     VP56mv *top_mv  = top_mb->bmv;
00469     VP56mv *left_mv = left_mb->bmv;
00470     VP56mv *cur_mv  = mb->bmv;
00471 
      /* Partitioning type: 4x4 if the first bit is clear, 8x8 if the second
       * is clear, otherwise VP8_SPLITMVMODE_16x8 plus one more bit. */
00472     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
00473         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
00474             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
00475         } else {
00476             part_idx = VP8_SPLITMVMODE_8x8;
00477         }
00478     } else {
00479         part_idx = VP8_SPLITMVMODE_4x4;
00480     }
00481 
00482     num = vp8_mbsplit_count[part_idx];
      /* NOTE(review): the next statement ends in ',' rather than ';', so it
       * is joined to the firstidx assignment by the comma operator. The
       * result is the same, but it looks like a typo. */
00483     mbsplits_cur = vp8_mbsplits[part_idx],
00484     firstidx = vp8_mbfirstidx[part_idx];
00485     mb->partitioning = part_idx;
00486 
00487     for (n = 0; n < num; n++) {
00488         int k = firstidx[n];
00489         uint32_t left, above;
00490         const uint8_t *submv_prob;
00491 
          /* k is presumably the raster index (0..15) of the partition's
           * first 4x4 sub-block. When k is a multiple of 4 the left context
           * comes from the left macroblock, otherwise from this one. */
00492         if (!(k & 3))
00493             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
00494         else
00495             left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
          /* Likewise, k <= 3 takes the above context from the top macroblock. */
00496         if (k <= 3)
00497             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
00498         else
00499             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
00500 
00501         submv_prob = get_submv_prob(left, above);
00502 
          /* Sub-MV tree: left / above / zero / explicitly coded vector. */
00503         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
00504             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
00505                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
00506                     mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
00507                     mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
00508                 } else {
00509                     AV_ZERO32(&mb->bmv[n]);
00510                 }
00511             } else {
00512                 AV_WN32A(&mb->bmv[n], above);
00513             }
00514         } else {
00515             AV_WN32A(&mb->bmv[n], left);
00516         }
00517     }
00518 
00519     return num;
00520 }
00521 
/**
 * Decode the motion-vector mode and vector(s) of an inter macroblock.
 *
 * Up to three candidate vectors are collected from the top, left and
 * top-left neighbouring macroblocks (mb + 2, mb - 1 and mb + 1). Each
 * non-intra neighbour adds weight to its candidate: 2 for top and left,
 * 1 for top-left. The resulting counts select the probabilities used to
 * read the mode. The function sets mb->mode, mb->mv, mb->partitioning and
 * mb->bmv[].
 *
 * @param s    decoder context (range coder s->c, sign biases, MV probs)
 * @param mb   macroblock being decoded; mb->ref_frame must already be set
 * @param mb_x macroblock column (not referenced in the body)
 * @param mb_y macroblock row (not referenced in the body)
 */
00522 static av_always_inline
00523 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
00524 {
00525     VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
00526                                   mb - 1 /* left */,
00527                                   mb + 1 /* top-left */ };
00528     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
00529     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
00530     int idx = CNT_ZERO;
00531     int cur_sign_bias = s->sign_bias[mb->ref_frame];
00532     int8_t *sign_bias = s->sign_bias;
00533     VP56mv near_mv[4];
00534     uint8_t cnt[4] = { 0 };
00535     VP56RangeCoder *c = &s->c;
00536 
      /* near_mv[3] is left uninitialised here. It is only read below when
       * cnt[CNT_SPLITMV] is non-zero, which requires idx to have reached 3,
       * i.e. the entry to have been written by MV_EDGE_CHECK. */
00537     AV_ZERO32(&near_mv[0]);
00538     AV_ZERO32(&near_mv[1]);
00539     AV_ZERO32(&near_mv[2]);
00540 
00541     /* Process MB on top, left and top-left */
00542     #define MV_EDGE_CHECK(n)\
00543     {\
00544         VP8Macroblock *edge = mb_edge[n];\
00545         int edge_ref = edge->ref_frame;\
00546         if (edge_ref != VP56_FRAME_CURRENT) {\
00547             uint32_t mv = AV_RN32A(&edge->mv);\
00548             if (mv) {\
00549                 if (cur_sign_bias != sign_bias[edge_ref]) {\
00550                     /* SWAR negate of the values in mv. */\
00551                     mv = ~mv;\
00552                     mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
00553                 }\
00554                 if (!n || mv != AV_RN32A(&near_mv[idx]))\
00555                     AV_WN32A(&near_mv[++idx], mv);\
00556                 cnt[idx]      += 1 + (n != 2);\
00557             } else\
00558                 cnt[CNT_ZERO] += 1 + (n != 2);\
00559         }\
00560     }
00561 
00562     MV_EDGE_CHECK(0)
00563     MV_EDGE_CHECK(1)
00564     MV_EDGE_CHECK(2)
00565 
00566     mb->partitioning = VP8_SPLITMVMODE_NONE;
00567     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
00568         mb->mode = VP8_MVMODE_MV;
00569 
00570         /* If we have three distinct MVs, merge first and last if they're the same */
00571         if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
00572             cnt[CNT_NEAREST] += 1;
00573 
00574         /* Swap near and nearest if necessary */
00575         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
00576             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
00577             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
00578         }
00579 
00580         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
00581             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
00582 
00583                 /* Choose the best mv out of 0,0 and the nearest mv */
00584                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                  /* cnt[CNT_SPLITMV] is reused from here on as the context
                   * for the split decision: split neighbours count 2 for
                   * left and top and 1 for top-left. */
00585                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
00586                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
00587                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
00588 
00589                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
00590                     mb->mode = VP8_MVMODE_SPLIT;
                      /* The macroblock-level vector becomes the last sub-vector. */
00591                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
00592                 } else {
                      /* New vector: an explicit delta added to the clamped best vector. */
00593                     mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
00594                     mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
00595                     mb->bmv[0] = mb->mv;
00596                 }
00597             } else {
00598                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
00599                 mb->bmv[0] = mb->mv;
00600             }
00601         } else {
00602             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
00603             mb->bmv[0] = mb->mv;
00604         }
00605     } else {
00606         mb->mode = VP8_MVMODE_ZERO;
00607         AV_ZERO32(&mb->mv);
00608         mb->bmv[0] = mb->mv;
00609     }
00610 }
00611 
00612 static av_always_inline
00613 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
00614                            int mb_x, int keyframe)
00615 {
00616     uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00617     if (keyframe) {
00618         int x, y;
00619         uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
00620         uint8_t* const left = s->intra4x4_pred_mode_left;
00621         for (y = 0; y < 4; y++) {
00622             for (x = 0; x < 4; x++) {
00623                 const uint8_t *ctx;
00624                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
00625                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
00626                 left[y] = top[x] = *intra4x4;
00627                 intra4x4++;
00628             }
00629         }
00630     } else {
00631         int i;
00632         for (i = 0; i < 16; i++)
00633             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
00634     }
00635 }
00636 
/**
 * Decode the per-macroblock header: segment id, skip flag, prediction mode
 * and reference frame.
 *
 * @param s       decoder context (range coder s->c and probabilities)
 * @param mb      macroblock to fill in
 * @param mb_x    macroblock column
 * @param mb_y    macroblock row (only forwarded to decode_mvs())
 * @param segment in/out: this macroblock's entry in the segmentation map
 * @param ref     optional pointer to a segment id to inherit when the map
 *                is not updated in this frame; may be NULL, in which case
 *                *segment is kept as is
 */
00637 static av_always_inline
00638 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
00639 {
00640     VP56RangeCoder *c = &s->c;
00641 
00642     if (s->segmentation.update_map)
00643         *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
00644     else
00645         *segment = ref ? *ref : *segment;
00646     s->segment = *segment;
00647 
00648     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
00649 
00650     if (s->keyframe) {
00651         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
00652 
00653         if (mb->mode == MODE_I4x4) {
00654             decode_intra4x4_modes(s, c, mb_x, 1);
00655         } else {
              /* Not I4x4: fill the top and left 4x4 mode contexts with the
               * 4x4 mode mapped from the 16x16 mode, replicated into all
               * four bytes. */
00656             const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
00657             AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
00658             AV_WN32A(s->intra4x4_pred_mode_left, modes);
00659         }
00660 
00661         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
00662         mb->ref_frame = VP56_FRAME_CURRENT;
00663     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
00664         // inter MB, 16.2
00665         if (vp56_rac_get_prob_branchy(c, s->prob->last))
00666             mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
00667                 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
00668         else
00669             mb->ref_frame = VP56_FRAME_PREVIOUS;
          /* Count how many macroblocks use each reference frame. */
00670         s->ref_count[mb->ref_frame-1]++;
00671 
00672         // motion vectors, 16.3
00673         decode_mvs(s, mb, mb_x, mb_y);
00674     } else {
00675         // intra MB, 16.1
00676         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
00677 
00678         if (mb->mode == MODE_I4x4)
00679             decode_intra4x4_modes(s, c, mb_x, 0);
00680 
00681         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
00682         mb->ref_frame = VP56_FRAME_CURRENT;
00683         mb->partitioning = VP8_SPLITMVMODE_NONE;
00684         AV_ZERO32(&mb->bmv[0]);
00685     }
00686 }
00687 
00688 #ifndef decode_block_coeffs_internal
00689 
/**
 * Decode the DCT coefficient tokens of one 4x4 block, starting after the
 * first end-of-block check (decode_block_coeffs() performs that check
 * before calling this function).
 *
 * The #ifndef guard allows this function to be replaced by a macro of the
 * same name defined before this point.
 *
 * @param c          range coder to read from
 * @param block      output coefficients, written at zigzag_scan[] positions
 *                   and multiplied by the dequantization factor
 * @param probs      token probabilities, indexed by coefficient position
 *                   and by context (0, 1 or 2, chosen by the previous token)
 * @param i          index of the first coefficient to decode
 * @param token_prob probabilities to use for the first token
 * @param qmul       dequantization factors: [0] for coefficient 0,
 *                   [1] for all others
 * @return the index one past the last decoded coefficient
 */
00698 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
00699                                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00700                                         int i, uint8_t *token_prob, int16_t qmul[2])
00701 {
      /* Enter the loop past the EOB test: it was already done by the caller,
       * and it is also skipped after a zero token. */
00702     goto skip_eob;
00703     do {
00704         int coeff;
00705         if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00706             return i;
00707 
00708 skip_eob:
00709         if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
00710             if (++i == 16)
00711                 return i; // invalid input; blocks should end with EOB
00712             token_prob = probs[i][0];
00713             goto skip_eob;
00714         }
00715 
00716         if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
00717             coeff = 1;
00718             token_prob = probs[i+1][1];
00719         } else {
00720             if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
00721                 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
00722                 if (coeff)
00723                     coeff += vp56_rac_get_prob(c, token_prob[5]);
00724                 coeff += 2;
00725             } else {
00726                 // DCT_CAT*
00727                 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
00728                     if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
00729                         coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
00730                     } else {                                    // DCT_CAT2
00731                         coeff  = 7;
00732                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
00733                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
00734                     }
00735                 } else {    // DCT_CAT3 and up
                      /* Two bits select the category (0..3); its base value
                       * is 3 + (8 << cat), i.e. 11, 19, 35 or 67. */
00736                     int a = vp56_rac_get_prob(c, token_prob[8]);
00737                     int b = vp56_rac_get_prob(c, token_prob[9+a]);
00738                     int cat = (a<<1) + b;
00739                     coeff  = 3 + (8<<cat);
00740                     coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
00741                 }
00742             }
00743             token_prob = probs[i+1][2];
00744         }
          /* Read the sign bit, then dequantize: qmul[0] for index 0,
           * qmul[1] for every other index. */
00745         block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
00746     } while (++i < 16);
00747 
00748     return i;
00749 }
00750 #endif
00751 
00763 static av_always_inline
00764 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
00765                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00766                         int i, int zero_nhood, int16_t qmul[2])
00767 {
00768     uint8_t *token_prob = probs[i][zero_nhood];
00769     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00770         return 0;
00771     return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
00772 }
00773 
/**
 * Decode all coefficient blocks of one macroblock: the optional luma DC
 * block, the 16 luma blocks and the 2 x 4 chroma blocks.
 *
 * @param s     decoder context (block storage, probabilities, quantizers)
 * @param c     range coder of the coefficient partition to read from
 * @param mb    macroblock being decoded; mb->skip is set when no
 *              coefficient was coded at all
 * @param t_nnz top non-zero context flags, updated in place: [0..3] luma
 *              columns, [4..7] chroma, [8] luma DC block
 * @param l_nnz left non-zero context flags, same layout as t_nnz
 */
00774 static av_always_inline
00775 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
00776                       uint8_t t_nnz[9], uint8_t l_nnz[9])
00777 {
00778     int i, x, y, luma_start = 0, luma_ctx = 3;
00779     int nnz_pred, nnz, nnz_total = 0;
00780     int segment = s->segment;
00781     int block_dc = 0;
00782 
      /* Every mode except I4x4 and split MV codes the luma DC coefficients
       * in a separate block. The luma blocks then start at coefficient 1
       * and use token probability set 0 instead of 3. */
00783     if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
00784         nnz_pred = t_nnz[8] + l_nnz[8];
00785 
00786         // decode DC values and do hadamard
00787         nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
00788                                   s->qmat[segment].luma_dc_qmul);
00789         l_nnz[8] = t_nnz[8] = !!nnz;
00790         if (nnz) {
00791             nnz_total += nnz;
00792             block_dc = 1;
              /* A DC-only variant of the transform is used when only the
               * first coefficient was coded. */
00793             if (nnz == 1)
00794                 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
00795             else
00796                 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
00797         }
00798         luma_start = 1;
00799         luma_ctx = 0;
00800     }
00801 
00802     // luma blocks
00803     for (y = 0; y < 4; y++)
00804         for (x = 0; x < 4; x++) {
00805             nnz_pred = l_nnz[y] + t_nnz[x];
00806             nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
00807                                       nnz_pred, s->qmat[segment].luma_qmul);
00808             // nnz+block_dc may be one more than the actual last index, but we don't care
00809             s->non_zero_count_cache[y][x] = nnz + block_dc;
00810             t_nnz[x] = l_nnz[y] = !!nnz;
00811             nnz_total += nnz;
00812         }
00813 
00814     // chroma blocks
00815     // TODO: what to do about dimensions? 2nd dim for luma is x,
00816     // but for chroma it's (y<<1)|x
      /* i = 4 and i = 5 select the two chroma planes; their context flags
       * live at indices i + 2*x and i + 2*y, i.e. 4..7. */
00817     for (i = 4; i < 6; i++)
00818         for (y = 0; y < 2; y++)
00819             for (x = 0; x < 2; x++) {
00820                 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
00821                 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
00822                                           nnz_pred, s->qmat[segment].chroma_qmul);
00823                 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
00824                 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
00825                 nnz_total += nnz;
00826             }
00827 
00828     // if there were no coded coeffs despite the macroblock not being marked skip,
00829     // we MUST not do the inner loop filter and should not do IDCT
00830     // Since skip isn't used for bitstream prediction, just manually set it.
00831     if (!nnz_total)
00832         mb->skip = 1;
00833 }
00834 
00835 static av_always_inline
00836 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00837                       int linesize, int uvlinesize, int simple)
00838 {
00839     AV_COPY128(top_border, src_y + 15*linesize);
00840     if (!simple) {
00841         AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
00842         AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
00843     }
00844 }
00845 
00846 static av_always_inline
00847 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00848                     int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
00849                     int simple, int xchg)
00850 {
00851     uint8_t *top_border_m1 = top_border-32;     // for TL prediction
00852     src_y  -=   linesize;
00853     src_cb -= uvlinesize;
00854     src_cr -= uvlinesize;
00855 
00856 #define XCHG(a,b,xchg) do {                     \
00857         if (xchg) AV_SWAP64(b,a);               \
00858         else      AV_COPY64(b,a);               \
00859     } while (0)
00860 
00861     XCHG(top_border_m1+8, src_y-8, xchg);
00862     XCHG(top_border,      src_y,   xchg);
00863     XCHG(top_border+8,    src_y+8, 1);
00864     if (mb_x < mb_width-1)
00865         XCHG(top_border+32, src_y+16, 1);
00866 
00867     // only copy chroma for normal loop filter
00868     // or to initialize the top row to 127
00869     if (!simple || !mb_y) {
00870         XCHG(top_border_m1+16, src_cb-8, xchg);
00871         XCHG(top_border_m1+24, src_cr-8, xchg);
00872         XCHG(top_border+16,    src_cb, 1);
00873         XCHG(top_border+24,    src_cr, 1);
00874     }
00875 }
00876 
00877 static av_always_inline
00878 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
00879 {
00880     if (!mb_x) {
00881         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
00882     } else {
00883         return mb_y ? mode : LEFT_DC_PRED8x8;
00884     }
00885 }
00886 
00887 static av_always_inline
00888 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
00889 {
00890     if (!mb_x) {
00891         return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
00892     } else {
00893         return mb_y ? mode : HOR_PRED8x8;
00894     }
00895 }
00896 
00897 static av_always_inline
00898 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
00899 {
00900     if (mode == DC_PRED8x8) {
00901         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00902     } else {
00903         return mode;
00904     }
00905 }
00906 
00907 static av_always_inline
00908 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
00909 {
00910     switch (mode) {
00911     case DC_PRED8x8:
00912         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00913     case VERT_PRED8x8:
00914         return !mb_y ? DC_127_PRED8x8 : mode;
00915     case HOR_PRED8x8:
00916         return !mb_x ? DC_129_PRED8x8 : mode;
00917     case PLANE_PRED8x8 /*TM*/:
00918         return check_tm_pred8x8_mode(mode, mb_x, mb_y);
00919     }
00920     return mode;
00921 }
00922 
00923 static av_always_inline
00924 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
00925 {
00926     if (!mb_x) {
00927         return mb_y ? VERT_VP8_PRED : DC_129_PRED;
00928     } else {
00929         return mb_y ? mode : HOR_VP8_PRED;
00930     }
00931 }
00932 
00933 static av_always_inline
00934 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
00935 {
00936     switch (mode) {
00937     case VERT_PRED:
00938         if (!mb_x && mb_y) {
00939             *copy_buf = 1;
00940             return mode;
00941         }
00942         /* fall-through */
00943     case DIAG_DOWN_LEFT_PRED:
00944     case VERT_LEFT_PRED:
00945         return !mb_y ? DC_127_PRED : mode;
00946     case HOR_PRED:
00947         if (!mb_y) {
00948             *copy_buf = 1;
00949             return mode;
00950         }
00951         /* fall-through */
00952     case HOR_UP_PRED:
00953         return !mb_x ? DC_129_PRED : mode;
00954     case TM_VP8_PRED:
00955         return check_tm_pred4x4_mode(mode, mb_x, mb_y);
00956     case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
00957     case DIAG_DOWN_RIGHT_PRED:
00958     case VERT_RIGHT_PRED:
00959     case HOR_DOWN_PRED:
00960         if (!mb_y || !mb_x)
00961             *copy_buf = 1;
00962         return mode;
00963     }
00964     return mode;
00965 }
00966 
/**
 * Intra-predict one macroblock.
 *
 * Luma is predicted either as a single 16x16 block (mb->mode < MODE_I4x4)
 * or as sixteen 4x4 blocks; in the 4x4 case the residual of each block is
 * added right after its prediction. Both chroma planes are then predicted
 * with s->chroma_pred_mode.
 *
 * @param s     decoder context
 * @param dst   pointers to the macroblock's top-left pixel in the luma, cb
 *              and cr planes
 * @param mb    macroblock being predicted (mode and skip are read)
 * @param mb_x  macroblock column
 * @param mb_y  macroblock row
 */
00967 static av_always_inline
00968 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
00969                    int mb_x, int mb_y)
00970 {
00971     AVCodecContext *avctx = s->avctx;
00972     int x, y, mode, nnz;
00973     uint32_t tr;
00974
00975     // for the first row, we need to run xchg_mb_border to init the top edge to 127
00976     // otherwise, skip it if we aren't going to deblock
00977     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
00978         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
00979                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
00980                        s->filter.simple, 1);
00981
00982     if (mb->mode < MODE_I4x4) {
          // whole-macroblock luma prediction; the mode is first adjusted for frame edges
00983         if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
00984             mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
00985         } else {
00986             mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
00987         }
00988         s->hpc.pred16x16[mode](dst[0], s->linesize);
00989     } else {
00990         uint8_t *ptr = dst[0];
00991         uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
              // constant top-right samples, used in the first macroblock row under EMU_EDGE
00992         uint8_t tr_top[4] = { 127, 127, 127, 127 };
00993
00994         // blocks in the rightmost column of the macroblock take their top-right
00995         // samples from the row just above the macroblock, 16 pixels in
00996         uint8_t *tr_right = ptr - s->linesize + 16;
00997
00998         // if we're on the right edge of the frame, said edge is extended
00999         // from the top macroblock
01000         if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
01001             mb_x == s->mb_width-1) {
                  // replicate the last pixel above the macroblock into all four bytes
01002             tr = tr_right[-1]*0x01010101u;
01003             tr_right = (uint8_t *)&tr;
01004         }
01005
              // skipped macroblock: clear the counts so no residual is added below
01006         if (mb->skip)
01007             AV_ZERO128(s->non_zero_count_cache);
01008
01009         for (y = 0; y < 4; y++) {
01010             uint8_t *topright = ptr + 4 - s->linesize;
01011             for (x = 0; x < 4; x++) {
01012                 int copy = 0, linesize = s->linesize;
01013                 uint8_t *dst = ptr+4*x;
                      // scratch area with an 8-byte stride: the block lives at +12,
                      // its top row at +4, top-left at +3, left column at +11/+19/+27/+35
01014                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
01015
01016                 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
01017                     topright = tr_top;
01018                 } else if (x == 3)
01019                     topright = tr_right;
01020
01021                 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
01022                     mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
01023                     if (copy) {
                              // predict into the scratch area with hand-built edges
01024                         dst = copy_dst + 12;
01025                         linesize = 8;
01026                         if (!(mb_y + y)) {
                                  // no row above: top-left and top row are 127
01027                             copy_dst[3] = 127U;
01028                             AV_WN32A(copy_dst+4, 127U * 0x01010101U);
01029                         } else {
                                  // top row comes from the frame; top-left is 129 when
                                  // there is no column to the left
01030                             AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
01031                             if (!(mb_x + x)) {
01032                                 copy_dst[3] = 129U;
01033                             } else {
01034                                 copy_dst[3] = ptr[4*x-s->linesize-1];
01035                             }
01036                         }
01037                         if (!(mb_x + x)) {
                                  // no column to the left: left edge is 129
01038                             copy_dst[11] =
01039                             copy_dst[19] =
01040                             copy_dst[27] =
01041                             copy_dst[35] = 129U;
01042                         } else {
01043                             copy_dst[11] = ptr[4*x              -1];
01044                             copy_dst[19] = ptr[4*x+s->linesize  -1];
01045                             copy_dst[27] = ptr[4*x+s->linesize*2-1];
01046                             copy_dst[35] = ptr[4*x+s->linesize*3-1];
01047                         }
01048                     }
01049                 } else {
01050                     mode = intra4x4[x];
01051                 }
01052                 s->hpc.pred4x4[mode](dst, topright, linesize);
01053                 if (copy) {
                          // move the four predicted rows from the scratch area into the frame
01054                     AV_COPY32(ptr+4*x              , copy_dst+12);
01055                     AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
01056                     AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
01057                     AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
01058                 }
01059
                      // add the residual now: later blocks predict from the reconstructed pixels
01060                 nnz = s->non_zero_count_cache[y][x];
01061                 if (nnz) {
01062                     if (nnz == 1)
01063                         s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
01064                     else
01065                         s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
01066                 }
01067                 topright += 4;
01068             }
01069
01070             ptr   += 4*s->linesize;
01071             intra4x4 += 4;
01072         }
01073     }
01074
          // chroma: one mode for both planes, adjusted for frame edges like 16x16 luma
01075     if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01076         mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
01077     } else {
01078         mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
01079     }
01080     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
01081     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
01082
          // same condition as at the top, now with xchg=0 so the saved rows are
          // written back into the frame after prediction
01083     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
01084         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01085                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01086                        s->filter.simple, 0);
01087 }
01088 
/*
 * Per-subpel-position lookup, indexed as subpel_idx[row][pos] with pos in
 * 0..7. For every pos, row 1 equals row 0 plus row 2.
 */
static const uint8_t subpel_idx[3][8] = {
    /* [0] nr. of left extra pixels, also function pointer index */
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* [1] nr. of extra pixels required */
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* [2] nr. of right extra pixels */
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
01095 
01112 static av_always_inline
01113 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
01114                  int x_off, int y_off, int block_w, int block_h,
01115                  int width, int height, int linesize,
01116                  vp8_mc_func mc_func[3][3])
01117 {
01118     uint8_t *src = ref->data[0];
01119 
01120     if (AV_RN32A(mv)) {
01121 
01122         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
01123         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
01124 
01125         x_off += mv->x >> 2;
01126         y_off += mv->y >> 2;
01127 
01128         // edge emulation
01129         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
01130         src += y_off * linesize + x_off;
01131         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01132             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01133             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
01134                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01135                                     x_off - mx_idx, y_off - my_idx, width, height);
01136             src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01137         }
01138         mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
01139     } else {
01140         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
01141         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
01142     }
01143 }
01144 
01162 static av_always_inline
01163 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
01164                    const VP56mv *mv, int x_off, int y_off,
01165                    int block_w, int block_h, int width, int height, int linesize,
01166                    vp8_mc_func mc_func[3][3])
01167 {
01168     uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
01169 
01170     if (AV_RN32A(mv)) {
01171         int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
01172         int my = mv->y&7, my_idx = subpel_idx[0][my];
01173 
01174         x_off += mv->x >> 3;
01175         y_off += mv->y >> 3;
01176 
01177         // edge emulation
01178         src1 += y_off * linesize + x_off;
01179         src2 += y_off * linesize + x_off;
01180         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
01181         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01182             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01183             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
01184                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01185                                     x_off - mx_idx, y_off - my_idx, width, height);
01186             src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01187             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01188 
01189             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
01190                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01191                                     x_off - mx_idx, y_off - my_idx, width, height);
01192             src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01193             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01194         } else {
01195             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01196             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01197         }
01198     } else {
01199         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
01200         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01201         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01202     }
01203 }
01204 
01205 static av_always_inline
01206 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
01207                  AVFrame *ref_frame, int x_off, int y_off,
01208                  int bx_off, int by_off,
01209                  int block_w, int block_h,
01210                  int width, int height, VP56mv *mv)
01211 {
01212     VP56mv uvmv = *mv;
01213 
01214     /* Y */
01215     vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
01216                 ref_frame, mv, x_off + bx_off, y_off + by_off,
01217                 block_w, block_h, width, height, s->linesize,
01218                 s->put_pixels_tab[block_w == 8]);
01219 
01220     /* U/V */
01221     if (s->profile == 3) {
01222         uvmv.x &= ~7;
01223         uvmv.y &= ~7;
01224     }
01225     x_off   >>= 1; y_off   >>= 1;
01226     bx_off  >>= 1; by_off  >>= 1;
01227     width   >>= 1; height  >>= 1;
01228     block_w >>= 1; block_h >>= 1;
01229     vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
01230                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
01231                   &uvmv, x_off + bx_off, y_off + by_off,
01232                   block_w, block_h, width, height, s->uvlinesize,
01233                   s->put_pixels_tab[1 + (block_w == 4)]);
01234 }
01235 
01236 /* Fetch pixels for estimated mv 4 macroblocks ahead.
01237  * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
01238 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
01239 {
01240     /* Don't prefetch refs that haven't been used very often this frame. */
01241     if (s->ref_count[ref-1] > (mb_xy >> 5)) {
01242         int x_off = mb_x << 4, y_off = mb_y << 4;
01243         int mx = (mb->mv.x>>2) + x_off + 8;
01244         int my = (mb->mv.y>>2) + y_off;
01245         uint8_t **src= s->framep[ref]->data;
01246         int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
01247         /* For threading, a ff_thread_await_progress here might be useful, but
01248          * it actually slows down the decoder. Since a bad prefetch doesn't
01249          * generate bad decoder output, we don't run it here. */
01250         s->dsp.prefetch(src[0]+off, s->linesize, 4);
01251         off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
01252         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01253     }
01254 }
01255 
/**
 * Inter-predict one macroblock by motion compensation from the reference
 * frame selected by mb->ref_frame, dispatching on mb->partitioning.
 *
 * @param s     decoder context
 * @param dst   pointers to the macroblock's top-left pixel in each plane
 * @param mb    macroblock (ref_frame, partitioning, mv and bmv are read)
 * @param mb_x  macroblock column
 * @param mb_y  macroblock row
 */
01259 static av_always_inline
01260 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
01261                    int mb_x, int mb_y)
01262 {
          // luma pixel position of the macroblock and luma plane size in pixels
01263     int x_off = mb_x << 4, y_off = mb_y << 4;
01264     int width = 16*s->mb_width, height = 16*s->mb_height;
01265     AVFrame *ref = s->framep[mb->ref_frame];
01266     VP56mv *bmv = mb->bmv;
01267
01268     switch (mb->partitioning) {
01269     case VP8_SPLITMVMODE_NONE:
              // one 16x16 partition using the macroblock's own vector
01270         vp8_mc_part(s, dst, ref, x_off, y_off,
01271                     0, 0, 16, 16, width, height, &mb->mv);
01272         break;
01273     case VP8_SPLITMVMODE_4x4: {
01274         int x, y;
01275         VP56mv uvmv;
01276
01277         /* Y */
              // sixteen 4x4 luma blocks, each with its own vector bmv[4*y + x]
01278         for (y = 0; y < 4; y++) {
01279             for (x = 0; x < 4; x++) {
01280                 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
01281                             ref, &bmv[4*y + x],
01282                             4*x + x_off, 4*y + y_off, 4, 4,
01283                             width, height, s->linesize,
01284                             s->put_pixels_tab[2]);
01285             }
01286         }
01287
01288         /* U/V */
              // four 4x4 chroma blocks; each vector is derived from the 2x2 group
              // of luma vectors covering the same area
01289         x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
01290         for (y = 0; y < 2; y++) {
01291             for (x = 0; x < 2; x++) {
01292                 uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
01293                          mb->bmv[ 2*y    * 4 + 2*x+1].x +
01294                          mb->bmv[(2*y+1) * 4 + 2*x  ].x +
01295                          mb->bmv[(2*y+1) * 4 + 2*x+1].x;
01296                 uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
01297                          mb->bmv[ 2*y    * 4 + 2*x+1].y +
01298                          mb->bmv[(2*y+1) * 4 + 2*x  ].y +
01299                          mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                      // divide the sum by 4 with rounding; the shifted term is the sign
                      // of the sum (presumably -1 for negative sums, i.e. assuming an
                      // arithmetic right shift -- TODO confirm)
01300                 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
01301                 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
01302                 if (s->profile == 3) {
                          // clear the three low bits, which vp8_mc_chroma() would use
                          // as the subpel position
01303                     uvmv.x &= ~7;
01304                     uvmv.y &= ~7;
01305                 }
01306                 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
01307                               dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
01308                               4*x + x_off, 4*y + y_off, 4, 4,
01309                               width, height, s->uvlinesize,
01310                               s->put_pixels_tab[2]);
01311             }
01312         }
01313         break;
01314     }
01315     case VP8_SPLITMVMODE_16x8:
              // top and bottom halves: bmv[0], bmv[1]
01316         vp8_mc_part(s, dst, ref, x_off, y_off,
01317                     0, 0, 16, 8, width, height, &bmv[0]);
01318         vp8_mc_part(s, dst, ref, x_off, y_off,
01319                     0, 8, 16, 8, width, height, &bmv[1]);
01320         break;
01321     case VP8_SPLITMVMODE_8x16:
              // left and right halves: bmv[0], bmv[1]
01322         vp8_mc_part(s, dst, ref, x_off, y_off,
01323                     0, 0, 8, 16, width, height, &bmv[0]);
01324         vp8_mc_part(s, dst, ref, x_off, y_off,
01325                     8, 0, 8, 16, width, height, &bmv[1]);
01326         break;
01327     case VP8_SPLITMVMODE_8x8:
              // four quadrants in raster order: bmv[0..3]
01328         vp8_mc_part(s, dst, ref, x_off, y_off,
01329                     0, 0, 8, 8, width, height, &bmv[0]);
01330         vp8_mc_part(s, dst, ref, x_off, y_off,
01331                     8, 0, 8, 8, width, height, &bmv[1]);
01332         vp8_mc_part(s, dst, ref, x_off, y_off,
01333                     0, 8, 8, 8, width, height, &bmv[2]);
01334         vp8_mc_part(s, dst, ref, x_off, y_off,
01335                     8, 8, 8, 8, width, height, &bmv[3]);
01336         break;
01337     }
01338 }
01339 
/**
 * Add the residual of a macroblock to the already predicted pixels.
 *
 * Luma is skipped for MODE_I4x4 macroblocks; chroma is always processed.
 * The per-block counts in s->non_zero_count_cache select, per 4x4 block,
 * between doing nothing (0), the DC-only add (1) and the full add (> 1).
 *
 * @param s    decoder context (block, non_zero_count_cache and strides are read)
 * @param dst  pointers to the macroblock's top-left pixel in each plane
 * @param mb   macroblock (only mode is read)
 */
01340 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
01341 {
01342     int x, y, ch;
01343
01344     if (mb->mode != MODE_I4x4) {
01345         uint8_t *y_dst = dst[0];
01346         for (y = 0; y < 4; y++) {
                  // the four counts of this row packed into one word, first block in the low byte
01347             uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
01348             if (nnz4) {
                      // some count is above 1: handle the blocks one by one
01349                 if (nnz4&~0x01010101) {
01350                     for (x = 0; x < 4; x++) {
01351                         if ((uint8_t)nnz4 == 1)
01352                             s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
01353                         else if((uint8_t)nnz4 > 1)
01354                             s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                              // move to the next block's count; stop once the rest are zero
01355                         nnz4 >>= 8;
01356                         if (!nnz4)
01357                             break;
01358                     }
01359                 } else {
                          // every count is 0 or 1: one call covers the whole row
01360                     s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
01361                 }
01362             }
01363             y_dst += 4*s->linesize;
01364         }
01365     }
01366
          // chroma: planes 1 and 2, whose blocks and counts live at index 4+ch
01367     for (ch = 0; ch < 2; ch++) {
01368         uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
01369         if (nnz4) {
01370             uint8_t *ch_dst = dst[1+ch];
01371             if (nnz4&~0x01010101) {
01372                 for (y = 0; y < 2; y++) {
01373                     for (x = 0; x < 2; x++) {
01374                         if ((uint8_t)nnz4 == 1)
01375                             s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01376                         else if((uint8_t)nnz4 > 1)
01377                             s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01378                         nnz4 >>= 8;
                              // remaining counts are all zero: leave both loops for this plane
01379                         if (!nnz4)
01380                             goto chroma_idct_end;
01381                     }
01382                     ch_dst += 4*s->uvlinesize;
01383                 }
01384             } else {
                      // every count is 0 or 1: one call covers the whole plane
01385                 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
01386             }
01387         }
01388 chroma_idct_end: ;
01389     }
01390 }
01391 
01392 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
01393 {
01394     int interior_limit, filter_level;
01395 
01396     if (s->segmentation.enabled) {
01397         filter_level = s->segmentation.filter_level[s->segment];
01398         if (!s->segmentation.absolute_vals)
01399             filter_level += s->filter.level;
01400     } else
01401         filter_level = s->filter.level;
01402 
01403     if (s->lf_delta.enabled) {
01404         filter_level += s->lf_delta.ref[mb->ref_frame];
01405         filter_level += s->lf_delta.mode[mb->mode];
01406     }
01407 
01408     filter_level = av_clip_uintp2(filter_level, 6);
01409 
01410     interior_limit = filter_level;
01411     if (s->filter.sharpness) {
01412         interior_limit >>= (s->filter.sharpness + 3) >> 2;
01413         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
01414     }
01415     interior_limit = FFMAX(interior_limit, 1);
01416 
01417     f->filter_level = filter_level;
01418     f->inner_limit = interior_limit;
01419     f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
01420 }
01421 
/**
 * Apply the normal (non-simple) loop filter to one macroblock, luma and
 * chroma.
 *
 * Call order: macroblock edge at column 0 (skipped when mb_x is 0), inner
 * edges at luma columns 4/8/12 and chroma column 4, macroblock edge at
 * row 0 (skipped when mb_y is 0), inner edges at luma rows 4/8/12 and
 * chroma row 4. Inner edges are only filtered when f->inner_filter is set.
 * Nothing is done when f->filter_level is 0.
 *
 * @param s     decoder context (strides, keyframe flag and dsp functions)
 * @param dst   pointers to the macroblock's top-left pixel in each plane
 * @param f     filter strength previously computed for this macroblock
 * @param mb_x  macroblock column
 * @param mb_y  macroblock row
 */
01422 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
01423 {
01424     int mbedge_lim, bedge_lim, hev_thresh;
01425     int filter_level = f->filter_level;
01426     int inner_limit = f->inner_limit;
01427     int inner_filter = f->inner_filter;
01428     int linesize = s->linesize;
01429     int uvlinesize = s->uvlinesize;
          // hev threshold, indexed [s->keyframe][filter_level]
01430     static const uint8_t hev_thresh_lut[2][64] = {
01431         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01432           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01433           3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
01434           3, 3, 3, 3 },
01435         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01436           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01437           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01438           2, 2, 2, 2 }
01439     };
01440
01441     if (!filter_level)
01442         return;
01443
          // edge limits: inner block edges use bedge_lim, macroblock edges 4 more
01444      bedge_lim = 2*filter_level + inner_limit;
01445     mbedge_lim = bedge_lim + 4;
01446
01447     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
01448
          // macroblock edge at column 0; not filtered in the first macroblock column
01449     if (mb_x) {
01450         s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
01451                                        mbedge_lim, inner_limit, hev_thresh);
01452         s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01453                                        mbedge_lim, inner_limit, hev_thresh);
01454     }
01455
          // inner edges at luma columns 4, 8, 12 and chroma column 4
01456     if (inner_filter) {
01457         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
01458                                              inner_limit, hev_thresh);
01459         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
01460                                              inner_limit, hev_thresh);
01461         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
01462                                              inner_limit, hev_thresh);
01463         s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
01464                                              uvlinesize,  bedge_lim,
01465                                              inner_limit, hev_thresh);
01466     }
01467
          // macroblock edge at row 0; not filtered in the first macroblock row
01468     if (mb_y) {
01469         s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
01470                                        mbedge_lim, inner_limit, hev_thresh);
01471         s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01472                                        mbedge_lim, inner_limit, hev_thresh);
01473     }
01474
          // inner edges at luma rows 4, 8, 12 and chroma row 4
01475     if (inner_filter) {
01476         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
01477                                              linesize,    bedge_lim,
01478                                              inner_limit, hev_thresh);
01479         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
01480                                              linesize,    bedge_lim,
01481                                              inner_limit, hev_thresh);
01482         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
01483                                              linesize,    bedge_lim,
01484                                              inner_limit, hev_thresh);
01485         s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
01486                                              dst[2] + 4 * uvlinesize,
01487                                              uvlinesize,  bedge_lim,
01488                                              inner_limit, hev_thresh);
01489     }
01490 }
01491 
01492 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
01493 {
01494     int mbedge_lim, bedge_lim;
01495     int filter_level = f->filter_level;
01496     int inner_limit = f->inner_limit;
01497     int inner_filter = f->inner_filter;
01498     int linesize = s->linesize;
01499 
01500     if (!filter_level)
01501         return;
01502 
01503      bedge_lim = 2*filter_level + inner_limit;
01504     mbedge_lim = bedge_lim + 4;
01505 
01506     if (mb_x)
01507         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
01508     if (inner_filter) {
01509         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
01510         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
01511         s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
01512     }
01513 
01514     if (mb_y)
01515         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
01516     if (inner_filter) {
01517         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
01518         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
01519         s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
01520     }
01521 }
01522 
01523 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
01524 {
01525     VP8FilterStrength *f = s->filter_strength;
01526     uint8_t *dst[3] = {
01527         curframe->data[0] + 16*mb_y*s->linesize,
01528         curframe->data[1] +  8*mb_y*s->uvlinesize,
01529         curframe->data[2] +  8*mb_y*s->uvlinesize
01530     };
01531     int mb_x;
01532 
01533     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01534         backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
01535         filter_mb(s, dst, f++, mb_x, mb_y);
01536         dst[0] += 16;
01537         dst[1] += 8;
01538         dst[2] += 8;
01539     }
01540 }
01541 
01542 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
01543 {
01544     VP8FilterStrength *f = s->filter_strength;
01545     uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
01546     int mb_x;
01547 
01548     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01549         backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
01550         filter_mb_simple(s, dst, f++, mb_x, mb_y);
01551         dst += 16;
01552     }
01553 }
01554 
01555 static void release_queued_segmaps(VP8Context *s, int is_close)
01556 {
01557     int leave_behind = is_close ? 0 : !s->maps_are_invalid;
01558     while (s->num_maps_to_be_freed > leave_behind)
01559         av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
01560     s->maps_are_invalid = 0;
01561 }
01562 
01563 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
01564                             AVPacket *avpkt)
01565 {
01566     VP8Context *s = avctx->priv_data;
01567     int ret, mb_x, mb_y, i, y, referenced;
01568     enum AVDiscard skip_thresh;
01569     AVFrame *av_uninit(curframe), *prev_frame;
01570 
01571     release_queued_segmaps(s, 0);
01572 
01573     if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
01574         return ret;
01575 
01576     prev_frame = s->framep[VP56_FRAME_CURRENT];
01577 
01578     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
01579                                 || s->update_altref == VP56_FRAME_CURRENT;
01580 
01581     skip_thresh = !referenced ? AVDISCARD_NONREF :
01582                     !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
01583 
01584     if (avctx->skip_frame >= skip_thresh) {
01585         s->invisible = 1;
01586         goto skip_decode;
01587     }
01588     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
01589 
01590     // release no longer referenced frames
01591     for (i = 0; i < 5; i++)
01592         if (s->frames[i].data[0] &&
01593             &s->frames[i] != prev_frame &&
01594             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01595             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01596             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
01597             vp8_release_frame(s, &s->frames[i], 1, 0);
01598 
01599     // find a free buffer
01600     for (i = 0; i < 5; i++)
01601         if (&s->frames[i] != prev_frame &&
01602             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01603             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01604             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
01605             curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
01606             break;
01607         }
01608     if (i == 5) {
01609         av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
01610         abort();
01611     }
01612     if (curframe->data[0])
01613         vp8_release_frame(s, curframe, 1, 0);
01614 
01615     curframe->key_frame = s->keyframe;
01616     curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
01617     curframe->reference = referenced ? 3 : 0;
01618     if ((ret = vp8_alloc_frame(s, curframe))) {
01619         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
01620         return ret;
01621     }
01622 
01623     // check if golden and altref are swapped
01624     if (s->update_altref != VP56_FRAME_NONE) {
01625         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
01626     } else {
01627         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
01628     }
01629     if (s->update_golden != VP56_FRAME_NONE) {
01630         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
01631     } else {
01632         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
01633     }
01634     if (s->update_last) {
01635         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
01636     } else {
01637         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
01638     }
01639     s->next_framep[VP56_FRAME_CURRENT]      = curframe;
01640 
01641     ff_thread_finish_setup(avctx);
01642 
01643     // Given that arithmetic probabilities are updated every frame, it's quite likely
01644     // that the values we have on a random interframe are complete junk if we didn't
01645     // start decode on a keyframe. So just don't display anything rather than junk.
01646     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
01647                          !s->framep[VP56_FRAME_GOLDEN] ||
01648                          !s->framep[VP56_FRAME_GOLDEN2])) {
01649         av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
01650         return AVERROR_INVALIDDATA;
01651     }
01652 
01653     s->linesize   = curframe->linesize[0];
01654     s->uvlinesize = curframe->linesize[1];
01655 
01656     if (!s->edge_emu_buffer)
01657         s->edge_emu_buffer = av_malloc(21*s->linesize);
01658 
01659     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
01660 
01661     /* Zero macroblock structures for top/top-left prediction from outside the frame. */
01662     memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
01663 
01664     // top edge of 127 for intra prediction
01665     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01666         s->top_border[0][15] = s->top_border[0][23] = 127;
01667         memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
01668     }
01669     memset(s->ref_count, 0, sizeof(s->ref_count));
01670     if (s->keyframe)
01671         memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
01672 
01673 #define MARGIN (16 << 2)
01674     s->mv_min.y = -MARGIN;
01675     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
01676 
01677     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
01678         VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
01679         VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
01680         int mb_xy = mb_y*s->mb_width;
01681         uint8_t *dst[3] = {
01682             curframe->data[0] + 16*mb_y*s->linesize,
01683             curframe->data[1] +  8*mb_y*s->uvlinesize,
01684             curframe->data[2] +  8*mb_y*s->uvlinesize
01685         };
01686 
01687         memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
01688         memset(s->left_nnz, 0, sizeof(s->left_nnz));
01689         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
01690 
01691         // left edge of 129 for intra prediction
01692         if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01693             for (i = 0; i < 3; i++)
01694                 for (y = 0; y < 16>>!!i; y++)
01695                     dst[i][y*curframe->linesize[i]-1] = 129;
01696             if (mb_y == 1) // top left edge is also 129
01697                 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
01698         }
01699 
01700         s->mv_min.x = -MARGIN;
01701         s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
01702         if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
01703             ff_thread_await_progress(prev_frame, mb_y, 0);
01704 
01705         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
01706             /* Prefetch the current frame, 4 MBs ahead */
01707             s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
01708             s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
01709 
01710             decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
01711                            prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
01712 
01713             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
01714 
01715             if (!mb->skip)
01716                 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
01717 
01718             if (mb->mode <= MODE_I4x4)
01719                 intra_predict(s, dst, mb, mb_x, mb_y);
01720             else
01721                 inter_predict(s, dst, mb, mb_x, mb_y);
01722 
01723             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
01724 
01725             if (!mb->skip) {
01726                 idct_mb(s, dst, mb);
01727             } else {
01728                 AV_ZERO64(s->left_nnz);
01729                 AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
01730 
01731                 // Reset DC block predictors if they would exist if the mb had coefficients
01732                 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
01733                     s->left_nnz[8]      = 0;
01734                     s->top_nnz[mb_x][8] = 0;
01735                 }
01736             }
01737 
01738             if (s->deblock_filter)
01739                 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
01740 
01741             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
01742 
01743             dst[0] += 16;
01744             dst[1] += 8;
01745             dst[2] += 8;
01746             s->mv_min.x -= 64;
01747             s->mv_max.x -= 64;
01748         }
01749         if (s->deblock_filter) {
01750             if (s->filter.simple)
01751                 filter_mb_row_simple(s, curframe, mb_y);
01752             else
01753                 filter_mb_row(s, curframe, mb_y);
01754         }
01755         s->mv_min.y -= 64;
01756         s->mv_max.y -= 64;
01757 
01758         ff_thread_report_progress(curframe, mb_y, 0);
01759     }
01760 
01761     ff_thread_report_progress(curframe, INT_MAX, 0);
01762 skip_decode:
01763     // if future frames don't use the updated probabilities,
01764     // reset them to the values we saved
01765     if (!s->update_probabilities)
01766         s->prob[0] = s->prob[1];
01767 
01768     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
01769 
01770     if (!s->invisible) {
01771         *(AVFrame*)data = *curframe;
01772         *data_size = sizeof(AVFrame);
01773     }
01774 
01775     return avpkt->size;
01776 }
01777 
01778 static av_cold int vp8_decode_init(AVCodecContext *avctx)
01779 {
01780     VP8Context *s = avctx->priv_data;
01781 
01782     s->avctx = avctx;
01783     avctx->pix_fmt = PIX_FMT_YUV420P;
01784 
01785     dsputil_init(&s->dsp, avctx);
01786     ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
01787     ff_vp8dsp_init(&s->vp8dsp);
01788 
01789     return 0;
01790 }
01791 
01792 static av_cold int vp8_decode_free(AVCodecContext *avctx)
01793 {
01794     vp8_decode_flush_impl(avctx, 0, 1, 1);
01795     release_queued_segmaps(avctx->priv_data, 1);
01796     return 0;
01797 }
01798 
01799 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
01800 {
01801     VP8Context *s = avctx->priv_data;
01802 
01803     s->avctx = avctx;
01804 
01805     return 0;
01806 }
01807 
01808 #define REBASE(pic) \
01809     pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
01810 
01811 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
01812 {
01813     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
01814 
01815     if (s->macroblocks_base &&
01816         (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
01817         free_buffers(s);
01818         s->maps_are_invalid = 1;
01819     }
01820 
01821     s->prob[0] = s_src->prob[!s_src->update_probabilities];
01822     s->segmentation = s_src->segmentation;
01823     s->lf_delta = s_src->lf_delta;
01824     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
01825 
01826     memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
01827     s->framep[0] = REBASE(s_src->next_framep[0]);
01828     s->framep[1] = REBASE(s_src->next_framep[1]);
01829     s->framep[2] = REBASE(s_src->next_framep[2]);
01830     s->framep[3] = REBASE(s_src->next_framep[3]);
01831 
01832     return 0;
01833 }
01834 
01835 AVCodec ff_vp8_decoder = {
01836     .name           = "vp8",
01837     .type           = AVMEDIA_TYPE_VIDEO,
01838     .id             = CODEC_ID_VP8,
01839     .priv_data_size = sizeof(VP8Context),
01840     .init           = vp8_decode_init,
01841     .close          = vp8_decode_free,
01842     .decode         = vp8_decode_frame,
01843     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
01844     .flush = vp8_decode_flush,
01845     .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
01846     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
01847     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
01848 };