00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #define UNCHECKED_BITSTREAM_READER 1
00029
00030 #include <math.h>
00031 #include "avcodec.h"
00032 #include "get_bits.h"
00033 #include "put_bits.h"
00034 #include "wmavoice_data.h"
00035 #include "celp_math.h"
00036 #include "celp_filters.h"
00037 #include "acelp_vectors.h"
00038 #include "acelp_filters.h"
00039 #include "lsp.h"
00040 #include "libavutil/lzo.h"
00041 #include "dct.h"
00042 #include "rdft.h"
00043 #include "sinewin.h"
00044
00045 #define MAX_BLOCKS 8 ///< maximum number of blocks per frame
00046 #define MAX_LSPS 16 ///< maximum filter order
00047 #define MAX_LSPS_ALIGN16 16 ///< same as #MAX_LSPS; needs to be a multiple of 16 (used as an offset into the aligned synth_filter_out_buf)
00048
00049 #define MAX_FRAMES 3 ///< maximum number of frames per superframe
00050 #define MAX_FRAMESIZE 160 ///< maximum number of samples per frame
00051 #define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history
00052 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES) ///< maximum number of samples per superframe
00053
00054 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum size, in bytes, of the cached superframe data (see WMAVoiceContext.sframe_cache)
00055
00056 #define VLC_NBITS 6 ///< number of bits to read per VLC iteration
00057
/**
 * Static variable-length-code table, initialised by decode_vbmtree().
 * NOTE(review): presumably used to read the per-frame type code; the
 * reader is outside this part of the file - confirm.
 */
00061 static VLC frame_type_vlc;
00062
/**
 * Values of frame_type_desc.acb_type ("ACB" presumably stands for
 * adaptive codebook - TODO confirm). They select how synth_block()
 * builds the pitch-dependent part of the excitation.
 */
00066 enum {
00067 ACB_TYPE_NONE = 0, ///< synth_block() calls synth_block_hardcoded(); no pitch-based prediction
00068 ACB_TYPE_ASYMMETRIC = 1, ///< synth_block_fcb_acb() derives a per-sample pitch from last_pitch_val and pitch_diff_sh16 and interpolates with wmavoice_ipol1_coeffs
00069
00070
00071
00072
00073 ACB_TYPE_HAMMING = 2 ///< synth_block_fcb_acb() uses the per-block pitch (block_pitch_sh2) and wmavoice_ipol2_coeffs
00074
00075
00076 };
00077
/**
 * Values of frame_type_desc.fcb_type ("FCB" presumably stands for fixed
 * codebook - TODO confirm). They select where the non-pitch part of the
 * excitation comes from.
 */
00081 enum {
00082 FCB_TYPE_SILENCE = 0, ///< codebook offset comes from pRNG(), gain from silence_gain; no bits are read (see synth_block_hardcoded())
00083
00084
00085 FCB_TYPE_HARDCODED = 1, ///< codebook offset (8 bits) and gain index (6 bits) are read from the bitstream (see synth_block_hardcoded())
00086
00087 FCB_TYPE_AW_PULSES = 2, ///< pulses are decoded by aw_pulse_set1() and aw_pulse_set2()
00088
00089 FCB_TYPE_EXC_PULSES = 3, ///< five pulse tracks with explicit sign and position bits (see synth_block_fcb_acb())
00090
00091
00092 };
00093
/**
 * Static description of each of the 17 frame types. The table index is
 * the frame type number (decode_vbmtree() stores the numbers 0..16).
 */
00097 static const struct frame_type_desc {
00098 uint8_t n_blocks; ///< number of blocks in the frame; synth_block() divides by it to interpolate LSPs
00099
00100 uint8_t log_n_blocks; ///< base-2 logarithm of n_blocks (0, 1, 2 or 3 in the table below)
00101 uint8_t acb_type; ///< one of the ACB_TYPE_* values
00102 uint8_t fcb_type; ///< one of the FCB_TYPE_* values
00103 uint8_t dbl_pulses; ///< in synth_block_fcb_acb(), pulse tracks with index < dbl_pulses carry a second pulse
00104
00105
00106 uint16_t frame_size; ///< NOTE(review): presumably the size of the frame's block data in bits; not used in this part of the file - confirm
00107
00108 } frame_descs[17] = {
00109 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
00110 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
00111 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
00112 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
00113 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00114 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00115 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00116 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00117 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
00118 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
00119 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
00120 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
00121 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
00122 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
00123 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
00124 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
00125 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
00126 };
00127
/**
 * Private decoder state, stored in AVCodecContext.priv_data.
 * The first group of fields is derived once from the extradata and the
 * stream parameters in wmavoice_decode_init(); the remaining fields carry
 * state between blocks/frames or serve as scratch buffers.
 * Fields marked "not referenced here" are only used outside the part of
 * the file visible to this review.
 */
00131 typedef struct {
00136 AVFrame frame; ///< exposed to the caller as ctx->coded_frame
00137 GetBitContext gb; ///< bit reader; wmavoice_decode_init() points it at extradata + 22 to parse the VBM tree
00138
00139
00140
00141 int8_t vbm_tree[25]; ///< filled by decode_vbmtree(); unused slots hold -1 (0xff)
00142
00143 int spillover_bitsize; ///< 3 + ceil(log2(block_align))
00144
00145
00146 int history_nsamples; ///< max_pitch_val + 8; validated against MAX_SIGNAL_HISTORY
00147
00148
00149
00150 int do_apf; ///< bit 0 of the extradata flags; when set, the RDFT/DCT contexts and the sin/cos tables are initialised
00151
00152 int denoise_strength; ///< flag bits 2-5; must be < 12; selects a row of wmavoice_denoise_power_table
00153
00154 int denoise_tilt_corr; ///< flag bit 0x40; enables the tilt compensation step in calc_input_response()
00155
00156 int dc_level; ///< flag bits 7-10; postfilter() applies its order-2 transfer function only when this is > 8
00157
00158
00159 int lsps; ///< number of LSPs: 16 if flag 0x1000 is set, otherwise 10
00160 int lsp_q_mode; ///< flag bit 0x2000
00161 int lsp_def_mode; ///< flag bit 0x4000
00162
00163 int frame_lsp_bitsize; ///< 34 for 16 LSPs, 24 for 10 LSPs
00164
00165 int sframe_lsp_bitsize; ///< 60 for 16 LSPs, 48 for 10 LSPs
00166
00167
00168 int min_pitch_val; ///< smallest pitch value, derived from the sample rate
00169 int max_pitch_val; ///< largest pitch value, derived from the sample rate
00170 int pitch_nbits; ///< ceil(log2(max_pitch_val - min_pitch_val))
00171
00172 int block_pitch_nbits; ///< ceil(log2(block_pitch_range))
00173
00174 int block_pitch_range; ///< computed from block_conv_table[] and min_pitch_val in wmavoice_decode_init()
00175 int block_delta_pitch_nbits; ///< 1 + ceil(log2(block_delta_pitch_hrange))
00176
00177
00178
00179 int block_delta_pitch_hrange; ///< (pitch_range >> 3) & ~0xF; must be > 0
00180
00181 uint16_t block_conv_table[4]; ///< { min_pitch_val, 25/64 and 44/64 of the pitch range, max_pitch_val - 1 }
00182
00183
00193 int spillover_nbits; ///< not referenced here
00194
00195
00196
00197 int has_residual_lsps; ///< not referenced here
00198
00199
00200
00201
00202 int skip_bits_next; ///< not referenced here
00203
00204
00205
00206 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< not referenced here
00209 int sframe_cache_size; ///< not referenced here
00210
00211
00212
00213
00214 PutBitContext pb; ///< not referenced here
00215
00225 double prev_lsps[MAX_LSPS]; ///< initialised to M_PI * (n + 1) / (lsps + 1)
00226
00227 int last_pitch_val; ///< initialised to 40; start value of the per-sample pitch in synth_block_fcb_acb()
00228 int last_acb_type; ///< initialised to ACB_TYPE_NONE
00229 int pitch_diff_sh16; ///< per-sample pitch increment, scaled by 1 << 16 (see synth_block_fcb_acb())
00230
00231 float silence_gain; ///< gain applied to the standard codebook for silence frames and for the aw_pulse_set2() failure fallback
00232
00233 int aw_idx_is_ext; ///< set by aw_parse_coords() when the 6-bit index was >= 54 and extended by 2 more bits
00234
00235 int aw_pulse_range; ///< 24 or 16, chosen by aw_parse_coords() from the two block pitches
00236
00237
00238
00239
00240
00241 int aw_n_pulses[2]; ///< per-block pulse count computed by aw_parse_coords()
00242
00243
00244 int aw_first_pulse_off[2]; ///< per-block offset of the first pulse, computed by aw_parse_coords()
00245
00246 int aw_next_pulse_off_cache; ///< written at the end of aw_pulse_set2() and reused by it for block 1
00247
00248
00249
00250
00251
00252 int frame_cntr; ///< input to pRNG()
00253
00254 float gain_pred_err[6]; ///< history used for gain prediction in synth_block_fcb_acb(); cleared by synth_block_hardcoded()
00255 float excitation_history[MAX_SIGNAL_HISTORY]; ///< not referenced here
00259 float synth_history[MAX_LSPS]; ///< not referenced here
00260
00269 RDFTContext rdft, irdft; ///< forward (DFT_R2C) and inverse (IDFT_C2R) transforms, both initialised with nbits = 7
00270
00271 DCTContext dct, dst; ///< DCT_I and DST_I transforms, both initialised with nbits = 6
00272
00273 float sin[511], cos[511]; ///< lookup tables built in wmavoice_decode_init(); indexed with 255 + clipped value in calc_input_response()
00274
00275 float postfilter_agc; ///< gain memory passed to adaptive_gain_control()
00276
00277 float dcf_mem[2]; ///< filter memory for the order-2 transfer function in postfilter()
00278 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE]; ///< not referenced here (postfilter() receives a zero_exc_pf pointer as a parameter)
00281 float denoise_filter_cache[MAX_FRAMESIZE]; ///< filter tail carried over between wiener_denoise() calls
00282 int denoise_filter_cache_size; ///< number of valid samples in denoise_filter_cache
00283 DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80]; ///< scratch buffer used by wiener_denoise()
00285 DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80]; ///< scratch buffer for the filter coefficients in wiener_denoise()
00287 DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; ///< postfilter() output; the first MAX_LSPS_ALIGN16 entries hold filter history
00290
00293 } WMAVoiceContext;
00294
00304 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00305 {
00306 static const uint8_t bits[] = {
00307 2, 2, 2, 4, 4, 4,
00308 6, 6, 6, 8, 8, 8,
00309 10, 10, 10, 12, 12, 12,
00310 14, 14, 14, 14
00311 };
00312 static const uint16_t codes[] = {
00313 0x0000, 0x0001, 0x0002,
00314 0x000c, 0x000d, 0x000e,
00315 0x003c, 0x003d, 0x003e,
00316 0x00fc, 0x00fd, 0x00fe,
00317 0x03fc, 0x03fd, 0x03fe,
00318 0x0ffc, 0x0ffd, 0x0ffe,
00319 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00320 };
00321 int cntr[8], n, res;
00322
00323 memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00324 memset(cntr, 0, sizeof(cntr));
00325 for (n = 0; n < 17; n++) {
00326 res = get_bits(gb, 3);
00327 if (cntr[res] > 3)
00328 return -1;
00329 vbm_tree[res * 3 + cntr[res]++] = n;
00330 }
00331 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00332 bits, 1, 1, codes, 2, 2, 132);
00333 return 0;
00334 }
00335
00339 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00340 {
00341 int n, flags, pitch_range, lsp16_flag;
00342 WMAVoiceContext *s = ctx->priv_data;
00343
00352 if (ctx->extradata_size != 46) {
00353 av_log(ctx, AV_LOG_ERROR,
00354 "Invalid extradata size %d (should be 46)\n",
00355 ctx->extradata_size);
00356 return -1;
00357 }
00358 flags = AV_RL32(ctx->extradata + 18);
00359 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00360 s->do_apf = flags & 0x1;
00361 if (s->do_apf) {
00362 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00363 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00364 ff_dct_init(&s->dct, 6, DCT_I);
00365 ff_dct_init(&s->dst, 6, DST_I);
00366
00367 ff_sine_window_init(s->cos, 256);
00368 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00369 for (n = 0; n < 255; n++) {
00370 s->sin[n] = -s->sin[510 - n];
00371 s->cos[510 - n] = s->cos[n];
00372 }
00373 }
00374 s->denoise_strength = (flags >> 2) & 0xF;
00375 if (s->denoise_strength >= 12) {
00376 av_log(ctx, AV_LOG_ERROR,
00377 "Invalid denoise filter strength %d (max=11)\n",
00378 s->denoise_strength);
00379 return -1;
00380 }
00381 s->denoise_tilt_corr = !!(flags & 0x40);
00382 s->dc_level = (flags >> 7) & 0xF;
00383 s->lsp_q_mode = !!(flags & 0x2000);
00384 s->lsp_def_mode = !!(flags & 0x4000);
00385 lsp16_flag = flags & 0x1000;
00386 if (lsp16_flag) {
00387 s->lsps = 16;
00388 s->frame_lsp_bitsize = 34;
00389 s->sframe_lsp_bitsize = 60;
00390 } else {
00391 s->lsps = 10;
00392 s->frame_lsp_bitsize = 24;
00393 s->sframe_lsp_bitsize = 48;
00394 }
00395 for (n = 0; n < s->lsps; n++)
00396 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00397
00398 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00399 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00400 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00401 return -1;
00402 }
00403
00404 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00405 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00406 pitch_range = s->max_pitch_val - s->min_pitch_val;
00407 if (pitch_range <= 0) {
00408 av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00409 return -1;
00410 }
00411 s->pitch_nbits = av_ceil_log2(pitch_range);
00412 s->last_pitch_val = 40;
00413 s->last_acb_type = ACB_TYPE_NONE;
00414 s->history_nsamples = s->max_pitch_val + 8;
00415
00416 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00417 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00418 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00419
00420 av_log(ctx, AV_LOG_ERROR,
00421 "Unsupported samplerate %d (min=%d, max=%d)\n",
00422 ctx->sample_rate, min_sr, max_sr);
00423
00424 return -1;
00425 }
00426
00427 s->block_conv_table[0] = s->min_pitch_val;
00428 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00429 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00430 s->block_conv_table[3] = s->max_pitch_val - 1;
00431 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00432 if (s->block_delta_pitch_hrange <= 0) {
00433 av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00434 return -1;
00435 }
00436 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00437 s->block_pitch_range = s->block_conv_table[2] +
00438 s->block_conv_table[3] + 1 +
00439 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00440 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00441
00442 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00443
00444 avcodec_get_frame_defaults(&s->frame);
00445 ctx->coded_frame = &s->frame;
00446
00447 return 0;
00448 }
00449
/**
 * Adaptive gain control: scale the post-filtered signal so that its level
 * follows the level of the unfiltered synthesised speech.
 *
 * Both levels are measured as the sum of absolute sample values. The gain
 * applied to each sample is a first-order recursive average
 * (mem = alpha * mem + scale) whose state is kept in *gain_mem across
 * calls.
 *
 * If the post-filtered input is completely silent (level 0), the scale
 * factor is taken as 0 instead of dividing by zero; otherwise Inf/NaN
 * would be written to the output and stored in *gain_mem, corrupting all
 * subsequent calls.
 *
 * @param out          output samples (size entries)
 * @param in           post-filtered input samples (size entries)
 * @param speech_synth reference (unfiltered) samples (size entries)
 * @param size         number of samples
 * @param alpha        smoothing factor of the recursive gain average
 * @param gain_mem     in/out: gain state carried between calls
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }

    if (postfilter_energy == 0.0)
        gain_scale_factor = 0.0;
    else
        gain_scale_factor = (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00492
00511 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00512 const float *in, float *out, int size)
00513 {
00514 int n;
00515 float optimal_gain = 0, dot;
00516 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00517 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00518 *best_hist_ptr;
00519
00520
00521 do {
00522 dot = ff_dot_productf(in, ptr, size);
00523 if (dot > optimal_gain) {
00524 optimal_gain = dot;
00525 best_hist_ptr = ptr;
00526 }
00527 } while (--ptr >= end);
00528
00529 if (optimal_gain <= 0)
00530 return -1;
00531 dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00532 if (dot <= 0)
00533 return -1;
00534
00535 if (optimal_gain <= dot) {
00536 dot = dot / (dot + 0.6 * optimal_gain);
00537 } else
00538 dot = 0.625;
00539
00540
00541 for (n = 0; n < size; n++)
00542 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00543
00544 return 0;
00545 }
00546
/**
 * Compute the ratio of the lag-1 to the lag-0 autocorrelation of the
 * sequence { 1, lpcs[0], ..., lpcs[n_lpcs - 1] }.
 *
 * @param lpcs   coefficients (the leading 1 is implicit)
 * @param n_lpcs number of entries in lpcs
 * @return lag-1 autocorrelation divided by lag-0 autocorrelation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float autocorr0 = 1.0 + ff_dot_productf(lpcs, lpcs, n_lpcs);
    const float autocorr1 = lpcs[0] +
                            ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);

    return autocorr1 / autocorr0;
}
00566
/**
 * Derive the time-domain denoise filter coefficients from a set of
 * (tilted) LPCs.
 *
 * Outline, as implemented below:
 *  - forward RDFT of lpcs (in place) and conversion to a log10 power
 *    spectrum of 65 bins, tracking its minimum and maximum;
 *  - per bin, a table lookup (wmavoice_denoise_power_table, row selected
 *    by s->denoise_strength) gives a power value from which a magnitude
 *    (coeffs[n], via wmavoice_energy_table) and a scaled value stored back
 *    in lpcs[n] are derived;
 *  - the DCT and DST contexts are run over lpcs, and the results are used
 *    as indices into s->sin / s->cos to turn the magnitudes into complex
 *    spectrum values;
 *  - inverse RDFT, zeroing of everything from index remainder on,
 *    optional tilt compensation, and energy normalisation.
 *
 * @param s         decoder context (transforms, tables, denoise settings)
 * @param lpcs      in: 128 floats to analyse; clobbered (used as scratch)
 * @param fcb_type  FCB_TYPE_* of the frame; only selects a gain constant
 * @param coeffs    out: 128 floats; entries [0, remainder) hold the
 *                  filter, the rest is zero
 * @param remainder number of filter coefficients to keep
 */
00570 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00571 int fcb_type, float *coeffs, int remainder)
00572 {
00573 float last_coeff, min = 15.0, max = -15.0;
00574 float irange, angle_mul, gain_mul, range, sq;
00575 int n, idx;
00576
00577
/* Forward transform, in place. */
00578 s->rdft.rdft_calc(&s->rdft, lpcs);
/* log_range(var, x): store log10(x) in var and update min/max with it. */
00579 #define log_range(var, assign) do { \
00580 float tmp = log10f(assign); var = tmp; \
00581 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00582 } while (0)
/* lpcs[1] is handled separately from the (re, im) pairs and ends up in
 * lpcs[64]; presumably it is the Nyquist bin of the packed RDFT output -
 * TODO confirm against the RDFT documentation. */
00583 log_range(last_coeff, lpcs[1] * lpcs[1]);
00584 for (n = 1; n < 64; n++)
00585 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
00586 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00587 log_range(lpcs[0], lpcs[0] * lpcs[0]);
00588 #undef log_range
00589 range = max - min;
00590 lpcs[64] = last_coeff;
00591
00592
00593
00594
00595
00596
/* NOTE(review): if all 65 log values are equal, range is 0 and irange
 * becomes infinite - confirm whether that can happen in practice. */
00597 irange = 64.0 / range;
00598 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00599 (5.0 / 14.7));
00600 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00601 for (n = 0; n <= 64; n++) {
00602 float pwr;
00603
/* Map the distance from the spectral maximum onto a table column. */
00604 idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00605 pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00606 lpcs[n] = angle_mul * pwr;
00607
00608
/* Magnitude lookup; indices above 127 extrapolate the last table entry
 * geometrically, negative indices are clamped to entry 0. */
00609 idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00610 if (idx > 127) {
00611 coeffs[n] = wmavoice_energy_table[127] *
00612 powf(1.0331663, idx - 127);
00613 } else
00614 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00615 }
00616
00617
00618
00619
00620
/* Both transforms operate in place on lpcs, one after the other. */
00621 s->dct.dct_calc(&s->dct, lpcs);
00622 s->dst.dct_calc(&s->dst, lpcs);
00623
00624
/* Combine magnitudes (coeffs[n]) with sin/cos table values into
 * interleaved pairs coeffs[2n], coeffs[2n + 1]. The table index is the
 * clipped value offset by 255. The loop handles two bins per iteration
 * (odd n first, then even n) with different signs for lpcs[64]. */
00625 idx = 255 + av_clip(lpcs[64], -255, 255);
00626 coeffs[0] = coeffs[0] * s->cos[idx];
00627 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00628 last_coeff = coeffs[64] * s->cos[idx];
00629 for (n = 63;; n--) {
00630 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00631 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00632 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00633
00634 if (!--n) break;
00635
00636 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00637 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00638 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00639 }
00640 coeffs[1] = last_coeff;
00641
00642
/* Back to the time domain. */
00643 s->irdft.rdft_calc(&s->irdft, coeffs);
00644
00645
/* Keep only the first `remainder` coefficients. */
00646 memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00647 if (s->denoise_tilt_corr) {
00648 float tilt_mem = 0;
00649
00650 coeffs[remainder - 1] = 0;
00651 ff_tilt_compensation(&tilt_mem,
00652 -1.8 * tilt_factor(coeffs, remainder - 1),
00653 coeffs, remainder);
00654 }
/* Normalise: scale so that the coefficient energy becomes (1/64)^2. */
00655 sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00656 for (n = 0; n < remainder; n++)
00657 coeffs[n] *= sq;
00658 }
00659
/**
 * Filter one block of synthesised speech with the denoise filter from
 * calc_input_response(), using frequency-domain multiplication and an
 * overlap-add cache for the filter tail.
 *
 * For FCB_TYPE_SILENCE blocks no filtering is done; only the tail cached
 * by an earlier call is added to the block.
 *
 * @param s        decoder context (scratch buffers, transforms, cache)
 * @param fcb_type FCB_TYPE_* of the frame
 * @param synth_pf in/out: size samples to filter; must have room for 128
 *                 floats because it is zero-padded and transformed in
 *                 place
 * @param size     number of valid samples in synth_pf
 * @param lpcs     LPC coefficients of the block (s->lsps entries)
 */
00686 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00687 float *synth_pf, int size,
00688 const float *lpcs)
00689 {
/* remainder is only assigned and only read when fcb_type is not
 * FCB_TYPE_SILENCE. */
00690 int remainder, lim, n;
00691
00692 if (fcb_type != FCB_TYPE_SILENCE) {
00693 float *tilted_lpcs = s->tilted_lpcs_pf,
00694 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00695
/* Build { 1, lpcs[0..lsps-1], 0, ... } (128 entries) and apply tilt
 * compensation to its first lsps + 2 entries. */
00696 tilted_lpcs[0] = 1.0;
00697 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00698 memset(&tilted_lpcs[s->lsps + 1], 0,
00699 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00700 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00701 tilted_lpcs, s->lsps + 2);
00702
00703
00704
00705
00706
/* Filter length: limited so that size + remainder fits in the 128-point
 * transform. */
00707 remainder = FFMIN(127 - size, size - 1);
00708 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00709
00710
00711
/* Zero-pad the signal, transform signal and filter, and multiply the two
 * spectra. Entries 0 and 1 are multiplied as plain reals, the remaining
 * 63 pairs as complex numbers. */
00712 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00713 s->rdft.rdft_calc(&s->rdft, synth_pf);
00714 s->rdft.rdft_calc(&s->rdft, coeffs);
00715 synth_pf[0] *= coeffs[0];
00716 synth_pf[1] *= coeffs[1];
00717 for (n = 1; n < 64; n++) {
00718 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00719 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00720 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00721 }
00722 s->irdft.rdft_calc(&s->irdft, synth_pf);
00723 }
00724
00725
/* Add the tail left over from previous blocks and drop the consumed part
 * of the cache. */
00726 if (s->denoise_filter_cache_size) {
00727 lim = FFMIN(s->denoise_filter_cache_size, size);
00728 for (n = 0; n < lim; n++)
00729 synth_pf[n] += s->denoise_filter_cache[n];
00730 s->denoise_filter_cache_size -= lim;
00731 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00732 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00733 }
00734
00735
/* Store this block's tail (synth_pf[size .. size + remainder)) for the
 * next call: accumulate where the cache still has data, copy the rest. */
00736 if (fcb_type != FCB_TYPE_SILENCE) {
00737 lim = FFMIN(remainder, s->denoise_filter_cache_size);
00738 for (n = 0; n < lim; n++)
00739 s->denoise_filter_cache[n] += synth_pf[size + n];
00740 if (lim < remainder) {
00741 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00742 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00743 s->denoise_filter_cache_size = remainder;
00744 }
00745 }
00746 }
00747
00768 static void postfilter(WMAVoiceContext *s, const float *synth,
00769 float *samples, int size,
00770 const float *lpcs, float *zero_exc_pf,
00771 int fcb_type, int pitch)
00772 {
00773 float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00774 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00775 *synth_filter_in = zero_exc_pf;
00776
00777 assert(size <= MAX_FRAMESIZE / 2);
00778
00779
00780 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00781
00782 if (fcb_type >= FCB_TYPE_AW_PULSES &&
00783 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00784 synth_filter_in = synth_filter_in_buf;
00785
00786
00787 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00788 synth_filter_in, size, s->lsps);
00789 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00790 sizeof(synth_pf[0]) * s->lsps);
00791
00792 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00793
00794 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00795 &s->postfilter_agc);
00796
00797 if (s->dc_level > 8) {
00798
00799
00800
00801 ff_acelp_apply_order_2_transfer_function(samples, samples,
00802 (const float[2]) { -1.99997, 1.0 },
00803 (const float[2]) { -1.9330735188, 0.93589198496 },
00804 0.93980580475, s->dcf_mem, size);
00805 }
00806 }
/**
 * Dequantize LSPs as the sum of several codebook stages.
 *
 * For every stage n, the codebook row selected by values[n] is scaled by
 * mul_q[n], offset by base_q[n] and added to the output. The codebooks of
 * all stages are stored back to back in table; stage n has sizes[n] rows
 * of num entries each.
 *
 * @param lsps     output, num entries (overwritten)
 * @param num      number of LSPs per codebook row
 * @param values   row index per stage
 * @param sizes    number of rows per stage
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    scale factor per stage
 * @param base_q   offset per stage
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    int stage;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *row   = table + values[stage] * num;
        const double offset  = base_q[stage];
        const double scale   = mul_q[stage];
        int i;

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * row[i];

        /* advance to the next stage's codebook */
        table += sizes[stage] * num;
    }
}
00842
00854 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00855 {
00856 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00857 static const double mul_lsf[4] = {
00858 5.2187144800e-3, 1.4626986422e-3,
00859 9.6179549166e-4, 1.1325736225e-3
00860 };
00861 static const double base_lsf[4] = {
00862 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00863 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00864 };
00865 uint16_t v[4];
00866
00867 v[0] = get_bits(gb, 8);
00868 v[1] = get_bits(gb, 6);
00869 v[2] = get_bits(gb, 5);
00870 v[3] = get_bits(gb, 5);
00871
00872 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00873 mul_lsf, base_lsf);
00874 }
00875
00880 static void dequant_lsp10r(GetBitContext *gb,
00881 double *i_lsps, const double *old,
00882 double *a1, double *a2, int q_mode)
00883 {
00884 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00885 static const double mul_lsf[3] = {
00886 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00887 };
00888 static const double base_lsf[3] = {
00889 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00890 };
00891 const float (*ipol_tab)[2][10] = q_mode ?
00892 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00893 uint16_t interpol, v[3];
00894 int n;
00895
00896 dequant_lsp10i(gb, i_lsps);
00897
00898 interpol = get_bits(gb, 5);
00899 v[0] = get_bits(gb, 7);
00900 v[1] = get_bits(gb, 6);
00901 v[2] = get_bits(gb, 6);
00902
00903 for (n = 0; n < 10; n++) {
00904 double delta = old[n] - i_lsps[n];
00905 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00906 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00907 }
00908
00909 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00910 mul_lsf, base_lsf);
00911 }
00912
00916 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00917 {
00918 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00919 static const double mul_lsf[5] = {
00920 3.3439586280e-3, 6.9908173703e-4,
00921 3.3216608306e-3, 1.0334960326e-3,
00922 3.1899104283e-3
00923 };
00924 static const double base_lsf[5] = {
00925 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00926 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00927 M_PI * -1.29816e-1
00928 };
00929 uint16_t v[5];
00930
00931 v[0] = get_bits(gb, 8);
00932 v[1] = get_bits(gb, 6);
00933 v[2] = get_bits(gb, 7);
00934 v[3] = get_bits(gb, 6);
00935 v[4] = get_bits(gb, 7);
00936
00937 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00938 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00939 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00940 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00941 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00942 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00943 }
00944
00949 static void dequant_lsp16r(GetBitContext *gb,
00950 double *i_lsps, const double *old,
00951 double *a1, double *a2, int q_mode)
00952 {
00953 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00954 static const double mul_lsf[3] = {
00955 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00956 };
00957 static const double base_lsf[3] = {
00958 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00959 };
00960 const float (*ipol_tab)[2][16] = q_mode ?
00961 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00962 uint16_t interpol, v[3];
00963 int n;
00964
00965 dequant_lsp16i(gb, i_lsps);
00966
00967 interpol = get_bits(gb, 5);
00968 v[0] = get_bits(gb, 7);
00969 v[1] = get_bits(gb, 7);
00970 v[2] = get_bits(gb, 7);
00971
00972 for (n = 0; n < 16; n++) {
00973 double delta = old[n] - i_lsps[n];
00974 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00975 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00976 }
00977
00978 dequant_lsps( a2, 10, v, vec_sizes, 1,
00979 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00980 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00981 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00982 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00983 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00984 }
00985
/**
 * Parse the pulse-placement index of an FCB_TYPE_AW_PULSES frame and
 * derive the per-block pulse layout from it.
 *
 * Writes s->aw_idx_is_ext, s->aw_pulse_range, s->aw_n_pulses[0..1] and
 * s->aw_first_pulse_off[0..1]. Reads 6 bits, plus 2 more if the 6-bit
 * value is 54 or larger.
 *
 * @param s     decoder context
 * @param gb    bit reader
 * @param pitch pitch values of the two blocks (pitch[0], pitch[1]);
 *              NOTE(review): both are used as divisors and loop steps, so
 *              they are assumed to be > 0 - confirm at the caller
 */
00999 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01000 const int *pitch)
01001 {
/* Start offset per index value. The entries are not strictly increasing
 * (.. 15, 18, 17, 19 ..); kept as is. */
01002 static const int16_t start_offset[94] = {
01003 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
01004 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
01005 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
01006 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
01007 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
01008 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
01009 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01010 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01011 };
01012 int bits, offset;
01013
01014
/* Index values 54..63 are extended with 2 extra bits, giving indices
 * 54..93 (the table has 94 entries). */
01015 s->aw_idx_is_ext = 0;
01016 if ((bits = get_bits(gb, 6)) >= 54) {
01017 s->aw_idx_is_ext = 1;
01018 bits += (bits - 54) * 3 + get_bits(gb, 2);
01019 }
01020
01021
01022
/* Pulse range depends on the smaller of the two pitches. */
01023 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
/* Move a negative start offset forward in steps of pitch[0] until it is
 * non-negative. */
01024 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
/* Block 0 covers samples [0, MAX_FRAMESIZE / 2): count the pulses spaced
 * pitch[0] apart from offset, then continue into block 1 with pitch[1]. */
01025 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01026 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01027 offset += s->aw_n_pulses[0] * pitch[0];
01028 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01029 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01030
01031
01032
01033
/* If the start offset lies in the first block, step the first-pulse
 * offsets back by whole pitch periods for as long as the pulse window
 * (offset + aw_pulse_range) would still reach past sample 0. */
01034 if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01035 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01036 s->aw_first_pulse_off[1] -= pitch[1];
01037 if (start_offset[bits] < 0)
01038 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01039 s->aw_first_pulse_off[0] -= pitch[0];
01040 }
01041 }
01042
/**
 * Decode one additional pulse for a block of an FCB_TYPE_AW_PULSES frame
 * and append it to fcb.
 *
 * A bit mask (one bit per sample of the block, MSB first within each
 * 16-bit word) marks the positions that are still free. Positions inside
 * the pulse windows that repeat every fcb->pitch_lag samples are excluded
 * first; the coded index then counts free positions starting from
 * pulse_start.
 *
 * @param s         decoder context (pulse layout from aw_parse_coords())
 * @param gb        bit reader
 * @param block_idx block number within the frame (0 or 1)
 * @param fcb       pulse vector to append to; fcb->pitch_lag is read
 * @return 0 on success, -1 if no free position is left
 */
01051 static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01052 int block_idx, AMRFixed *fcb)
01053 {
/* 5 words cover the 80 samples of a block; 2 guard words on each side
 * absorb writes for indices just outside that range. */
01054 uint16_t use_mask_mem[9];
01055 uint16_t *use_mask = use_mask_mem + 2;
01056
01057
01058
01059
01060
01061
01062
01063 int pulse_off = s->aw_first_pulse_off[block_idx],
01064 pulse_start, n, idx, range, aidx, start_off = 0;
01065
01066
/* Advance the first pulse window until it overlaps the block. */
01067 if (s->aw_n_pulses[block_idx] > 0)
01068 while (pulse_off + s->aw_pulse_range < 1)
01069 pulse_off += fcb->pitch_lag;
01070
01071
/* Choose the search range; for block 1 the pulse offset cached by the
 * previous call replaces the computed one. */
01072 if (s->aw_n_pulses[0] > 0) {
01073 if (block_idx == 0) {
01074 range = 32;
01075 } else {
01076 range = 8;
01077 if (s->aw_n_pulses[block_idx] > 0)
01078 pulse_off = s->aw_next_pulse_off_cache;
01079 }
01080 } else
01081 range = 16;
01082 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01083
01084
01085
01086
/* Mark all 80 positions free (guard words cleared), then clear the bits
 * of every pulse window [idx, idx + aw_pulse_range). A window can span
 * two or three mask words. */
01087 memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01088 memset( use_mask, -1, 5 * sizeof(use_mask[0]));
01089 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01090 if (s->aw_n_pulses[block_idx] > 0)
01091 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01092 int excl_range = s->aw_pulse_range;
01093 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01094 int first_sh = 16 - (idx & 15);
01095 *use_mask_ptr++ &= 0xFFFFu << first_sh;
01096 excl_range -= first_sh;
01097 if (excl_range >= 16) {
01098 *use_mask_ptr++ = 0;
01099 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
01100 } else
01101 *use_mask_ptr &= 0xFFFF >> excl_range;
01102 }
01103
01104
/* The index is 5 or 3 bits wide (block 0 / block 1) when block 0 has
 * pulses, otherwise 4 bits. Walk positions from pulse_start until
 * aidx + 1 free positions have been consumed; the last one is the pulse
 * position. */
01105 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01106 for (n = 0; n <= aidx; pulse_start++) {
01107 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
/* Past the end of the block: fall back to the first free position in the
 * mask, or fail if none is left. */
01108 if (idx >= MAX_FRAMESIZE / 2) {
01109 if (use_mask[0]) idx = 0x0F;
01110 else if (use_mask[1]) idx = 0x1F;
01111 else if (use_mask[2]) idx = 0x2F;
01112 else if (use_mask[3]) idx = 0x3F;
01113 else if (use_mask[4]) idx = 0x4F;
01114 else return -1;
01115 idx -= av_log2_16bit(use_mask[idx >> 4]);
01116 }
01117 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01118 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01119 n++;
01120 start_off = idx;
01121 }
01122 }
01123
/* Store the pulse: position and a sign bit (set = negative). */
01124 fcb->x[fcb->n] = start_off;
01125 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01126 fcb->n++;
01127
01128
/* Remember where the pitch-periodic continuation of this pulse falls
 * relative to the start of the next block. */
01129 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01130 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01131 return 0;
01132 }
01133
01141 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01142 int block_idx, AMRFixed *fcb)
01143 {
01144 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01145 float v;
01146
01147 if (s->aw_n_pulses[block_idx] > 0) {
01148 int n, v_mask, i_mask, sh, n_pulses;
01149
01150 if (s->aw_pulse_range == 24) {
01151 n_pulses = 3;
01152 v_mask = 8;
01153 i_mask = 7;
01154 sh = 4;
01155 } else {
01156 n_pulses = 4;
01157 v_mask = 4;
01158 i_mask = 3;
01159 sh = 3;
01160 }
01161
01162 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01163 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01164 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01165 s->aw_first_pulse_off[block_idx];
01166 while (fcb->x[fcb->n] < 0)
01167 fcb->x[fcb->n] += fcb->pitch_lag;
01168 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01169 fcb->n++;
01170 }
01171 } else {
01172 int num2 = (val & 0x1FF) >> 1, delta, idx;
01173
01174 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
01175 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01176 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01177 else { delta = 7; idx = num2 + 1 - 3 * 75; }
01178 v = (val & 0x200) ? -1.0 : 1.0;
01179
01180 fcb->no_repeat_mask |= 3 << fcb->n;
01181 fcb->x[fcb->n] = idx - delta;
01182 fcb->y[fcb->n] = v;
01183 fcb->x[fcb->n + 1] = idx;
01184 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
01185 fcb->n += 2;
01186 }
01187 }
01188
/**
 * Deterministic pseudo-random number generator used to pick a start
 * offset in the standard codebook.
 *
 * The result depends only on the arguments, so encoder and decoder can
 * reproduce it without transmitting any bits. Callers in this file read
 * block_size consecutive codebook entries starting at the returned
 * offset.
 *
 * @param frame_cntr current frame counter
 * @param block_num  block number within the frame
 * @param block_size number of samples that will be read
 * @return a value in [0, 1000 - block_size)
 */
01202 static int pRNG(int frame_cntr, int block_num, int block_size)
01203 {
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
/* Nine (multiplier, fraction) pairs, selected by y below.
 * NOTE(review): these look like precomputed constants for a division by
 * a y-dependent divisor; the derivation is not visible here - confirm. */
01214 static const unsigned int div_tbl[9][2] = {
01215 { 8332, 3 * 715827883U },
01216 { 4545, 0 * 390451573U },
01217 { 3124, 11 * 268435456U },
01218 { 2380, 15 * 204522253U },
01219 { 1922, 23 * 165191050U },
01220 { 1612, 23 * 138547333U },
01221 { 1388, 27 * 119304648U },
01222 { 1219, 16 * 104755300U },
01223 { 1086, 39 * 93368855U }
01224 };
/* Seed from block number and frame counter, reduced by 0xFFFF once. */
01225 unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
01226 if (x >= 0xFFFF) x -= 0xFFFF;
01227
/* 477218589 is about 2^32 / 9, so (assuming MULH yields the upper 32 bits
 * of the 64-bit product) y is x modulo 9 and a valid row index. */
01228 y = x - 9 * MULH(477218589, x);
/* Keep the low 16 bits of the scaled value. */
01229 z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
01230
01231 return z % (1000 - block_size);
01232 }
01233
01238 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01239 int block_idx, int size,
01240 const struct frame_type_desc *frame_desc,
01241 float *excitation)
01242 {
01243 float gain;
01244 int n, r_idx;
01245
01246 assert(size <= MAX_FRAMESIZE);
01247
01248
01249 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01250 r_idx = pRNG(s->frame_cntr, block_idx, size);
01251 gain = s->silence_gain;
01252 } else {
01253 r_idx = get_bits(gb, 8);
01254 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01255 }
01256
01257
01258 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01259
01260
01261 for (n = 0; n < size; n++)
01262 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01263 }
01264
/**
 * Generate the excitation of a block from decoded pulses plus a
 * pitch-based prediction from the excitation history.
 *
 * Steps: decode the pulse vector (AW pulses or five explicit pulse
 * tracks), read the 7-bit gain index and update the gain prediction
 * history, build the pitch-based prediction by interpolating earlier
 * excitation samples, and finally mix prediction and pulses with their
 * gains.
 *
 * @param s               decoder context
 * @param gb              bit reader
 * @param block_idx       block number within the frame
 * @param size            block size, at most MAX_FRAMESIZE / 2
 * @param block_pitch_sh2 block pitch with 2 fractional bits
 * @param frame_desc      description of the current frame type
 * @param excitation      in/out: samples before excitation[0] are read as
 *                        history; size new samples are written
 */
01269 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01270 int block_idx, int size,
01271 int block_pitch_sh2,
01272 const struct frame_type_desc *frame_desc,
01273 float *excitation)
01274 {
/* Weights applied to the gain prediction error history. */
01275 static const float gain_coeff[6] = {
01276 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01277 };
01278 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01279 int n, idx, gain_weight;
01280 AMRFixed fcb;
01281
01282 assert(size <= MAX_FRAMESIZE / 2);
01283 memset(pulses, 0, sizeof(*pulses) * size);
01284
01285 fcb.pitch_lag = block_pitch_sh2 >> 2;
01286 fcb.pitch_fac = 1.0;
01287 fcb.no_repeat_mask = 0;
01288 fcb.n = 0;
01289
01290
01291
01292 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01293 aw_pulse_set1(s, gb, block_idx, &fcb);
01294 if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
01295
01296
01297
/* Pulse decoding failed: fall back to scaled codebook noise and skip the
 * 7 + 1 bits that are not consumed on this path. */
01298 int r_idx = pRNG(s->frame_cntr, block_idx, size);
01299
01300 for (n = 0; n < size; n++)
01301 excitation[n] =
01302 wmavoice_std_codebook[r_idx + n] * s->silence_gain;
01303 skip_bits(gb, 7 + 1);
01304 return;
01305 }
01306 } else {
01307 int offset_nbits = 5 - frame_desc->log_n_blocks;
01308
01309 fcb.no_repeat_mask = -1;
01310
01311
/* Five interleaved pulse tracks: track n holds positions n, n + 5, ...
 * Tracks below dbl_pulses carry a second pulse whose sign is flipped when
 * its position index is larger than the first one's. */
01312 for (n = 0; n < 5; n++) {
01313 float sign;
01314 int pos1, pos2;
01315
01316 sign = get_bits1(gb) ? 1.0 : -1.0;
01317 pos1 = get_bits(gb, offset_nbits);
01318 fcb.x[fcb.n] = n + 5 * pos1;
01319 fcb.y[fcb.n++] = sign;
01320 if (n < frame_desc->dbl_pulses) {
01321 pos2 = get_bits(gb, offset_nbits);
01322 fcb.x[fcb.n] = n + 5 * pos2;
01323 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01324 }
01325 }
01326 }
01327 ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01328
01329
01330
/* Gains: the pulse gain is predicted in the log domain from the error
 * history and corrected by the codebook entry. The stored error is the
 * codebook entry clipped to [ln(0.05), ln(5)]. */
01331 idx = get_bits(gb, 7);
01332 fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01333 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01334 acb_gain = wmavoice_gain_codebook_acb[idx];
01335 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01336 -2.9957322736 ,
01337 1.6094379124 );
01338
/* Shift the history by 8 / n_blocks entries and fill the freed front
 * entries with the new error. */
01339 gain_weight = 8 >> frame_desc->log_n_blocks;
01340 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01341 sizeof(*s->gain_pred_err) * (6 - gain_weight));
01342 for (n = 0; n < gain_weight; n++)
01343 s->gain_pred_err[n] = pred_err;
01344
01345
01346 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
/* Per-sample pitch in 16.16 fixed point, advancing by pitch_diff_sh16 per
 * sample. The block is processed in runs of len samples over which the
 * integer interpolation index idx stays the same. */
01347 int len;
01348 for (n = 0; n < size; n += len) {
01349 int next_idx_sh16;
01350 int abs_idx = block_idx * size + n;
01351 int pitch_sh16 = (s->last_pitch_val << 16) +
01352 s->pitch_diff_sh16 * abs_idx;
01353 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
01354 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01355 idx = idx_sh16 >> 16;
01356 if (s->pitch_diff_sh16) {
01357 if (s->pitch_diff_sh16 > 0) {
01358 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01359 } else
01360 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01361 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01362 1, size - n);
01363 } else
01364 len = size;
01365
01366 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01367 wmavoice_ipol1_coeffs, 17,
01368 idx, 9, len);
01369 }
01370 } else {
/* Constant pitch for the whole block; the low 2 bits of block_pitch_sh2
 * are the fractional part. With no fraction the history is copied
 * directly. */
01371 int block_pitch = block_pitch_sh2 >> 2;
01372 idx = block_pitch_sh2 & 3;
01373 if (idx) {
01374 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01375 wmavoice_ipol2_coeffs, 4,
01376 idx, 8, size);
01377 } else
01378 av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01379 sizeof(float) * size);
01380 }
01381
01382
/* excitation = acb_gain * prediction + fcb_gain * pulses */
01383 ff_weighted_vector_sumf(excitation, excitation, pulses,
01384 acb_gain, fcb_gain, size);
01385 }
01386
01403 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01404 int block_idx, int size,
01405 int block_pitch_sh2,
01406 const double *lsps, const double *prev_lsps,
01407 const struct frame_type_desc *frame_desc,
01408 float *excitation, float *synth)
01409 {
01410 double i_lsps[MAX_LSPS];
01411 float lpcs[MAX_LSPS];
01412 float fac;
01413 int n;
01414
01415 if (frame_desc->acb_type == ACB_TYPE_NONE)
01416 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01417 else
01418 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01419 frame_desc, excitation);
01420
01421
01422 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01423 for (n = 0; n < s->lsps; n++)
01424 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01425 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01426
01427
01428 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01429 }
01430
01446 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01447 float *samples,
01448 const double *lsps, const double *prev_lsps,
01449 float *excitation, float *synth)
01450 {
01451 WMAVoiceContext *s = ctx->priv_data;
01452 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01453 int pitch[MAX_BLOCKS], last_block_pitch;
01454
01455
01456 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01457
01458 if (bd_idx < 0) {
01459 av_log(ctx, AV_LOG_ERROR,
01460 "Invalid frame type VLC code, skipping\n");
01461 return -1;
01462 }
01463
01464 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01465
01466
01467 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01468
01469
01470
01471
01472 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01473 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01474 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01475 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01476 if (s->last_acb_type == ACB_TYPE_NONE ||
01477 20 * abs(cur_pitch_val - s->last_pitch_val) >
01478 (cur_pitch_val + s->last_pitch_val))
01479 s->last_pitch_val = cur_pitch_val;
01480
01481
01482 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01483 int fac = n * 2 + 1;
01484
01485 pitch[n] = (MUL16(fac, cur_pitch_val) +
01486 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01487 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01488 }
01489
01490
01491 s->pitch_diff_sh16 =
01492 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01493 }
01494
01495
01496 switch (frame_descs[bd_idx].fcb_type) {
01497 case FCB_TYPE_SILENCE:
01498 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01499 break;
01500 case FCB_TYPE_AW_PULSES:
01501 aw_parse_coords(s, gb, pitch);
01502 break;
01503 }
01504
01505 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01506 int bl_pitch_sh2;
01507
01508
01509 switch (frame_descs[bd_idx].acb_type) {
01510 case ACB_TYPE_HAMMING: {
01511
01512
01513
01514
01515
01516 int block_pitch,
01517 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01518 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01519 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01520
01521 if (n == 0) {
01522 block_pitch = get_bits(gb, s->block_pitch_nbits);
01523 } else
01524 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01525 get_bits(gb, s->block_delta_pitch_nbits);
01526
01527 last_block_pitch = av_clip(block_pitch,
01528 s->block_delta_pitch_hrange,
01529 s->block_pitch_range -
01530 s->block_delta_pitch_hrange);
01531
01532
01533 if (block_pitch < t1) {
01534 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01535 } else {
01536 block_pitch -= t1;
01537 if (block_pitch < t2) {
01538 bl_pitch_sh2 =
01539 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01540 } else {
01541 block_pitch -= t2;
01542 if (block_pitch < t3) {
01543 bl_pitch_sh2 =
01544 (s->block_conv_table[2] + block_pitch) << 2;
01545 } else
01546 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01547 }
01548 }
01549 pitch[n] = bl_pitch_sh2 >> 2;
01550 break;
01551 }
01552
01553 case ACB_TYPE_ASYMMETRIC: {
01554 bl_pitch_sh2 = pitch[n] << 2;
01555 break;
01556 }
01557
01558 default:
01559 bl_pitch_sh2 = 0;
01560 break;
01561 }
01562
01563 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01564 lsps, prev_lsps, &frame_descs[bd_idx],
01565 &excitation[n * block_nsamples],
01566 &synth[n * block_nsamples]);
01567 }
01568
01569
01570
01571 if (s->do_apf) {
01572 double i_lsps[MAX_LSPS];
01573 float lpcs[MAX_LSPS];
01574
01575 for (n = 0; n < s->lsps; n++)
01576 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01577 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01578 postfilter(s, synth, samples, 80, lpcs,
01579 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01580 frame_descs[bd_idx].fcb_type, pitch[0]);
01581
01582 for (n = 0; n < s->lsps; n++)
01583 i_lsps[n] = cos(lsps[n]);
01584 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01585 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01586 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01587 frame_descs[bd_idx].fcb_type, pitch[0]);
01588 } else
01589 memcpy(samples, synth, 160 * sizeof(synth[0]));
01590
01591
01592 s->frame_cntr++;
01593 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01594 s->last_acb_type = frame_descs[bd_idx].acb_type;
01595 switch (frame_descs[bd_idx].acb_type) {
01596 case ACB_TYPE_NONE:
01597 s->last_pitch_val = 0;
01598 break;
01599 case ACB_TYPE_ASYMMETRIC:
01600 s->last_pitch_val = cur_pitch_val;
01601 break;
01602 case ACB_TYPE_HAMMING:
01603 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01604 break;
01605 }
01606
01607 return 0;
01608 }
01609
01622 static void stabilize_lsps(double *lsps, int num)
01623 {
01624 int n, m, l;
01625
01626
01627
01628
01629 lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
01630 for (n = 1; n < num; n++)
01631 lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
01632 lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
01633
01634
01635
01636 for (n = 1; n < num; n++) {
01637 if (lsps[n] < lsps[n - 1]) {
01638 for (m = 1; m < num; m++) {
01639 double tmp = lsps[m];
01640 for (l = m - 1; l >= 0; l--) {
01641 if (lsps[l] <= tmp) break;
01642 lsps[l + 1] = lsps[l];
01643 }
01644 lsps[l + 1] = tmp;
01645 }
01646 break;
01647 }
01648 }
01649 }
01650
01660 static int check_bits_for_superframe(GetBitContext *orig_gb,
01661 WMAVoiceContext *s)
01662 {
01663 GetBitContext s_gb, *gb = &s_gb;
01664 int n, need_bits, bd_idx;
01665 const struct frame_type_desc *frame_desc;
01666
01667
01668 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01669 skip_bits_long(gb, get_bits_count(orig_gb));
01670 assert(get_bits_left(gb) == get_bits_left(orig_gb));
01671
01672
01673 if (get_bits_left(gb) < 14)
01674 return 1;
01675 if (!get_bits1(gb))
01676 return -1;
01677 if (get_bits1(gb)) skip_bits(gb, 12);
01678 if (s->has_residual_lsps) {
01679 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01680 return 1;
01681 skip_bits_long(gb, s->sframe_lsp_bitsize);
01682 }
01683
01684
01685 for (n = 0; n < MAX_FRAMES; n++) {
01686 int aw_idx_is_ext = 0;
01687
01688 if (!s->has_residual_lsps) {
01689 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01690 skip_bits_long(gb, s->frame_lsp_bitsize);
01691 }
01692 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01693 if (bd_idx < 0)
01694 return -1;
01695 frame_desc = &frame_descs[bd_idx];
01696 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01697 if (get_bits_left(gb) < s->pitch_nbits)
01698 return 1;
01699 skip_bits_long(gb, s->pitch_nbits);
01700 }
01701 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01702 skip_bits(gb, 8);
01703 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01704 int tmp = get_bits(gb, 6);
01705 if (tmp >= 0x36) {
01706 skip_bits(gb, 2);
01707 aw_idx_is_ext = 1;
01708 }
01709 }
01710
01711
01712 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01713 need_bits = s->block_pitch_nbits +
01714 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01715 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01716 need_bits = 2 * !aw_idx_is_ext;
01717 } else
01718 need_bits = 0;
01719 need_bits += frame_desc->frame_size;
01720 if (get_bits_left(gb) < need_bits)
01721 return 1;
01722 skip_bits_long(gb, need_bits);
01723 }
01724
01725 return 0;
01726 }
01727
01748 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01749 {
01750 WMAVoiceContext *s = ctx->priv_data;
01751 GetBitContext *gb = &s->gb, s_gb;
01752 int n, res, n_samples = 480;
01753 double lsps[MAX_FRAMES][MAX_LSPS];
01754 const double *mean_lsf = s->lsps == 16 ?
01755 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01756 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01757 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01758 float *samples;
01759
01760 memcpy(synth, s->synth_history,
01761 s->lsps * sizeof(*synth));
01762 memcpy(excitation, s->excitation_history,
01763 s->history_nsamples * sizeof(*excitation));
01764
01765 if (s->sframe_cache_size > 0) {
01766 gb = &s_gb;
01767 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01768 s->sframe_cache_size = 0;
01769 }
01770
01771 if ((res = check_bits_for_superframe(gb, s)) == 1) {
01772 *got_frame_ptr = 0;
01773 return 1;
01774 }
01775
01776
01777
01778
01779
01780 if (!get_bits1(gb)) {
01781 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01782 return -1;
01783 }
01784
01785
01786 if (get_bits1(gb)) {
01787 if ((n_samples = get_bits(gb, 12)) > 480) {
01788 av_log(ctx, AV_LOG_ERROR,
01789 "Superframe encodes >480 samples (%d), not allowed\n",
01790 n_samples);
01791 return -1;
01792 }
01793 }
01794
01795 if (s->has_residual_lsps) {
01796 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01797
01798 for (n = 0; n < s->lsps; n++)
01799 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01800
01801 if (s->lsps == 10) {
01802 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01803 } else
01804 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01805
01806 for (n = 0; n < s->lsps; n++) {
01807 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01808 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01809 lsps[2][n] += mean_lsf[n];
01810 }
01811 for (n = 0; n < 3; n++)
01812 stabilize_lsps(lsps[n], s->lsps);
01813 }
01814
01815
01816 s->frame.nb_samples = 480;
01817 if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) {
01818 av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01819 return res;
01820 }
01821 s->frame.nb_samples = n_samples;
01822 samples = (float *)s->frame.data[0];
01823
01824
01825 for (n = 0; n < 3; n++) {
01826 if (!s->has_residual_lsps) {
01827 int m;
01828
01829 if (s->lsps == 10) {
01830 dequant_lsp10i(gb, lsps[n]);
01831 } else
01832 dequant_lsp16i(gb, lsps[n]);
01833
01834 for (m = 0; m < s->lsps; m++)
01835 lsps[n][m] += mean_lsf[m];
01836 stabilize_lsps(lsps[n], s->lsps);
01837 }
01838
01839 if ((res = synth_frame(ctx, gb, n,
01840 &samples[n * MAX_FRAMESIZE],
01841 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01842 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01843 &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01844 *got_frame_ptr = 0;
01845 return res;
01846 }
01847 }
01848
01849
01850
01851
01852 if (get_bits1(gb)) {
01853 res = get_bits(gb, 4);
01854 skip_bits(gb, 10 * (res + 1));
01855 }
01856
01857 *got_frame_ptr = 1;
01858
01859
01860 memcpy(s->prev_lsps, lsps[2],
01861 s->lsps * sizeof(*s->prev_lsps));
01862 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01863 s->lsps * sizeof(*synth));
01864 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01865 s->history_nsamples * sizeof(*excitation));
01866 if (s->do_apf)
01867 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01868 s->history_nsamples * sizeof(*s->zero_exc_pf));
01869
01870 return 0;
01871 }
01872
01880 static int parse_packet_header(WMAVoiceContext *s)
01881 {
01882 GetBitContext *gb = &s->gb;
01883 unsigned int res;
01884
01885 if (get_bits_left(gb) < 11)
01886 return 1;
01887 skip_bits(gb, 4);
01888 s->has_residual_lsps = get_bits1(gb);
01889 do {
01890 res = get_bits(gb, 6);
01891
01892 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01893 return 1;
01894 } while (res == 0x3F);
01895 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01896
01897 return 0;
01898 }
01899
01915 static void copy_bits(PutBitContext *pb,
01916 const uint8_t *data, int size,
01917 GetBitContext *gb, int nbits)
01918 {
01919 int rmn_bytes, rmn_bits;
01920
01921 rmn_bits = rmn_bytes = get_bits_left(gb);
01922 if (rmn_bits < nbits)
01923 return;
01924 if (nbits > pb->size_in_bits - put_bits_count(pb))
01925 return;
01926 rmn_bits &= 7; rmn_bytes >>= 3;
01927 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01928 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01929 avpriv_copy_bits(pb, data + size - rmn_bytes,
01930 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01931 }
01932
01944 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01945 int *got_frame_ptr, AVPacket *avpkt)
01946 {
01947 WMAVoiceContext *s = ctx->priv_data;
01948 GetBitContext *gb = &s->gb;
01949 int size, res, pos;
01950
01951
01952
01953
01954
01955
01956 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01957 if (!size) {
01958 *got_frame_ptr = 0;
01959 return 0;
01960 }
01961 init_get_bits(&s->gb, avpkt->data, size << 3);
01962
01963
01964
01965
01966 if (size == ctx->block_align) {
01967 if ((res = parse_packet_header(s)) < 0)
01968 return res;
01969
01970
01971
01972
01973 if (s->spillover_nbits > 0) {
01974 if (s->sframe_cache_size > 0) {
01975 int cnt = get_bits_count(gb);
01976 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01977 flush_put_bits(&s->pb);
01978 s->sframe_cache_size += s->spillover_nbits;
01979 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01980 *got_frame_ptr) {
01981 cnt += s->spillover_nbits;
01982 s->skip_bits_next = cnt & 7;
01983 *(AVFrame *)data = s->frame;
01984 return cnt >> 3;
01985 } else
01986 skip_bits_long (gb, s->spillover_nbits - cnt +
01987 get_bits_count(gb));
01988 } else
01989 skip_bits_long(gb, s->spillover_nbits);
01990 }
01991 } else if (s->skip_bits_next)
01992 skip_bits(gb, s->skip_bits_next);
01993
01994
01995 s->sframe_cache_size = 0;
01996 s->skip_bits_next = 0;
01997 pos = get_bits_left(gb);
01998 if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01999 return res;
02000 } else if (*got_frame_ptr) {
02001 int cnt = get_bits_count(gb);
02002 s->skip_bits_next = cnt & 7;
02003 *(AVFrame *)data = s->frame;
02004 return cnt >> 3;
02005 } else if ((s->sframe_cache_size = pos) > 0) {
02006
02007 init_get_bits(gb, avpkt->data, size << 3);
02008 skip_bits_long(gb, (size << 3) - pos);
02009 assert(get_bits_left(gb) == pos);
02010
02011
02012 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
02013 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
02014
02015
02016 }
02017
02018 return size;
02019 }
02020
02021 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02022 {
02023 WMAVoiceContext *s = ctx->priv_data;
02024
02025 if (s->do_apf) {
02026 ff_rdft_end(&s->rdft);
02027 ff_rdft_end(&s->irdft);
02028 ff_dct_end(&s->dct);
02029 ff_dct_end(&s->dst);
02030 }
02031
02032 return 0;
02033 }
02034
02035 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02036 {
02037 WMAVoiceContext *s = ctx->priv_data;
02038 int n;
02039
02040 s->postfilter_agc = 0;
02041 s->sframe_cache_size = 0;
02042 s->skip_bits_next = 0;
02043 for (n = 0; n < s->lsps; n++)
02044 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02045 memset(s->excitation_history, 0,
02046 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02047 memset(s->synth_history, 0,
02048 sizeof(*s->synth_history) * MAX_LSPS);
02049 memset(s->gain_pred_err, 0,
02050 sizeof(s->gain_pred_err));
02051
02052 if (s->do_apf) {
02053 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02054 sizeof(*s->synth_filter_out_buf) * s->lsps);
02055 memset(s->dcf_mem, 0,
02056 sizeof(*s->dcf_mem) * 2);
02057 memset(s->zero_exc_pf, 0,
02058 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02059 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02060 }
02061 }
02062
02063 AVCodec ff_wmavoice_decoder = {
02064 .name = "wmavoice",
02065 .type = AVMEDIA_TYPE_AUDIO,
02066 .id = CODEC_ID_WMAVOICE,
02067 .priv_data_size = sizeof(WMAVoiceContext),
02068 .init = wmavoice_decode_init,
02069 .close = wmavoice_decode_end,
02070 .decode = wmavoice_decode_packet,
02071 .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02072 .flush = wmavoice_flush,
02073 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02074 };