libavcodec/wmavoice.c
Go to the documentation of this file.
00001 /*
00002  * Windows Media Audio Voice decoder.
00003  * Copyright (c) 2009 Ronald S. Bultje
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #define UNCHECKED_BITSTREAM_READER 1
00029 
00030 #include <math.h>
00031 #include "avcodec.h"
00032 #include "get_bits.h"
00033 #include "put_bits.h"
00034 #include "wmavoice_data.h"
00035 #include "celp_math.h"
00036 #include "celp_filters.h"
00037 #include "acelp_vectors.h"
00038 #include "acelp_filters.h"
00039 #include "lsp.h"
00040 #include "libavutil/lzo.h"
00041 #include "dct.h"
00042 #include "rdft.h"
00043 #include "sinewin.h"
00044 
/* Compile-time limits used to size the decoder's tables and buffers. */
00045 #define MAX_BLOCKS           8   ///< maximum number of blocks per frame
00046 #define MAX_LSPS             16  ///< maximum filter order
00047 #define MAX_LSPS_ALIGN16     16  ///< same as #MAX_LSPS; needs to be a multiple
00048                                  ///< of 16 (used as offset into aligned buffers)
00049 #define MAX_FRAMES           3   ///< maximum number of frames per superframe
00050 #define MAX_FRAMESIZE        160 ///< maximum number of samples per frame
00051 #define MAX_SIGNAL_HISTORY   416 ///< maximum excitation signal history
00052 #define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES) ///< maximum number of samples per superframe
00053 
00054 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size (in bytes) for frame data;
00055                                  ///< sizes WMAVoiceContext.sframe_cache[] (before padding)
00056 #define VLC_NBITS            6   ///< number of bits to read per VLC iteration
00057 
/**
 * Frame type VLC. Its static table is (re)initialised by decode_vbmtree()
 * via INIT_VLC_STATIC() with #VLC_NBITS lookup bits.
 */
00061 static VLC frame_type_vlc;
00062 
/**
 * Type codes stored in frame_type_desc.acb_type and in
 * WMAVoiceContext.last_acb_type.
 * NOTE(review): "ACB" presumably stands for adaptive codebook -- confirm
 * against the parts of the decoder that consume these values.
 */
00066 enum {
00067     ACB_TYPE_NONE       = 0, ///< used by the first two frame_descs[] entries;
00068     ACB_TYPE_ASYMMETRIC = 1, ///< used by frame_descs[2..7]
00069                              ///< also the initial WMAVoiceContext.last_acb_type
00070                              ///< (ACB_TYPE_NONE, see wmavoice_decode_init())
00071 
00072 
00073     ACB_TYPE_HAMMING    = 2  ///< used by frame_descs[8..16]
00074 
00075 
00076 };
00077 
/**
 * Type codes stored in frame_type_desc.fcb_type and passed as fcb_type to
 * the postfilter functions. The numeric order matters: postfilter() compares
 * with ">= FCB_TYPE_AW_PULSES".
 * NOTE(review): "FCB" presumably stands for fixed codebook -- confirm.
 */
00081 enum {
00082     FCB_TYPE_SILENCE    = 0, ///< wiener_denoise() skips the spectral filtering
00083                              ///< for this type and only merges cached output
00084 
00085     FCB_TYPE_HARDCODED  = 1, ///< calc_input_response() uses gain factor 5/13
00086                              ///< for this type (5/14.7 for all others)
00087     FCB_TYPE_AW_PULSES  = 2, ///< lowest type for which postfilter() attempts
00088                              ///< kalman_smoothen()
00089     FCB_TYPE_EXC_PULSES = 3, ///< used by frame_descs[3..16]
00090 
00091 
00092 };
00093 
/**
 * Static description of each of the 17 frame types. The index into
 * frame_descs[] is the frame type number (0..16), the same range of values
 * that decode_vbmtree() stores into the VBM tree.
 */
00097 static const struct frame_type_desc {
00098     uint8_t n_blocks;     ///< number of blocks in the frame (1, 2, 4 or 8,
00099                           ///< i.e. at most #MAX_BLOCKS)
00100     uint8_t log_n_blocks; ///< log2(n_blocks)
00101     uint8_t acb_type;     ///< one of the ACB_TYPE_* constants
00102     uint8_t fcb_type;     ///< one of the FCB_TYPE_* constants
00103     uint8_t dbl_pulses;   ///< 0, 2 or 5 in the table below
00104                           ///< NOTE(review): presumably a count of double
00105                           ///< pulses -- confirm against the pulse decoder
00106     uint16_t frame_size;  ///< NOTE(review): presumably the coded size of
00107                           ///< the frame in bits -- confirm against caller
00108 } frame_descs[17] = {
00109     { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
00110     { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
00111     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
00112     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
00113     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00114     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00115     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00116     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00117     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
00118     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
00119     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
00120     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
00121     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
00122     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
00123     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
00124     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
00125     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
00126 };
00127 
/**
 * WMA Voice decoding context (the codec's priv_data).
 *
 * Field comments below describe only what the visible initialisation and
 * postfilter code establishes; fields without a comment are written/read by
 * code outside this section.
 */
00131 typedef struct {
00136     AVFrame frame;                ///< reset with avcodec_get_frame_defaults() and
                                  ///< exported as ctx->coded_frame by init
00137     GetBitContext gb;             ///< bit reader; init points it at
00138                                   ///< extradata + 22 to parse the VBM tree
00139 
00140 
00141     int8_t vbm_tree[25];          ///< filled by decode_vbmtree(): unused slots
00142                                   ///< are -1, others hold a frame type (0..16)
00143     int spillover_bitsize;        ///< 3 + ceil(log2(ctx->block_align))
00144 
00145 
00146     int history_nsamples;         ///< max_pitch_val + 8; init requires it to
00147                                   ///< be <= #MAX_SIGNAL_HISTORY
00148 
00149     /* postfilter specific values */
00150     int do_apf;                   ///< bit 0 of the extradata flags; when set,
00151                                   ///< init sets up rdft/irdft/dct/dst and sin/cos
00152     int denoise_strength;         ///< flags bits 2..5, must be < 12; first
00153                                   ///< index into wmavoice_denoise_power_table
00154     int denoise_tilt_corr;        ///< flag 0x40; enables the tilt compensation
00155                                   ///< step in calc_input_response()
00156     int dc_level;                 ///< flags bits 7..10; postfilter() applies
00157                                   ///< its order-2 DC-removal filter when > 8
00158 
00159     int lsps;                     ///< number of LSPs: 16 if flag 0x1000, else 10
00160     int lsp_q_mode;               ///< flag 0x2000 (0 or 1)
00161     int lsp_def_mode;             ///< flag 0x4000 (0 or 1)
00162 
00163     int frame_lsp_bitsize;        ///< 34 for 16 LSPs, 24 for 10 LSPs
00164 
00165     int sframe_lsp_bitsize;       ///< 60 for 16 LSPs, 48 for 10 LSPs
00166 
00167 
00168     int min_pitch_val;            ///< derived from sample rate (~rate / 400)
00169     int max_pitch_val;            ///< derived from sample rate (~rate * 37 / 2000)
00170     int pitch_nbits;              ///< ceil(log2(max_pitch_val - min_pitch_val))
00171 
00172     int block_pitch_nbits;        ///< ceil(log2(block_pitch_range))
00173 
00174     int block_pitch_range;        ///< computed in init from block_conv_table[]
00175     int block_delta_pitch_nbits;  ///< 1 + ceil(log2(block_delta_pitch_hrange))
00176 
00177 
00178 
00179     int block_delta_pitch_hrange; ///< (pitch_range >> 3) & ~0xF; init rejects
00180                                   ///< values <= 0
00181     uint16_t block_conv_table[4]; ///< { min_pitch_val, pitch_range * 25 >> 6,
00182                                   ///<   pitch_range * 44 >> 6, max_pitch_val - 1 }
00183 
00193     int spillover_nbits;          
00194 
00195 
00196 
00197     int has_residual_lsps;        
00198 
00199 
00200 
00201 
00202     int skip_bits_next;           
00203 
00204 
00205 
00206     uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00209     int sframe_cache_size;        
00210 
00211 
00212 
00213 
00214     PutBitContext pb;             
00215 
00225     double prev_lsps[MAX_LSPS];   ///< init sets entry n to
00226                                   ///< M_PI * (n + 1) / (lsps + 1)
00227     int last_pitch_val;           ///< initialised to 40
00228     int last_acb_type;            ///< initialised to ACB_TYPE_NONE
00229     int pitch_diff_sh16;          
00230 
00231     float silence_gain;           
00232 
00233     int aw_idx_is_ext;            ///< set by aw_parse_coords(): 1 if the 6-bit
00234                                   ///< pulse index was >= 54 (2 extra bits read)
00235     int aw_pulse_range;           ///< set by aw_parse_coords(): 24 if both
00236                                   ///< pitch lags are > 32, otherwise 16
00237 
00238 
00239 
00240 
00241     int aw_n_pulses[2];           ///< per-block pulse count, set by
00242                                   ///< aw_parse_coords()
00243 
00244     int aw_first_pulse_off[2];    ///< per-block first pulse offset, set by
00245                                   ///< aw_parse_coords()
00246     int aw_next_pulse_off_cache;  ///< read by aw_pulse_set2() for block 1
00247 
00248 
00249 
00250 
00251 
00252     int frame_cntr;               
00253 
00254     float gain_pred_err[6];       
00255     float excitation_history[MAX_SIGNAL_HISTORY];
00259     float synth_history[MAX_LSPS]; 
00260 
00269     RDFTContext rdft, irdft;      ///< 7-bit forward (DFT_R2C) and inverse
00270                                   ///< (IDFT_C2R) real transforms, only if do_apf
00271     DCTContext dct, dst;          ///< 6-bit DCT_I / DST_I contexts, only if
00272                                   ///< do_apf
00273     float sin[511], cos[511];     ///< tables built in init (only if do_apf);
00274                                   ///< indexed as 255 + clip(x, -255, 255)
00275     float postfilter_agc;         ///< gain memory carried between calls of
00276                                   ///< adaptive_gain_control()
00277     float dcf_mem[2];             ///< memory of the order-2 filter in postfilter()
00278     float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00281     float denoise_filter_cache[MAX_FRAMESIZE]; ///< filter tail carried over by wiener_denoise()
00282     int   denoise_filter_cache_size; ///< number of valid entries in denoise_filter_cache[]
00283     DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80]; ///< scratch buffer of wiener_denoise()
00285     DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80]; ///< scratch buffer of wiener_denoise()
00287     DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; ///< postfilter() output; first MAX_LSPS_ALIGN16 entries hold filter history
00290 
00293 } WMAVoiceContext;
00294 
00304 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00305 {
00306     static const uint8_t bits[] = {
00307          2,  2,  2,  4,  4,  4,
00308          6,  6,  6,  8,  8,  8,
00309         10, 10, 10, 12, 12, 12,
00310         14, 14, 14, 14
00311     };
00312     static const uint16_t codes[] = {
00313           0x0000, 0x0001, 0x0002,        //              00/01/10
00314           0x000c, 0x000d, 0x000e,        //           11+00/01/10
00315           0x003c, 0x003d, 0x003e,        //         1111+00/01/10
00316           0x00fc, 0x00fd, 0x00fe,        //       111111+00/01/10
00317           0x03fc, 0x03fd, 0x03fe,        //     11111111+00/01/10
00318           0x0ffc, 0x0ffd, 0x0ffe,        //   1111111111+00/01/10
00319           0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
00320     };
00321     int cntr[8], n, res;
00322 
00323     memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00324     memset(cntr,     0,    sizeof(cntr));
00325     for (n = 0; n < 17; n++) {
00326         res = get_bits(gb, 3);
00327         if (cntr[res] > 3) // should be >= 3 + (res == 7))
00328             return -1;
00329         vbm_tree[res * 3 + cntr[res]++] = n;
00330     }
00331     INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00332                     bits, 1, 1, codes, 2, 2, 132);
00333     return 0;
00334 }
00335 
00339 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00340 {
00341     int n, flags, pitch_range, lsp16_flag;
00342     WMAVoiceContext *s = ctx->priv_data;
00343 
00352     if (ctx->extradata_size != 46) {
00353         av_log(ctx, AV_LOG_ERROR,
00354                "Invalid extradata size %d (should be 46)\n",
00355                ctx->extradata_size);
00356         return -1;
00357     }
00358     flags                = AV_RL32(ctx->extradata + 18);
00359     s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00360     s->do_apf            =    flags & 0x1;
00361     if (s->do_apf) {
00362         ff_rdft_init(&s->rdft,  7, DFT_R2C);
00363         ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00364         ff_dct_init(&s->dct,  6, DCT_I);
00365         ff_dct_init(&s->dst,  6, DST_I);
00366 
00367         ff_sine_window_init(s->cos, 256);
00368         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00369         for (n = 0; n < 255; n++) {
00370             s->sin[n]       = -s->sin[510 - n];
00371             s->cos[510 - n] =  s->cos[n];
00372         }
00373     }
00374     s->denoise_strength  =   (flags >> 2) & 0xF;
00375     if (s->denoise_strength >= 12) {
00376         av_log(ctx, AV_LOG_ERROR,
00377                "Invalid denoise filter strength %d (max=11)\n",
00378                s->denoise_strength);
00379         return -1;
00380     }
00381     s->denoise_tilt_corr = !!(flags & 0x40);
00382     s->dc_level          =   (flags >> 7) & 0xF;
00383     s->lsp_q_mode        = !!(flags & 0x2000);
00384     s->lsp_def_mode      = !!(flags & 0x4000);
00385     lsp16_flag           =    flags & 0x1000;
00386     if (lsp16_flag) {
00387         s->lsps               = 16;
00388         s->frame_lsp_bitsize  = 34;
00389         s->sframe_lsp_bitsize = 60;
00390     } else {
00391         s->lsps               = 10;
00392         s->frame_lsp_bitsize  = 24;
00393         s->sframe_lsp_bitsize = 48;
00394     }
00395     for (n = 0; n < s->lsps; n++)
00396         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00397 
00398     init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00399     if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00400         av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00401         return -1;
00402     }
00403 
00404     s->min_pitch_val    = ((ctx->sample_rate << 8)      /  400 + 50) >> 8;
00405     s->max_pitch_val    = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00406     pitch_range         = s->max_pitch_val - s->min_pitch_val;
00407     if (pitch_range <= 0) {
00408         av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00409         return -1;
00410     }
00411     s->pitch_nbits      = av_ceil_log2(pitch_range);
00412     s->last_pitch_val   = 40;
00413     s->last_acb_type    = ACB_TYPE_NONE;
00414     s->history_nsamples = s->max_pitch_val + 8;
00415 
00416     if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00417         int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00418             max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00419 
00420         av_log(ctx, AV_LOG_ERROR,
00421                "Unsupported samplerate %d (min=%d, max=%d)\n",
00422                ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
00423 
00424         return -1;
00425     }
00426 
00427     s->block_conv_table[0]      = s->min_pitch_val;
00428     s->block_conv_table[1]      = (pitch_range * 25) >> 6;
00429     s->block_conv_table[2]      = (pitch_range * 44) >> 6;
00430     s->block_conv_table[3]      = s->max_pitch_val - 1;
00431     s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00432     if (s->block_delta_pitch_hrange <= 0) {
00433         av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00434         return -1;
00435     }
00436     s->block_delta_pitch_nbits  = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00437     s->block_pitch_range        = s->block_conv_table[2] +
00438                                   s->block_conv_table[3] + 1 +
00439                                   2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00440     s->block_pitch_nbits        = av_ceil_log2(s->block_pitch_range);
00441 
00442     ctx->sample_fmt             = AV_SAMPLE_FMT_FLT;
00443 
00444     avcodec_get_frame_defaults(&s->frame);
00445     ctx->coded_frame = &s->frame;
00446 
00447     return 0;
00448 }
00449 
/**
 * Adaptive gain control: scale the filtered signal so that its level
 * follows that of the reference (unfiltered) speech signal.
 *
 * The target gain is the ratio of the summed absolute amplitudes of
 * speech_synth[] and in[]; it is approached through a one-pole smoother
 * (mem = alpha * mem + (1 - alpha) * ratio) that is updated per sample.
 *
 * @param out          output buffer, size entries (may be the same as in)
 * @param in           filtered input signal, size entries
 * @param speech_synth reference signal whose level is to be matched
 * @param size         number of samples to process
 * @param alpha        smoothing factor of the gain (0..1)
 * @param gain_mem     in/out: smoothed gain carried between calls
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* An all-zero (or empty) filtered signal would make the ratio NaN or
     * infinite; since that value is stored in *gain_mem it would corrupt
     * every later call. Use no gain contribution in that case: the output
     * is zero anyway and the stored gain simply decays. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00492 
00511 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00512                            const float *in, float *out, int size)
00513 {
00514     int n;
00515     float optimal_gain = 0, dot;
00516     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00517                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00518                 *best_hist_ptr;
00519 
00520     /* find best fitting point in history */
00521     do {
00522         dot = ff_dot_productf(in, ptr, size);
00523         if (dot > optimal_gain) {
00524             optimal_gain  = dot;
00525             best_hist_ptr = ptr;
00526         }
00527     } while (--ptr >= end);
00528 
00529     if (optimal_gain <= 0)
00530         return -1;
00531     dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00532     if (dot <= 0) // would be 1.0
00533         return -1;
00534 
00535     if (optimal_gain <= dot) {
00536         dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
00537     } else
00538         dot = 0.625;
00539 
00540     /* actual smoothing */
00541     for (n = 0; n < size; n++)
00542         out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00543 
00544     return 0;
00545 }
00546 
/**
 * Compute the tilt of a filter from its coefficients, as the ratio of two
 * correlation terms of the sequence { 1, lpcs[0], ..., lpcs[n_lpcs - 1] }:
 * the lag-1 term divided by the lag-0 term.
 *
 * @param lpcs   filter coefficients (the leading 1 is implicit)
 * @param n_lpcs number of coefficients in lpcs[]
 * @return lag-1 term / lag-0 term
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    /* lag 0: 1 + sum(lpcs[i]^2) */
    const float lag0 = 1.0 + ff_dot_productf(lpcs, lpcs, n_lpcs);
    /* lag 1: 1 * lpcs[0] + sum(lpcs[i] * lpcs[i + 1]) */
    const float lag1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);

    return lag1 / lag0;
}
00566 
/**
 * Derive the time-domain denoise filter coefficients from a set of LPCs.
 *
 * The LPC buffer is transformed in place (RDFT -> log10 power per bin ->
 * per-bin table value -> DCT-I -> DST-I), so its contents are destroyed.
 * Per-bin magnitudes are first built in coeffs[0..64], then expanded into
 * the packed complex layout expected by the inverse RDFT and transformed
 * back; only the first @p remainder samples are kept.
 *
 * @param s         context; uses rdft/irdft/dct/dst, the sin[]/cos[] tables,
 *                  denoise_strength and denoise_tilt_corr
 * @param lpcs      in/out work buffer of 128 floats holding the LPCs on entry
 * @param fcb_type  FCB_TYPE_* of the frame; only selects the gain factor
 * @param coeffs    output buffer of 128 floats; entries from @p remainder
 *                  onwards are zeroed
 * @param remainder number of leading filter taps to keep (caller passes
 *                  min(127 - size, size - 1))
 */
00570 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00571                                 int fcb_type, float *coeffs, int remainder)
00572 {
00573     float last_coeff, min = 15.0, max = -15.0;
00574     float irange, angle_mul, gain_mul, range, sq;
00575     int n, idx;
00576 
00577     /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
00578     s->rdft.rdft_calc(&s->rdft, lpcs);
    /* log_range(): store log10 of a power value and track the overall
     * minimum/maximum in min/max. */
00579 #define log_range(var, assign) do { \
00580         float tmp = log10f(assign);  var = tmp; \
00581         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00582     } while (0)
    /* lpcs[1] is handled separately (kept in last_coeff and stored as bin
     * 64 below); bins 1..63 are complex (re, im) pairs. The results are
     * compacted in place: bin n ends up in lpcs[n]. */
00583     log_range(last_coeff,  lpcs[1]         * lpcs[1]);
00584     for (n = 1; n < 64; n++)
00585         log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
00586                            lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00587     log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
00588 #undef log_range
00589     range    = max - min;
00590     lpcs[64] = last_coeff;
00591 
00592     /* Now, use this spectrum to pick out these frequencies with higher
00593      * (relative) power/energy (which we then take to be "not noise"),
00594      * and set up a table (still in lpc[]) of (relative) gains per frequency.
00595      * These frequencies will be maintained, while others ("noise") will be
00596      * decreased in the filter output. */
    /* NOTE(review): if range is 0 (all bins equal) irange is infinite, and
     * a bin with zero power makes min/range infinite; the lrint() argument
     * below can then be NaN. No guard exists here -- confirm whether such
     * input can occur. */
00597     irange    = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
00598     gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00599                                                           (5.0 / 14.7));
00600     angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00601     for (n = 0; n <= 64; n++) {
00602         float pwr;
00603 
        /* FFMAX is a macro, so lrint() may be evaluated twice here */
00604         idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00605         pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00606         lpcs[n] = angle_mul * pwr;
00607 
00608         /* 70.57 =~ 1/log10(1.0331663) */
00609         idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00610         if (idx > 127) { // fallback if index falls outside table range
00611             coeffs[n] = wmavoice_energy_table[127] *
00612                         powf(1.0331663, idx - 127);
00613         } else
00614             coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00615     }
00616 
00617     /* calculate the Hilbert transform of the gains, which we do (since this
00618      * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
00619      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
00620      * "moment" of the LPCs in this filter. */
00621     s->dct.dct_calc(&s->dct, lpcs);
00622     s->dst.dct_calc(&s->dst, lpcs);
00623 
00624     /* Split out the coefficient indexes into phase/magnitude pairs */
00625     idx = 255 + av_clip(lpcs[64],               -255, 255);
00626     coeffs[0]  = coeffs[0]  * s->cos[idx];
00627     idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00628     last_coeff = coeffs[64] * s->cos[idx];
    /* Walk n from 63 down to 1, two bins per iteration: odd n use
     * -lpcs[64], even n use +lpcs[64]. Going downwards makes the in-place
     * expansion safe: coeffs[n * 2] and coeffs[n * 2 + 1] are written
     * before any magnitude coeffs[m] with m < n is read. */
00629     for (n = 63;; n--) {
00630         idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00631         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00632         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00633 
00634         if (!--n) break;
00635 
00636         idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00637         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00638         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00639     }
    /* slot 1 of the packed layout carries bin 64 (saved in last_coeff) */
00640     coeffs[1] = last_coeff;
00641 
00642     /* move into real domain */
00643     s->irdft.rdft_calc(&s->irdft, coeffs);
00644 
00645     /* tilt correction and normalize scale */
00646     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00647     if (s->denoise_tilt_corr) {
00648         float tilt_mem = 0;
00649 
00650         coeffs[remainder - 1] = 0;
00651         ff_tilt_compensation(&tilt_mem,
00652                              -1.8 * tilt_factor(coeffs, remainder - 1),
00653                              coeffs, remainder);
00654     }
    /* scale to an energy of 1 / 64^2
     * NOTE(review): divides by zero if all kept taps are 0 */
00655     sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00656     for (n = 0; n < remainder; n++)
00657         coeffs[n] *= sq;
00658 }
00659 
/**
 * Apply the denoise filter to a block of synthesised speech, in place.
 *
 * For non-silence frames, a filter is derived from the tilt-compensated
 * LPCs (see calc_input_response()) and applied by multiplying the spectra
 * of signal and filter (128-point real FFT). Filtering makes the output
 * longer than the input; the part beyond @p size is kept in
 * s->denoise_filter_cache[] and added to the start of the following
 * call(s).
 *
 * @param s        context; uses the rdft/irdft contexts, the
 *                 tilted_lpcs_pf / denoise_coeffs_pf scratch buffers and
 *                 the denoise filter cache
 * @param fcb_type FCB_TYPE_* of the frame; FCB_TYPE_SILENCE skips the
 *                 filtering and only merges previously cached output
 * @param synth_pf in/out signal; must have room for 128 floats
 * @param size     number of valid input samples in synth_pf[]
 * @param lpcs     LPCs of the block (s->lsps entries)
 */
00686 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00687                            float *synth_pf, int size,
00688                            const float *lpcs)
00689 {
    /* remainder is only assigned -- and only used -- when
     * fcb_type != FCB_TYPE_SILENCE */
00690     int remainder, lim, n;
00691 
00692     if (fcb_type != FCB_TYPE_SILENCE) {
00693         float *tilted_lpcs = s->tilted_lpcs_pf,
00694               *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00695 
        /* build { 1, lpcs[0..lsps-1], 0, ... } (128 entries) and apply
         * tilt compensation to its first lsps + 2 entries */
00696         tilted_lpcs[0]           = 1.0;
00697         memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00698         memset(&tilted_lpcs[s->lsps + 1], 0,
00699                sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00700         ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00701                              tilted_lpcs, s->lsps + 2);
00702 
00703         /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
00704          * size is applied to the next frame. All input beyond this is zero,
00705          * and thus all output beyond this will go towards zero, hence we can
00706          * limit to min(size-1, 127-size) as a performance consideration. */
00707         remainder = FFMIN(127 - size, size - 1);
00708         calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00709 
00710         /* apply coefficients (in frequency spectrum domain), i.e. complex
00711          * number multiplication */
00712         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00713         s->rdft.rdft_calc(&s->rdft, synth_pf);
00714         s->rdft.rdft_calc(&s->rdft, coeffs);
        /* slots 0 and 1 of the packed spectrum are purely real values
         * and are therefore multiplied separately */
00715         synth_pf[0] *= coeffs[0];
00716         synth_pf[1] *= coeffs[1];
00717         for (n = 1; n < 64; n++) {
00718             float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00719             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00720             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00721         }
00722         s->irdft.rdft_calc(&s->irdft, synth_pf);
00723     }
00724 
00725     /* merge filter output with the history of previous runs */
00726     if (s->denoise_filter_cache_size) {
00727         lim = FFMIN(s->denoise_filter_cache_size, size);
00728         for (n = 0; n < lim; n++)
00729             synth_pf[n] += s->denoise_filter_cache[n];
00730         s->denoise_filter_cache_size -= lim;
        /* shift what is left of the cache to the front; when fewer than
         * size entries were cached, the new size is 0 and nothing moves */
00731         memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00732                 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00733     }
00734 
00735     /* move remainder of filter output into a cache for future runs */
00736     if (fcb_type != FCB_TYPE_SILENCE) {
        /* add onto entries still pending in the cache, then append the
         * rest of the new tail behind them */
00737         lim = FFMIN(remainder, s->denoise_filter_cache_size);
00738         for (n = 0; n < lim; n++)
00739             s->denoise_filter_cache[n] += synth_pf[size + n];
00740         if (lim < remainder) {
00741             memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00742                    sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00743             s->denoise_filter_cache_size = remainder;
00744         }
00745     }
00746 }
00747 
00768 static void postfilter(WMAVoiceContext *s, const float *synth,
00769                        float *samples,    int size,
00770                        const float *lpcs, float *zero_exc_pf,
00771                        int fcb_type,      int pitch)
00772 {
00773     float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00774           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00775           *synth_filter_in = zero_exc_pf;
00776 
00777     assert(size <= MAX_FRAMESIZE / 2);
00778 
00779     /* generate excitation from input signal */
00780     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00781 
00782     if (fcb_type >= FCB_TYPE_AW_PULSES &&
00783         !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00784         synth_filter_in = synth_filter_in_buf;
00785 
00786     /* re-synthesize speech after smoothening, and keep history */
00787     ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00788                                  synth_filter_in, size, s->lsps);
00789     memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00790            sizeof(synth_pf[0]) * s->lsps);
00791 
00792     wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00793 
00794     adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00795                           &s->postfilter_agc);
00796 
00797     if (s->dc_level > 8) {
00798         /* remove ultra-low frequency DC noise / highpass filter;
00799          * coefficients are identical to those used in SIPR decoding,
00800          * and very closely resemble those used in AMR-NB decoding. */
00801         ff_acelp_apply_order_2_transfer_function(samples, samples,
00802             (const float[2]) { -1.99997,      1.0 },
00803             (const float[2]) { -1.9330735188, 0.93589198496 },
00804             0.93980580475, s->dcf_mem, size);
00805     }
00806 }
/**
 * Dequantise LSPs with a multi-stage vector codebook.
 *
 * The result is the sum over all stages of
 * base_q[stage] + mul_q[stage] * codebook_entry[m], for each output m.
 * The codebooks of the individual stages are stored back to back in
 * @p table; stage n holds sizes[n] vectors of @p num bytes each.
 *
 * @param lsps     output, num entries (overwritten)
 * @param num      number of values per codebook vector
 * @param values   per-stage codebook vector index
 * @param sizes    per-stage number of vectors in the codebook
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    per-stage multiplier
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *vec = codebook + values[stage] * num;
        const double offset = base_q[stage];
        const double scale  = mul_q[stage];
        int k;

        for (k = 0; k < num; k++)
            lsps[k] += offset + scale * vec[k];

        /* step over this stage's codebook */
        codebook += sizes[stage] * num;
    }
}
00842 
00854 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00855 {
00856     static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00857     static const double mul_lsf[4] = {
00858         5.2187144800e-3,    1.4626986422e-3,
00859         9.6179549166e-4,    1.1325736225e-3
00860     };
00861     static const double base_lsf[4] = {
00862         M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00863         M_PI * -3.3486e-2,  M_PI * -5.7408e-2
00864     };
00865     uint16_t v[4];
00866 
00867     v[0] = get_bits(gb, 8);
00868     v[1] = get_bits(gb, 6);
00869     v[2] = get_bits(gb, 5);
00870     v[3] = get_bits(gb, 5);
00871 
00872     dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00873                  mul_lsf, base_lsf);
00874 }
00875 
00880 static void dequant_lsp10r(GetBitContext *gb,
00881                            double *i_lsps, const double *old,
00882                            double *a1, double *a2, int q_mode)
00883 {
00884     static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00885     static const double mul_lsf[3] = {
00886         2.5807601174e-3,    1.2354460219e-3,   1.1763821673e-3
00887     };
00888     static const double base_lsf[3] = {
00889         M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00890     };
00891     const float (*ipol_tab)[2][10] = q_mode ?
00892         wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00893     uint16_t interpol, v[3];
00894     int n;
00895 
00896     dequant_lsp10i(gb, i_lsps);
00897 
00898     interpol = get_bits(gb, 5);
00899     v[0]     = get_bits(gb, 7);
00900     v[1]     = get_bits(gb, 6);
00901     v[2]     = get_bits(gb, 6);
00902 
00903     for (n = 0; n < 10; n++) {
00904         double delta = old[n] - i_lsps[n];
00905         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00906         a1[10 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00907     }
00908 
00909     dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00910                  mul_lsf, base_lsf);
00911 }
00912 
00916 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00917 {
00918     static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00919     static const double mul_lsf[5] = {
00920         3.3439586280e-3,    6.9908173703e-4,
00921         3.3216608306e-3,    1.0334960326e-3,
00922         3.1899104283e-3
00923     };
00924     static const double base_lsf[5] = {
00925         M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00926         M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00927         M_PI * -1.29816e-1
00928     };
00929     uint16_t v[5];
00930 
00931     v[0] = get_bits(gb, 8);
00932     v[1] = get_bits(gb, 6);
00933     v[2] = get_bits(gb, 7);
00934     v[3] = get_bits(gb, 6);
00935     v[4] = get_bits(gb, 7);
00936 
00937     dequant_lsps( lsps,     5,  v,     vec_sizes,    2,
00938                  wmavoice_dq_lsp16i1,  mul_lsf,     base_lsf);
00939     dequant_lsps(&lsps[5],  5, &v[2], &vec_sizes[2], 2,
00940                  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00941     dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00942                  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00943 }
00944 
00949 static void dequant_lsp16r(GetBitContext *gb,
00950                            double *i_lsps, const double *old,
00951                            double *a1, double *a2, int q_mode)
00952 {
00953     static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00954     static const double mul_lsf[3] = {
00955         1.2232979501e-3,   1.4062241527e-3,   1.6114744851e-3
00956     };
00957     static const double base_lsf[3] = {
00958         M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00959     };
00960     const float (*ipol_tab)[2][16] = q_mode ?
00961         wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00962     uint16_t interpol, v[3];
00963     int n;
00964 
00965     dequant_lsp16i(gb, i_lsps);
00966 
00967     interpol = get_bits(gb, 5);
00968     v[0]     = get_bits(gb, 7);
00969     v[1]     = get_bits(gb, 7);
00970     v[2]     = get_bits(gb, 7);
00971 
00972     for (n = 0; n < 16; n++) {
00973         double delta = old[n] - i_lsps[n];
00974         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00975         a1[16 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00976     }
00977 
00978     dequant_lsps( a2,     10,  v,     vec_sizes,    1,
00979                  wmavoice_dq_lsp16r1,  mul_lsf,     base_lsf);
00980     dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00981                  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00982     dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00983                  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00984 }
00985 
/**
 * Parse the pulse position index of a frame and derive, for each of its
 * two blocks, the number of pulses and the offset of the first pulse.
 *
 * Reads 6 bits; values >= 54 are extended with 2 more bits, giving an
 * index of 0..93 into start_offset[]. Results are stored in
 * s->aw_idx_is_ext, s->aw_pulse_range, s->aw_n_pulses[] and
 * s->aw_first_pulse_off[].
 *
 * @param s     context receiving the results
 * @param gb    bit reader
 * @param pitch pitch lag of block 0 and block 1 (pitch[0], pitch[1]);
 *              both are used as loop steps and divisors, so they must
 *              be > 0
 */
00999 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01000                             const int *pitch)
01001 {
01002     static const int16_t start_offset[94] = {
01003         -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
01004          13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
01005          27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
01006          45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
01007          69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
01008          93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
01009         117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01010         141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01011     };
01012     int bits, offset;
01013 
01014     /* position of pulse */
01015     s->aw_idx_is_ext = 0;
01016     if ((bits = get_bits(gb, 6)) >= 54) {
01017         s->aw_idx_is_ext = 1;
        /* maps 54..63 plus the 2 extra bits onto 54..93, i.e.
         * 54 + (bits - 54) * 4 + extra */
01018         bits += (bits - 54) * 3 + get_bits(gb, 2);
01019     }
01020 
01021     /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
01022      * the distribution of the pulses in each block contained in this frame. */
01023     s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
    /* advance a negative start offset in steps of pitch[0] until it is
     * non-negative (the loop body is intentionally empty) */
01024     for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
    /* the blocks are MAX_FRAMESIZE / 2 samples each: count the pulses
     * in [offset, MAX_FRAMESIZE / 2) at pitch[0] spacing, then continue
     * from there up to MAX_FRAMESIZE at pitch[1] spacing */
01025     s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01026     s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01027     offset                  += s->aw_n_pulses[0] * pitch[0];
01028     s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01029     s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01030 
01031     /* if continuing from a position before the block, reset position to
01032      * start of block (when corrected for the range over which it can be
01033      * spread in aw_pulse_set1()). */
01034     if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01035         while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01036             s->aw_first_pulse_off[1] -= pitch[1];
01037         if (start_offset[bits] < 0)
01038             while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01039                 s->aw_first_pulse_off[0] -= pitch[0];
01040     }
01041 }
01042 
01051 static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01052                          int block_idx, AMRFixed *fcb)
01053 {
01054     uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
01055     uint16_t *use_mask = use_mask_mem + 2;
01056     /* in this function, idx is the index in the 80-bit (+ padding) use_mask
01057      * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
01058      * of idx are the position of the bit within a particular item in the
01059      * array (0 being the most significant bit, and 15 being the least
01060      * significant bit), and the remainder (>> 4) is the index in the
01061      * use_mask[]-array. This is faster and uses less memory than using a
01062      * 80-byte/80-int array. */
01063     int pulse_off = s->aw_first_pulse_off[block_idx],
01064         pulse_start, n, idx, range, aidx, start_off = 0;
01065 
01066     /* set offset of first pulse to within this block */
01067     if (s->aw_n_pulses[block_idx] > 0)
01068         while (pulse_off + s->aw_pulse_range < 1)
01069             pulse_off += fcb->pitch_lag;
01070 
01071     /* find range per pulse */
01072     if (s->aw_n_pulses[0] > 0) {
01073         if (block_idx == 0) {
01074             range = 32;
01075         } else /* block_idx = 1 */ {
01076             range = 8;
01077             if (s->aw_n_pulses[block_idx] > 0)
01078                 pulse_off = s->aw_next_pulse_off_cache;
01079         }
01080     } else
01081         range = 16;
01082     pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01083 
01084     /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
01085      * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
01086      * we exclude that range from being pulsed again in this function. */
01087     memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01088     memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
01089     memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01090     if (s->aw_n_pulses[block_idx] > 0)
01091         for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01092             int excl_range         = s->aw_pulse_range; // always 16 or 24
01093             uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01094             int first_sh           = 16 - (idx & 15);
01095             *use_mask_ptr++       &= 0xFFFFu << first_sh;
01096             excl_range            -= first_sh;
01097             if (excl_range >= 16) {
01098                 *use_mask_ptr++    = 0;
01099                 *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
01100             } else
01101                 *use_mask_ptr     &= 0xFFFF >> excl_range;
01102         }
01103 
01104     /* find the 'aidx'th offset that is not excluded */
01105     aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01106     for (n = 0; n <= aidx; pulse_start++) {
01107         for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01108         if (idx >= MAX_FRAMESIZE / 2) { // find from zero
01109             if (use_mask[0])      idx = 0x0F;
01110             else if (use_mask[1]) idx = 0x1F;
01111             else if (use_mask[2]) idx = 0x2F;
01112             else if (use_mask[3]) idx = 0x3F;
01113             else if (use_mask[4]) idx = 0x4F;
01114             else return -1;
01115             idx -= av_log2_16bit(use_mask[idx >> 4]);
01116         }
01117         if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01118             use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01119             n++;
01120             start_off = idx;
01121         }
01122     }
01123 
01124     fcb->x[fcb->n] = start_off;
01125     fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01126     fcb->n++;
01127 
01128     /* set offset for next block, relative to start of that block */
01129     n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01130     s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01131     return 0;
01132 }
01133 
01141 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01142                           int block_idx, AMRFixed *fcb)
01143 {
01144     int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01145     float v;
01146 
01147     if (s->aw_n_pulses[block_idx] > 0) {
01148         int n, v_mask, i_mask, sh, n_pulses;
01149 
01150         if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
01151             n_pulses = 3;
01152             v_mask   = 8;
01153             i_mask   = 7;
01154             sh       = 4;
01155         } else { // 4 pulses, 1:sign + 2:index each
01156             n_pulses = 4;
01157             v_mask   = 4;
01158             i_mask   = 3;
01159             sh       = 3;
01160         }
01161 
01162         for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01163             fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01164             fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01165                                  s->aw_first_pulse_off[block_idx];
01166             while (fcb->x[fcb->n] < 0)
01167                 fcb->x[fcb->n] += fcb->pitch_lag;
01168             if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01169                 fcb->n++;
01170         }
01171     } else {
01172         int num2 = (val & 0x1FF) >> 1, delta, idx;
01173 
01174         if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
01175         else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01176         else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01177         else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
01178         v = (val & 0x200) ? -1.0 : 1.0;
01179 
01180         fcb->no_repeat_mask |= 3 << fcb->n;
01181         fcb->x[fcb->n]       = idx - delta;
01182         fcb->y[fcb->n]       = v;
01183         fcb->x[fcb->n + 1]   = idx;
01184         fcb->y[fcb->n + 1]   = (val & 1) ? -v : v;
01185         fcb->n              += 2;
01186     }
01187 }
01188 
/**
 * Deterministic pseudo-random offset generator.
 *
 * Computes x = (block_num * 1877 + frame_cntr) mod 0xFFFF,
 * y = (x % 9) * 5 + 6 and z = (uint16_t) (49995 * x / y), and returns
 * z % (1000 - block_size).
 *
 * @param frame_cntr current frame counter
 * @param block_num  index of the block within the frame
 * @param block_size number of samples in the block
 * @return a value in [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* The division by y is avoided with a LUT, since y only takes 9
     * distinct values:
     *   z = x * (49995 / y) + x * ((49995 % y) / y)
     * Each row belongs to one y; column 0 is 49995 / y and column 1 is the
     * FASTDIV-style (32-bit fixed point) form of (49995 % y) / y. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    const unsigned int *row;
    unsigned int x, y, z;

    x = MUL16(block_num, 1877) + frame_cntr;
    /* max value of x is 8*1877+0xFFFE=0x13AA6, so a single subtraction is
     * effectively a modulo (%) */
    if (x >= 0xFFFF)
        x -= 0xFFFF;

    y   = x - 9 * MULH(477218589, x); // x % 9
    row = div_tbl[y];

    /* z = x * 49995 / (y * 5 + 6), truncated to 16 bits */
    z = (uint16_t) (x * row[0] + UMULH(x, row[1]));

    return z % (1000 - block_size);
}
01233 
01238 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01239                                  int block_idx, int size,
01240                                  const struct frame_type_desc *frame_desc,
01241                                  float *excitation)
01242 {
01243     float gain;
01244     int n, r_idx;
01245 
01246     assert(size <= MAX_FRAMESIZE);
01247 
01248     /* Set the offset from which we start reading wmavoice_std_codebook */
01249     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01250         r_idx = pRNG(s->frame_cntr, block_idx, size);
01251         gain  = s->silence_gain;
01252     } else /* FCB_TYPE_HARDCODED */ {
01253         r_idx = get_bits(gb, 8);
01254         gain  = wmavoice_gain_universal[get_bits(gb, 6)];
01255     }
01256 
01257     /* Clear gain prediction parameters */
01258     memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01259 
01260     /* Apply gain to hardcoded codebook and use that as excitation signal */
01261     for (n = 0; n < size; n++)
01262         excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01263 }
01264 
/**
 * Synthesize the excitation of one block as a weighted sum of a fixed
 * codebook (pulse) signal and an adaptive codebook signal, the latter being
 * a pitch-delayed, interpolated copy of earlier excitation samples.
 *
 * @param s               WMA Voice decoding context; gain_pred_err[] is
 *                        read and updated
 * @param gb              bit I/O context
 * @param block_idx       index of the block within the frame
 * @param size            number of samples in the block
 *                        (<= MAX_FRAMESIZE / 2)
 * @param block_pitch_sh2 block pitch; the code uses ">> 2" as the integer
 *                        lag and "& 3" as the fractional index
 * @param frame_desc      frame type descriptor
 * @param excitation      in/out: samples at negative indices (up to one
 *                        pitch lag back) are read as history, size samples
 *                        are written starting at excitation[0]
 */
01269 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01270                                 int block_idx, int size,
01271                                 int block_pitch_sh2,
01272                                 const struct frame_type_desc *frame_desc,
01273                                 float *excitation)
01274 {
01275     static const float gain_coeff[6] = {
01276         0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01277     };
01278     float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01279     int n, idx, gain_weight;
01280     AMRFixed fcb;
01281 
01282     assert(size <= MAX_FRAMESIZE / 2);
01283     memset(pulses, 0, sizeof(*pulses) * size);
01284 
01285     fcb.pitch_lag      = block_pitch_sh2 >> 2;
01286     fcb.pitch_fac      = 1.0;
01287     fcb.no_repeat_mask = 0;
01288     fcb.n              = 0;
01289 
01290     /* For the other frame types, this is where we apply the innovation
01291      * (fixed) codebook pulses of the speech signal. */
01292     if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01293         aw_pulse_set1(s, gb, block_idx, &fcb);
01294         if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
01295             /* Conceal the block with silence and return.
01296              * Skip the correct amount of bits to read the next
01297              * block from the correct offset. */
01298             int r_idx = pRNG(s->frame_cntr, block_idx, size);
01299 
01300             for (n = 0; n < size; n++)
01301                 excitation[n] =
01302                     wmavoice_std_codebook[r_idx + n] * s->silence_gain;
            /* 7 bits match the gain index read further down; the extra
             * bit is presumably the sign bit aw_pulse_set2() did not get
             * to read before failing - TODO confirm. */
01303             skip_bits(gb, 7 + 1);
01304             return;
01305         }
01306     } else /* FCB_TYPE_EXC_PULSES */ {
01307         int offset_nbits = 5 - frame_desc->log_n_blocks;
01308 
        /* -1 sets every bit of no_repeat_mask */
01309         fcb.no_repeat_mask = -1;
01310         /* similar to ff_decode_10_pulses_35bits(), but with single pulses
01311          * (instead of double) for a subset of pulses */
01312         for (n = 0; n < 5; n++) {
01313             float sign;
01314             int pos1, pos2;
01315 
01316             sign           = get_bits1(gb) ? 1.0 : -1.0;
01317             pos1           = get_bits(gb, offset_nbits);
01318             fcb.x[fcb.n]   = n + 5 * pos1;
01319             fcb.y[fcb.n++] = sign;
            /* only the first dbl_pulses tracks carry a second pulse */
01320             if (n < frame_desc->dbl_pulses) {
01321                 pos2           = get_bits(gb, offset_nbits);
01322                 fcb.x[fcb.n]   = n + 5 * pos2;
01323                 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01324             }
01325         }
01326     }
01327     ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01328 
01329     /* Calculate gain for adaptive & fixed codebook signal.
01330      * see ff_amr_set_fixed_gain(). */
01331     idx = get_bits(gb, 7);
01332     fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01333                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01334     acb_gain = wmavoice_gain_codebook_acb[idx];
01335     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01336                         -2.9957322736 /* log(0.05) */,
01337                          1.6094379124 /* log(5.0)  */);
01338 
    /* Shift the 6-entry prediction error history by gain_weight slots and
     * fill the freed front slots with the new error, so a block weighs
     * more in the history when the frame has fewer blocks. */
01339     gain_weight = 8 >> frame_desc->log_n_blocks;
01340     memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01341             sizeof(*s->gain_pred_err) * (6 - gain_weight));
01342     for (n = 0; n < gain_weight; n++)
01343         s->gain_pred_err[n] = pred_err;
01344 
01345     /* Calculation of adaptive codebook */
01346     if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01347         int len;
        /* The pitch changes linearly over the frame (pitch_diff_sh16 per
         * sample, 16.16 fixed point); the block is processed in runs of
         * len samples that share one integer lag and filter index. */
01348         for (n = 0; n < size; n += len) {
01349             int next_idx_sh16;
01350             int abs_idx    = block_idx * size + n;
01351             int pitch_sh16 = (s->last_pitch_val << 16) +
01352                              s->pitch_diff_sh16 * abs_idx;
01353             int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
01354             int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01355             idx            = idx_sh16 >> 16;
01356             if (s->pitch_diff_sh16) {
01357                 if (s->pitch_diff_sh16 > 0) {
01358                     next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01359                 } else
01360                     next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01361                 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01362                               1, size - n);
01363             } else
01364                 len = size;
01365 
01366             ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01367                                   wmavoice_ipol1_coeffs, 17,
01368                                   idx, 9, len);
01369         }
01370     } else /* ACB_TYPE_HAMMING */ {
01371         int block_pitch = block_pitch_sh2 >> 2;
01372         idx             = block_pitch_sh2 & 3;
        /* a zero fractional part needs no interpolation: plain
         * back-reference copy of the history */
01373         if (idx) {
01374             ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01375                                   wmavoice_ipol2_coeffs, 4,
01376                                   idx, 8, size);
01377         } else
01378             av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01379                               sizeof(float) * size);
01380     }
01381 
01382     /* Interpolate ACB/FCB and use as excitation signal */
01383     ff_weighted_vector_sumf(excitation, excitation, pulses,
01384                             acb_gain, fcb_gain, size);
01385 }
01386 
01403 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01404                         int block_idx, int size,
01405                         int block_pitch_sh2,
01406                         const double *lsps, const double *prev_lsps,
01407                         const struct frame_type_desc *frame_desc,
01408                         float *excitation, float *synth)
01409 {
01410     double i_lsps[MAX_LSPS];
01411     float lpcs[MAX_LSPS];
01412     float fac;
01413     int n;
01414 
01415     if (frame_desc->acb_type == ACB_TYPE_NONE)
01416         synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01417     else
01418         synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01419                             frame_desc, excitation);
01420 
01421     /* convert interpolated LSPs to LPCs */
01422     fac = (block_idx + 0.5) / frame_desc->n_blocks;
01423     for (n = 0; n < s->lsps; n++) // LSF -> LSP
01424         i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01425     ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01426 
01427     /* Speech synthesis */
01428     ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01429 }
01430 
01446 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01447                        float *samples,
01448                        const double *lsps, const double *prev_lsps,
01449                        float *excitation, float *synth)
01450 {
01451     WMAVoiceContext *s = ctx->priv_data;
01452     int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01453     int pitch[MAX_BLOCKS], last_block_pitch;
01454 
01455     /* Parse frame type ("frame header"), see frame_descs */
01456     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01457 
01458     if (bd_idx < 0) {
01459         av_log(ctx, AV_LOG_ERROR,
01460                "Invalid frame type VLC code, skipping\n");
01461         return -1;
01462     }
01463 
01464     block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01465 
01466     /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
01467     if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01468         /* Pitch is provided per frame, which is interpreted as the pitch of
01469          * the last sample of the last block of this frame. We can interpolate
01470          * the pitch of other blocks (and even pitch-per-sample) by gradually
01471          * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
01472         n_blocks_x2      = frame_descs[bd_idx].n_blocks << 1;
01473         log_n_blocks_x2  = frame_descs[bd_idx].log_n_blocks + 1;
01474         cur_pitch_val    = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01475         cur_pitch_val    = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01476         if (s->last_acb_type == ACB_TYPE_NONE ||
01477             20 * abs(cur_pitch_val - s->last_pitch_val) >
01478                 (cur_pitch_val + s->last_pitch_val))
01479             s->last_pitch_val = cur_pitch_val;
01480 
01481         /* pitch per block */
01482         for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01483             int fac = n * 2 + 1;
01484 
01485             pitch[n] = (MUL16(fac,                 cur_pitch_val) +
01486                         MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01487                         frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01488         }
01489 
01490         /* "pitch-diff-per-sample" for calculation of pitch per sample */
01491         s->pitch_diff_sh16 =
01492             ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01493     }
01494 
01495     /* Global gain (if silence) and pitch-adaptive window coordinates */
01496     switch (frame_descs[bd_idx].fcb_type) {
01497     case FCB_TYPE_SILENCE:
01498         s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01499         break;
01500     case FCB_TYPE_AW_PULSES:
01501         aw_parse_coords(s, gb, pitch);
01502         break;
01503     }
01504 
01505     for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01506         int bl_pitch_sh2;
01507 
01508         /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
01509         switch (frame_descs[bd_idx].acb_type) {
01510         case ACB_TYPE_HAMMING: {
01511             /* Pitch is given per block. Per-block pitches are encoded as an
01512              * absolute value for the first block, and then delta values
01513              * relative to this value) for all subsequent blocks. The scale of
01514              * this pitch value is semi-logaritmic compared to its use in the
01515              * decoder, so we convert it to normal scale also. */
01516             int block_pitch,
01517                 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01518                 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01519                 t3 =  s->block_conv_table[3] - s->block_conv_table[2] + 1;
01520 
01521             if (n == 0) {
01522                 block_pitch = get_bits(gb, s->block_pitch_nbits);
01523             } else
01524                 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01525                                  get_bits(gb, s->block_delta_pitch_nbits);
01526             /* Convert last_ so that any next delta is within _range */
01527             last_block_pitch = av_clip(block_pitch,
01528                                        s->block_delta_pitch_hrange,
01529                                        s->block_pitch_range -
01530                                            s->block_delta_pitch_hrange);
01531 
01532             /* Convert semi-log-style scale back to normal scale */
01533             if (block_pitch < t1) {
01534                 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01535             } else {
01536                 block_pitch -= t1;
01537                 if (block_pitch < t2) {
01538                     bl_pitch_sh2 =
01539                         (s->block_conv_table[1] << 2) + (block_pitch << 1);
01540                 } else {
01541                     block_pitch -= t2;
01542                     if (block_pitch < t3) {
01543                         bl_pitch_sh2 =
01544                             (s->block_conv_table[2] + block_pitch) << 2;
01545                     } else
01546                         bl_pitch_sh2 = s->block_conv_table[3] << 2;
01547                 }
01548             }
01549             pitch[n] = bl_pitch_sh2 >> 2;
01550             break;
01551         }
01552 
01553         case ACB_TYPE_ASYMMETRIC: {
01554             bl_pitch_sh2 = pitch[n] << 2;
01555             break;
01556         }
01557 
01558         default: // ACB_TYPE_NONE has no pitch
01559             bl_pitch_sh2 = 0;
01560             break;
01561         }
01562 
01563         synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01564                     lsps, prev_lsps, &frame_descs[bd_idx],
01565                     &excitation[n * block_nsamples],
01566                     &synth[n * block_nsamples]);
01567     }
01568 
01569     /* Averaging projection filter, if applicable. Else, just copy samples
01570      * from synthesis buffer */
01571     if (s->do_apf) {
01572         double i_lsps[MAX_LSPS];
01573         float lpcs[MAX_LSPS];
01574 
01575         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01576             i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01577         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01578         postfilter(s, synth, samples, 80, lpcs,
01579                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01580                    frame_descs[bd_idx].fcb_type, pitch[0]);
01581 
01582         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01583             i_lsps[n] = cos(lsps[n]);
01584         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01585         postfilter(s, &synth[80], &samples[80], 80, lpcs,
01586                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01587                    frame_descs[bd_idx].fcb_type, pitch[0]);
01588     } else
01589         memcpy(samples, synth, 160 * sizeof(synth[0]));
01590 
01591     /* Cache values for next frame */
01592     s->frame_cntr++;
01593     if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
01594     s->last_acb_type = frame_descs[bd_idx].acb_type;
01595     switch (frame_descs[bd_idx].acb_type) {
01596     case ACB_TYPE_NONE:
01597         s->last_pitch_val = 0;
01598         break;
01599     case ACB_TYPE_ASYMMETRIC:
01600         s->last_pitch_val = cur_pitch_val;
01601         break;
01602     case ACB_TYPE_HAMMING:
01603         s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01604         break;
01605     }
01606 
01607     return 0;
01608 }
01609 
/**
 * Make a set of LSF values usable for filter synthesis.
 *
 * Enforces a minimum for the first value (0.0015 * pi), a minimum spacing
 * between consecutive values (0.0125 * pi) and a maximum for the last value
 * (0.9985 * pi). If clamping the last value broke the ascending order, the
 * whole array is re-sorted with an insertion sort.
 *
 * @param lsps array of LSF values, modified in place
 * @param num  number of entries in lsps (at least 1)
 */
static void stabilize_lsps(double *lsps, int num)
{
    int i, j, k;

    /* Lower bound, minimum distance and upper bound. Very similar to
     * ff_set_min_dist_lsf(), but in double. The comparisons are written so
     * that an unordered (NaN) value is replaced by the bound for the lower
     * limits and kept for the upper limit. */
    if (!(lsps[0] > 0.0015 * M_PI))
        lsps[0] = 0.0015 * M_PI;
    for (i = 1; i < num; i++) {
        if (!(lsps[i] > lsps[i - 1] + 0.0125 * M_PI))
            lsps[i] = lsps[i - 1] + 0.0125 * M_PI;
    }
    if (lsps[num - 1] > 0.9985 * M_PI)
        lsps[num - 1] = 0.9985 * M_PI;

    /* Look for the first out-of-order pair; nothing to do if there is none. */
    for (i = 1; i < num && !(lsps[i] < lsps[i - 1]); i++)
        ;
    if (i >= num)
        return;

    /* Reorder with a single insertion-sort pass over the whole array.
     * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
    for (j = 1; j < num; j++) {
        double cur = lsps[j];

        k = j;
        while (k > 0 && !(lsps[k - 1] <= cur)) {
            lsps[k] = lsps[k - 1];
            k--;
        }
        lsps[k] = cur;
    }
}
01650 
01660 static int check_bits_for_superframe(GetBitContext *orig_gb,
01661                                      WMAVoiceContext *s)
01662 {
01663     GetBitContext s_gb, *gb = &s_gb;
01664     int n, need_bits, bd_idx;
01665     const struct frame_type_desc *frame_desc;
01666 
01667     /* initialize a copy */
01668     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01669     skip_bits_long(gb, get_bits_count(orig_gb));
01670     assert(get_bits_left(gb) == get_bits_left(orig_gb));
01671 
01672     /* superframe header */
01673     if (get_bits_left(gb) < 14)
01674         return 1;
01675     if (!get_bits1(gb))
01676         return -1;                        // WMAPro-in-WMAVoice superframe
01677     if (get_bits1(gb)) skip_bits(gb, 12); // number of  samples in superframe
01678     if (s->has_residual_lsps) {           // residual LSPs (for all frames)
01679         if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01680             return 1;
01681         skip_bits_long(gb, s->sframe_lsp_bitsize);
01682     }
01683 
01684     /* frames */
01685     for (n = 0; n < MAX_FRAMES; n++) {
01686         int aw_idx_is_ext = 0;
01687 
01688         if (!s->has_residual_lsps) {     // independent LSPs (per-frame)
01689            if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01690            skip_bits_long(gb, s->frame_lsp_bitsize);
01691         }
01692         bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01693         if (bd_idx < 0)
01694             return -1;                   // invalid frame type VLC code
01695         frame_desc = &frame_descs[bd_idx];
01696         if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01697             if (get_bits_left(gb) < s->pitch_nbits)
01698                 return 1;
01699             skip_bits_long(gb, s->pitch_nbits);
01700         }
01701         if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01702             skip_bits(gb, 8);
01703         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01704             int tmp = get_bits(gb, 6);
01705             if (tmp >= 0x36) {
01706                 skip_bits(gb, 2);
01707                 aw_idx_is_ext = 1;
01708             }
01709         }
01710 
01711         /* blocks */
01712         if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01713             need_bits = s->block_pitch_nbits +
01714                 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01715         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01716             need_bits = 2 * !aw_idx_is_ext;
01717         } else
01718             need_bits = 0;
01719         need_bits += frame_desc->frame_size;
01720         if (get_bits_left(gb) < need_bits)
01721             return 1;
01722         skip_bits_long(gb, need_bits);
01723     }
01724 
01725     return 0;
01726 }
01727 
/**
 * Decode one superframe (three frames of MAX_FRAMESIZE samples each) into
 * s->frame.
 *
 * Bits are read from s->gb, or from s->sframe_cache when
 * s->sframe_cache_size is positive (the cache size is then reset to 0).
 * On success the LSP, synthesis, excitation and (if s->do_apf) post-filter
 * histories in the context are updated for the next superframe.
 *
 * @param ctx           codec context; ctx->priv_data is the WMAVoiceContext
 * @param got_frame_ptr set to 1 when a frame was produced, to 0 when there
 *                      are too few bits or synth_frame() fails.
 *                      NOTE(review): the other error returns (WMAPro
 *                      superframe, >480 samples, get_buffer() failure)
 *                      leave it untouched - confirm the caller presets it.
 * @return 0 on success, 1 if more bits are needed for a complete
 *         superframe, a negative value on error
 */
01748 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01749 {
01750     WMAVoiceContext *s = ctx->priv_data;
01751     GetBitContext *gb = &s->gb, s_gb;
01752     int n, res, n_samples = 480;
01753     double lsps[MAX_FRAMES][MAX_LSPS];
01754     const double *mean_lsf = s->lsps == 16 ?
01755         wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
    /* Working buffers: history from the previous superframe goes in front,
     * the samples of this superframe are appended behind it. */
01756     float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01757     float synth[MAX_LSPS + MAX_SFRAMESIZE];
01758     float *samples;
01759 
01760     memcpy(synth,      s->synth_history,
01761            s->lsps             * sizeof(*synth));
01762     memcpy(excitation, s->excitation_history,
01763            s->history_nsamples * sizeof(*excitation));
01764 
    /* cached superframe bits take precedence over the packet reader */
01765     if (s->sframe_cache_size > 0) {
01766         gb = &s_gb;
01767         init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01768         s->sframe_cache_size = 0;
01769     }
01770 
    /* NOTE(review): only the "need more bits" result (1) is acted on; a
     * negative result is ignored here and the same conditions (speech/music
     * bit, invalid frame type VLC) are detected again by the parsing below. */
01771     if ((res = check_bits_for_superframe(gb, s)) == 1) {
01772         *got_frame_ptr = 0;
01773         return 1;
01774     }
01775 
01776     /* First bit is speech/music bit, it differentiates between WMAVoice
01777      * speech samples (the actual codec) and WMAVoice music samples, which
01778      * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
01779      * the wild yet. */
01780     if (!get_bits1(gb)) {
01781         av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01782         return -1;
01783     }
01784 
01785     /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
01786     if (get_bits1(gb)) {
01787         if ((n_samples = get_bits(gb, 12)) > 480) {
01788             av_log(ctx, AV_LOG_ERROR,
01789                    "Superframe encodes >480 samples (%d), not allowed\n",
01790                    n_samples);
01791             return -1;
01792         }
01793     }
01794     /* Parse LSPs, if global for the superframe (can also be per-frame). */
01795     if (s->has_residual_lsps) {
01796         double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01797 
        /* the dequantizers work on mean-removed values */
01798         for (n = 0; n < s->lsps; n++)
01799             prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01800 
01801         if (s->lsps == 10) {
01802             dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01803         } else /* s->lsps == 16 */
01804             dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01805 
        /* frames 0 and 1 are rebuilt from the a1/a2 outputs of the
         * dequantizer, frame 2 is the dequantized value itself */
01806         for (n = 0; n < s->lsps; n++) {
01807             lsps[0][n]  = mean_lsf[n] + (a1[n]           - a2[n * 2]);
01808             lsps[1][n]  = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01809             lsps[2][n] += mean_lsf[n];
01810         }
01811         for (n = 0; n < 3; n++)
01812             stabilize_lsps(lsps[n], s->lsps);
01813     }
01814 
01815     /* get output buffer */
    /* always request room for a full superframe (480 samples), then report
     * the possibly smaller coded sample count */
01816     s->frame.nb_samples = 480;
01817     if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) {
01818         av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01819         return res;
01820     }
01821     s->frame.nb_samples = n_samples;
01822     samples = (float *)s->frame.data[0];
01823 
01824     /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
01825     for (n = 0; n < 3; n++) {
01826         if (!s->has_residual_lsps) {
01827             int m;
01828 
01829             if (s->lsps == 10) {
01830                 dequant_lsp10i(gb, lsps[n]);
01831             } else /* s->lsps == 16 */
01832                 dequant_lsp16i(gb, lsps[n]);
01833 
01834             for (m = 0; m < s->lsps; m++)
01835                 lsps[n][m] += mean_lsf[m];
01836             stabilize_lsps(lsps[n], s->lsps);
01837         }
01838 
        /* the first frame interpolates from the LSPs kept from the previous
         * superframe, later frames from the frame before them */
01839         if ((res = synth_frame(ctx, gb, n,
01840                                &samples[n * MAX_FRAMESIZE],
01841                                lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01842                                &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01843                                &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01844             *got_frame_ptr = 0;
01845             return res;
01846         }
01847     }
01848 
01849     /* Statistics? FIXME - we don't check for length, a slight overrun
01850      * will be caught by internal buffer padding, and anything else
01851      * will be skipped, not read. */
01852     if (get_bits1(gb)) {
01853         res = get_bits(gb, 4);
01854         skip_bits(gb, 10 * (res + 1));
01855     }
01856 
01857     *got_frame_ptr = 1;
01858 
01859     /* Update history */
    /* the tail of each working buffer becomes the history that is copied
     * to the front of the buffers for the next superframe */
01860     memcpy(s->prev_lsps,           lsps[2],
01861            s->lsps             * sizeof(*s->prev_lsps));
01862     memcpy(s->synth_history,      &synth[MAX_SFRAMESIZE],
01863            s->lsps             * sizeof(*synth));
01864     memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01865            s->history_nsamples * sizeof(*excitation));
01866     if (s->do_apf)
01867         memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
01868                 s->history_nsamples * sizeof(*s->zero_exc_pf));
01869 
01870     return 0;
01871 }
01872 
01880 static int parse_packet_header(WMAVoiceContext *s)
01881 {
01882     GetBitContext *gb = &s->gb;
01883     unsigned int res;
01884 
01885     if (get_bits_left(gb) < 11)
01886         return 1;
01887     skip_bits(gb, 4);          // packet sequence number
01888     s->has_residual_lsps = get_bits1(gb);
01889     do {
01890         res = get_bits(gb, 6); // number of superframes per packet
01891                                // (minus first one if there is spillover)
01892         if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01893             return 1;
01894     } while (res == 0x3F);
01895     s->spillover_nbits   = get_bits(gb, s->spillover_bitsize);
01896 
01897     return 0;
01898 }
01899 
01915 static void copy_bits(PutBitContext *pb,
01916                       const uint8_t *data, int size,
01917                       GetBitContext *gb, int nbits)
01918 {
01919     int rmn_bytes, rmn_bits;
01920 
01921     rmn_bits = rmn_bytes = get_bits_left(gb);
01922     if (rmn_bits < nbits)
01923         return;
01924     if (nbits > pb->size_in_bits - put_bits_count(pb))
01925         return;
01926     rmn_bits &= 7; rmn_bytes >>= 3;
01927     if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01928         put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01929     avpriv_copy_bits(pb, data + size - rmn_bytes,
01930                  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01931 }
01932 
01944 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01945                                   int *got_frame_ptr, AVPacket *avpkt)
01946 {
01947     WMAVoiceContext *s = ctx->priv_data;
01948     GetBitContext *gb = &s->gb;
01949     int size, res, pos;
01950 
01951     /* Packets are sometimes a multiple of ctx->block_align, with a packet
01952      * header at each ctx->block_align bytes. However, Libav's ASF demuxer
01953      * feeds us ASF packets, which may concatenate multiple "codec" packets
01954      * in a single "muxer" packet, so we artificially emulate that by
01955      * capping the packet size at ctx->block_align. */
01956     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01957     if (!size) {
01958         *got_frame_ptr = 0;
01959         return 0;
01960     }
01961     init_get_bits(&s->gb, avpkt->data, size << 3);
01962 
01963     /* size == ctx->block_align is used to indicate whether we are dealing with
01964      * a new packet or a packet of which we already read the packet header
01965      * previously. */
01966     if (size == ctx->block_align) { // new packet header
01967         if ((res = parse_packet_header(s)) < 0)
01968             return res;
01969 
01970         /* If the packet header specifies a s->spillover_nbits, then we want
01971          * to push out all data of the previous packet (+ spillover) before
01972          * continuing to parse new superframes in the current packet. */
01973         if (s->spillover_nbits > 0) {
01974             if (s->sframe_cache_size > 0) {
01975                 int cnt = get_bits_count(gb);
01976                 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01977                 flush_put_bits(&s->pb);
01978                 s->sframe_cache_size += s->spillover_nbits;
01979                 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01980                     *got_frame_ptr) {
01981                     cnt += s->spillover_nbits;
01982                     s->skip_bits_next = cnt & 7;
01983                     *(AVFrame *)data = s->frame;
01984                     return cnt >> 3;
01985                 } else
01986                     skip_bits_long (gb, s->spillover_nbits - cnt +
01987                                     get_bits_count(gb)); // resync
01988             } else
01989                 skip_bits_long(gb, s->spillover_nbits);  // resync
01990         }
01991     } else if (s->skip_bits_next)
01992         skip_bits(gb, s->skip_bits_next);
01993 
01994     /* Try parsing superframes in current packet */
01995     s->sframe_cache_size = 0;
01996     s->skip_bits_next = 0;
01997     pos = get_bits_left(gb);
01998     if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01999         return res;
02000     } else if (*got_frame_ptr) {
02001         int cnt = get_bits_count(gb);
02002         s->skip_bits_next = cnt & 7;
02003         *(AVFrame *)data = s->frame;
02004         return cnt >> 3;
02005     } else if ((s->sframe_cache_size = pos) > 0) {
02006         /* rewind bit reader to start of last (incomplete) superframe... */
02007         init_get_bits(gb, avpkt->data, size << 3);
02008         skip_bits_long(gb, (size << 3) - pos);
02009         assert(get_bits_left(gb) == pos);
02010 
02011         /* ...and cache it for spillover in next packet */
02012         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
02013         copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
02014         // FIXME bad - just copy bytes as whole and add use the
02015         // skip_bits_next field
02016     }
02017 
02018     return size;
02019 }
02020 
02021 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02022 {
02023     WMAVoiceContext *s = ctx->priv_data;
02024 
02025     if (s->do_apf) {
02026         ff_rdft_end(&s->rdft);
02027         ff_rdft_end(&s->irdft);
02028         ff_dct_end(&s->dct);
02029         ff_dct_end(&s->dst);
02030     }
02031 
02032     return 0;
02033 }
02034 
02035 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02036 {
02037     WMAVoiceContext *s = ctx->priv_data;
02038     int n;
02039 
02040     s->postfilter_agc    = 0;
02041     s->sframe_cache_size = 0;
02042     s->skip_bits_next    = 0;
02043     for (n = 0; n < s->lsps; n++)
02044         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02045     memset(s->excitation_history, 0,
02046            sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02047     memset(s->synth_history,      0,
02048            sizeof(*s->synth_history)      * MAX_LSPS);
02049     memset(s->gain_pred_err,      0,
02050            sizeof(s->gain_pred_err));
02051 
02052     if (s->do_apf) {
02053         memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02054                sizeof(*s->synth_filter_out_buf) * s->lsps);
02055         memset(s->dcf_mem,              0,
02056                sizeof(*s->dcf_mem)              * 2);
02057         memset(s->zero_exc_pf,          0,
02058                sizeof(*s->zero_exc_pf)          * s->history_nsamples);
02059         memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02060     }
02061 }
02062 
02063 AVCodec ff_wmavoice_decoder = {
02064     .name           = "wmavoice",
02065     .type           = AVMEDIA_TYPE_AUDIO,
02066     .id             = CODEC_ID_WMAVOICE,
02067     .priv_data_size = sizeof(WMAVoiceContext),
02068     .init           = wmavoice_decode_init,
02069     .close          = wmavoice_decode_end,
02070     .decode         = wmavoice_decode_packet,
02071     .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02072     .flush     = wmavoice_flush,
02073     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02074 };