Libav
|
00001 /* 00002 * WMA compatible encoder 00003 * Copyright (c) 2007 Michael Niedermayer 00004 * 00005 * This file is part of FFmpeg. 00006 * 00007 * FFmpeg is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * FFmpeg is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with FFmpeg; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #include "avcodec.h" 00023 #include "wma.h" 00024 00025 #undef NDEBUG 00026 #include <assert.h> 00027 00028 00029 static int encode_init(AVCodecContext * avctx){ 00030 WMACodecContext *s = avctx->priv_data; 00031 int i, flags1, flags2; 00032 uint8_t *extradata; 00033 00034 s->avctx = avctx; 00035 00036 if(avctx->channels > MAX_CHANNELS) 00037 return -1; 00038 00039 if(avctx->bit_rate < 24*1000) 00040 return -1; 00041 00042 /* extract flag infos */ 00043 flags1 = 0; 00044 flags2 = 1; 00045 if (avctx->codec->id == CODEC_ID_WMAV1) { 00046 extradata= av_malloc(4); 00047 avctx->extradata_size= 4; 00048 AV_WL16(extradata, flags1); 00049 AV_WL16(extradata+2, flags2); 00050 } else if (avctx->codec->id == CODEC_ID_WMAV2) { 00051 extradata= av_mallocz(10); 00052 avctx->extradata_size= 10; 00053 AV_WL32(extradata, flags1); 00054 AV_WL16(extradata+4, flags2); 00055 }else 00056 assert(0); 00057 avctx->extradata= extradata; 00058 s->use_exp_vlc = flags2 & 0x0001; 00059 s->use_bit_reservoir = flags2 & 0x0002; 00060 s->use_variable_block_len = flags2 & 0x0004; 00061 00062 ff_wma_init(avctx, flags2); 00063 00064 /* init MDCT */ 00065 for(i = 0; i < s->nb_block_sizes; i++) 00066 ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0); 00067 00068 avctx->block_align= 00069 s->block_align= avctx->bit_rate*(int64_t)s->frame_len / (avctx->sample_rate*8); 00070 //av_log(NULL, AV_LOG_ERROR, "%d %d %d %d\n", s->block_align, avctx->bit_rate, s->frame_len, avctx->sample_rate); 00071 avctx->frame_size= s->frame_len; 00072 00073 return 0; 00074 } 00075 00076 00077 static void apply_window_and_mdct(AVCodecContext * avctx, signed short * audio, int len) { 00078 WMACodecContext *s = avctx->priv_data; 00079 int window_index= s->frame_len_bits - s->block_len_bits; 00080 int i, j, channel; 00081 const float * win = s->windows[window_index]; 00082 int window_len = 1 << s->block_len_bits; 00083 float n = window_len/2; 00084 00085 for (channel = 0; channel < avctx->channels; channel++) { 00086 memcpy(s->output, s->frame_out[channel], sizeof(float)*window_len); 00087 j = channel; 00088 for (i = 0; i < len; i++, j += avctx->channels){ 00089 s->output[i+window_len] = audio[j] / n * win[window_len - i - 1]; 00090 s->frame_out[channel][i] = audio[j] / n * win[i]; 00091 } 00092 ff_mdct_calc(&s->mdct_ctx[window_index], s->coefs[channel], s->output); 00093 } 00094 } 00095 00096 //FIXME use for decoding too 00097 static void init_exp(WMACodecContext *s, int ch, const int *exp_param){ 00098 int n; 00099 const uint16_t *ptr; 00100 float v, *q, max_scale, *q_end; 00101 00102 ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; 00103 q = s->exponents[ch]; 00104 q_end = q + s->block_len; 00105 max_scale = 0; 00106 while (q < q_end) { 00107 /* XXX: use a table */ 00108 v = pow(10, *exp_param++ * (1.0 / 16.0)); 00109 max_scale= FFMAX(max_scale, v); 00110 n = *ptr++; 00111 do { 00112 *q++ = v; 00113 } while (--n); 00114 } 00115 s->max_exponent[ch] = max_scale; 00116 } 00117 00118 static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param){ 00119 int last_exp; 00120 const uint16_t *ptr; 00121 float *q, *q_end; 00122 00123 ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; 00124 q = s->exponents[ch]; 00125 q_end = q + s->block_len; 00126 if (s->version == 1) { 00127 last_exp= *exp_param++; 00128 assert(last_exp-10 >= 0 && last_exp-10 < 32); 00129 put_bits(&s->pb, 5, last_exp - 10); 00130 q+= *ptr++; 00131 }else 00132 last_exp = 36; 00133 while (q < q_end) { 00134 int exp = *exp_param++; 00135 int code = exp - last_exp + 60; 00136 assert(code >= 0 && code < 120); 00137 put_bits(&s->pb, ff_aac_scalefactor_bits[code], ff_aac_scalefactor_code[code]); 00138 /* XXX: use a table */ 00139 q+= *ptr++; 00140 last_exp= exp; 00141 } 00142 } 00143 00144 static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], int total_gain){ 00145 int v, bsize, ch, coef_nb_bits, parse_exponents; 00146 float mdct_norm; 00147 int nb_coefs[MAX_CHANNELS]; 00148 static const int fixed_exp[25]={20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20}; 00149 00150 //FIXME remove duplication relative to decoder 00151 if (s->use_variable_block_len) { 00152 assert(0); //FIXME not implemented 00153 }else{ 00154 /* fixed block len */ 00155 s->next_block_len_bits = s->frame_len_bits; 00156 s->prev_block_len_bits = s->frame_len_bits; 00157 s->block_len_bits = s->frame_len_bits; 00158 } 00159 00160 s->block_len = 1 << s->block_len_bits; 00161 // assert((s->block_pos + s->block_len) <= s->frame_len); 00162 bsize = s->frame_len_bits - s->block_len_bits; 00163 00164 //FIXME factor 00165 v = s->coefs_end[bsize] - s->coefs_start; 00166 for(ch = 0; ch < s->nb_channels; ch++) 00167 nb_coefs[ch] = v; 00168 { 00169 int n4 = s->block_len / 2; 00170 mdct_norm = 1.0 / (float)n4; 00171 if (s->version == 1) { 00172 mdct_norm *= sqrt(n4); 00173 } 00174 } 00175 00176 if (s->nb_channels == 2) { 00177 put_bits(&s->pb, 1, s->ms_stereo= 1); 00178 } 00179 00180 for(ch = 0; ch < s->nb_channels; ch++) { 00181 s->channel_coded[ch] = 1; //FIXME only set channel_coded when needed, instead of always 00182 if (s->channel_coded[ch]) { 00183 init_exp(s, ch, fixed_exp); 00184 } 00185 } 00186 00187 for(ch = 0; ch < s->nb_channels; ch++) { 00188 if (s->channel_coded[ch]) { 00189 WMACoef *coefs1; 00190 float *coefs, *exponents, mult; 00191 int i, n; 00192 00193 coefs1 = s->coefs1[ch]; 00194 exponents = s->exponents[ch]; 00195 mult = pow(10, total_gain * 0.05) / s->max_exponent[ch]; 00196 mult *= mdct_norm; 00197 coefs = src_coefs[ch]; 00198 if (s->use_noise_coding && 0) { 00199 assert(0); //FIXME not implemented 00200 } else { 00201 coefs += s->coefs_start; 00202 n = nb_coefs[ch]; 00203 for(i = 0;i < n; i++){ 00204 double t= *coefs++ / (exponents[i] * mult); 00205 if(t<-32768 || t>32767) 00206 return -1; 00207 00208 coefs1[i] = lrint(t); 00209 } 00210 } 00211 } 00212 } 00213 00214 v = 0; 00215 for(ch = 0; ch < s->nb_channels; ch++) { 00216 int a = s->channel_coded[ch]; 00217 put_bits(&s->pb, 1, a); 00218 v |= a; 00219 } 00220 00221 if (!v) 00222 return 1; 00223 00224 for(v= total_gain-1; v>=127; v-= 127) 00225 put_bits(&s->pb, 7, 127); 00226 put_bits(&s->pb, 7, v); 00227 00228 coef_nb_bits= ff_wma_total_gain_to_bits(total_gain); 00229 00230 if (s->use_noise_coding) { 00231 for(ch = 0; ch < s->nb_channels; ch++) { 00232 if (s->channel_coded[ch]) { 00233 int i, n; 00234 n = s->exponent_high_sizes[bsize]; 00235 for(i=0;i<n;i++) { 00236 put_bits(&s->pb, 1, s->high_band_coded[ch][i]= 0); 00237 if (0) 00238 nb_coefs[ch] -= s->exponent_high_bands[bsize][i]; 00239 } 00240 } 00241 } 00242 } 00243 00244 parse_exponents = 1; 00245 if (s->block_len_bits != s->frame_len_bits) { 00246 put_bits(&s->pb, 1, parse_exponents); 00247 } 00248 00249 if (parse_exponents) { 00250 for(ch = 0; ch < s->nb_channels; ch++) { 00251 if (s->channel_coded[ch]) { 00252 if (s->use_exp_vlc) { 00253 encode_exp_vlc(s, ch, fixed_exp); 00254 } else { 00255 assert(0); //FIXME not implemented 00256 // encode_exp_lsp(s, ch); 00257 } 00258 } 00259 } 00260 } else { 00261 assert(0); //FIXME not implemented 00262 } 00263 00264 for(ch = 0; ch < s->nb_channels; ch++) { 00265 if (s->channel_coded[ch]) { 00266 int run, tindex; 00267 WMACoef *ptr, *eptr; 00268 tindex = (ch == 1 && s->ms_stereo); 00269 ptr = &s->coefs1[ch][0]; 00270 eptr = ptr + nb_coefs[ch]; 00271 00272 run=0; 00273 for(;ptr < eptr; ptr++){ 00274 if(*ptr){ 00275 int level= *ptr; 00276 int abs_level= FFABS(level); 00277 int code= 0; 00278 if(abs_level <= s->coef_vlcs[tindex]->max_level){ 00279 if(run < s->coef_vlcs[tindex]->levels[abs_level-1]) 00280 code= run + s->int_table[tindex][abs_level-1]; 00281 } 00282 00283 assert(code < s->coef_vlcs[tindex]->n); 00284 put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code], s->coef_vlcs[tindex]->huffcodes[code]); 00285 00286 if(code == 0){ 00287 if(1<<coef_nb_bits <= abs_level) 00288 return -1; 00289 00290 00291 //Workaround minor rounding differences for the regression tests, FIXME we should find and replace the problematic float by fixpoint for reg tests 00292 if(abs_level == 0x71B && (s->avctx->flags & CODEC_FLAG_BITEXACT)) abs_level=0x71A; 00293 00294 put_bits(&s->pb, coef_nb_bits, abs_level); 00295 put_bits(&s->pb, s->frame_len_bits, run); 00296 } 00297 put_bits(&s->pb, 1, level < 0); //FIXME the sign is fliped somewhere 00298 run=0; 00299 }else{ 00300 run++; 00301 } 00302 } 00303 if(run) 00304 put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1], s->coef_vlcs[tindex]->huffcodes[1]); 00305 } 00306 if (s->version == 1 && s->nb_channels >= 2) { 00307 align_put_bits(&s->pb); 00308 } 00309 } 00310 return 0; 00311 } 00312 00313 static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], uint8_t *buf, int buf_size, int total_gain){ 00314 init_put_bits(&s->pb, buf, buf_size); 00315 00316 if (s->use_bit_reservoir) { 00317 assert(0);//FIXME not implemented 00318 }else{ 00319 if(encode_block(s, src_coefs, total_gain) < 0) 00320 return INT_MAX; 00321 } 00322 00323 align_put_bits(&s->pb); 00324 00325 return put_bits_count(&s->pb)/8 - s->block_align; 00326 } 00327 00328 static int encode_superframe(AVCodecContext *avctx, 00329 unsigned char *buf, int buf_size, void *data){ 00330 WMACodecContext *s = avctx->priv_data; 00331 short *samples = data; 00332 int i, total_gain; 00333 00334 s->block_len_bits= s->frame_len_bits; //required by non variable block len 00335 s->block_len = 1 << s->block_len_bits; 00336 00337 apply_window_and_mdct(avctx, samples, avctx->frame_size); 00338 00339 if (s->ms_stereo) { 00340 float a, b; 00341 int i; 00342 00343 for(i = 0; i < s->block_len; i++) { 00344 a = s->coefs[0][i]*0.5; 00345 b = s->coefs[1][i]*0.5; 00346 s->coefs[0][i] = a + b; 00347 s->coefs[1][i] = a - b; 00348 } 00349 } 00350 00351 #if 1 00352 total_gain= 128; 00353 for(i=64; i; i>>=1){ 00354 int error= encode_frame(s, s->coefs, buf, buf_size, total_gain-i); 00355 if(error<0) 00356 total_gain-= i; 00357 } 00358 #else 00359 total_gain= 90; 00360 best= encode_frame(s, s->coefs, buf, buf_size, total_gain); 00361 for(i=32; i; i>>=1){ 00362 int scoreL= encode_frame(s, s->coefs, buf, buf_size, total_gain-i); 00363 int scoreR= encode_frame(s, s->coefs, buf, buf_size, total_gain+i); 00364 av_log(NULL, AV_LOG_ERROR, "%d %d %d (%d)\n", scoreL, best, scoreR, total_gain); 00365 if(scoreL < FFMIN(best, scoreR)){ 00366 best = scoreL; 00367 total_gain -= i; 00368 }else if(scoreR < best){ 00369 best = scoreR; 00370 total_gain += i; 00371 } 00372 } 00373 #endif 00374 00375 encode_frame(s, s->coefs, buf, buf_size, total_gain); 00376 assert((put_bits_count(&s->pb) & 7) == 0); 00377 i= s->block_align - (put_bits_count(&s->pb)+7)/8; 00378 assert(i>=0); 00379 while(i--) 00380 put_bits(&s->pb, 8, 'N'); 00381 00382 flush_put_bits(&s->pb); 00383 return put_bits_ptr(&s->pb) - s->pb.buf; 00384 } 00385 00386 AVCodec wmav1_encoder = 00387 { 00388 "wmav1", 00389 AVMEDIA_TYPE_AUDIO, 00390 CODEC_ID_WMAV1, 00391 sizeof(WMACodecContext), 00392 encode_init, 00393 encode_superframe, 00394 ff_wma_end, 00395 .sample_fmts = (const enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE}, 00396 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"), 00397 }; 00398 00399 AVCodec wmav2_encoder = 00400 { 00401 "wmav2", 00402 AVMEDIA_TYPE_AUDIO, 00403 CODEC_ID_WMAV2, 00404 sizeof(WMACodecContext), 00405 encode_init, 00406 encode_superframe, 00407 ff_wma_end, 00408 .sample_fmts = (const enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE}, 00409 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"), 00410 };