/* Libav */
00001 /* 00002 * Simple free lossless/lossy audio codec 00003 * Copyright (c) 2004 Alex Beregszaszi 00004 * 00005 * This file is part of FFmpeg. 00006 * 00007 * FFmpeg is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * FFmpeg is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with FFmpeg; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 #include "avcodec.h" 00022 #include "get_bits.h" 00023 #include "golomb.h" 00024 00040 #define MAX_CHANNELS 2 00041 00042 #define MID_SIDE 0 00043 #define LEFT_SIDE 1 00044 #define RIGHT_SIDE 2 00045 00046 typedef struct SonicContext { 00047 int lossless, decorrelation; 00048 00049 int num_taps, downsampling; 00050 double quantization; 00051 00052 int channels, samplerate, block_align, frame_size; 00053 00054 int *tap_quant; 00055 int *int_samples; 00056 int *coded_samples[MAX_CHANNELS]; 00057 00058 // for encoding 00059 int *tail; 00060 int tail_size; 00061 int *window; 00062 int window_size; 00063 00064 // for decoding 00065 int *predictor_k; 00066 int *predictor_state[MAX_CHANNELS]; 00067 } SonicContext; 00068 00069 #define LATTICE_SHIFT 10 00070 #define SAMPLE_SHIFT 4 00071 #define LATTICE_FACTOR (1 << LATTICE_SHIFT) 00072 #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT) 00073 00074 #define BASE_QUANT 0.6 00075 #define RATE_VARIATION 3.0 00076 00077 static inline int divide(int a, int b) 00078 { 00079 if (a < 0) 00080 return -( (-a + 
b/2)/b ); 00081 else 00082 return (a + b/2)/b; 00083 } 00084 00085 static inline int shift(int a,int b) 00086 { 00087 return (a+(1<<(b-1))) >> b; 00088 } 00089 00090 static inline int shift_down(int a,int b) 00091 { 00092 return (a>>b)+((a<0)?1:0); 00093 } 00094 00095 #if 1 00096 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) 00097 { 00098 int i; 00099 00100 for (i = 0; i < entries; i++) 00101 set_se_golomb(pb, buf[i]); 00102 00103 return 1; 00104 } 00105 00106 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) 00107 { 00108 int i; 00109 00110 for (i = 0; i < entries; i++) 00111 buf[i] = get_se_golomb(gb); 00112 00113 return 1; 00114 } 00115 00116 #else 00117 00118 #define ADAPT_LEVEL 8 00119 00120 static int bits_to_store(uint64_t x) 00121 { 00122 int res = 0; 00123 00124 while(x) 00125 { 00126 res++; 00127 x >>= 1; 00128 } 00129 return res; 00130 } 00131 00132 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max) 00133 { 00134 int i, bits; 00135 00136 if (!max) 00137 return; 00138 00139 bits = bits_to_store(max); 00140 00141 for (i = 0; i < bits-1; i++) 00142 put_bits(pb, 1, value & (1 << i)); 00143 00144 if ( (value | (1 << (bits-1))) <= max) 00145 put_bits(pb, 1, value & (1 << (bits-1))); 00146 } 00147 00148 static unsigned int read_uint_max(GetBitContext *gb, int max) 00149 { 00150 int i, bits, value = 0; 00151 00152 if (!max) 00153 return 0; 00154 00155 bits = bits_to_store(max); 00156 00157 for (i = 0; i < bits-1; i++) 00158 if (get_bits1(gb)) 00159 value += 1 << i; 00160 00161 if ( (value | (1<<(bits-1))) <= max) 00162 if (get_bits1(gb)) 00163 value += 1 << (bits-1); 00164 00165 return value; 00166 } 00167 00168 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) 00169 { 00170 int i, j, x = 0, low_bits = 0, max = 0; 00171 int step = 256, pos = 0, dominant = 0, any = 0; 00172 int *copy, *bits; 00173 00174 copy = 
av_mallocz(4* entries); 00175 if (!copy) 00176 return -1; 00177 00178 if (base_2_part) 00179 { 00180 int energy = 0; 00181 00182 for (i = 0; i < entries; i++) 00183 energy += abs(buf[i]); 00184 00185 low_bits = bits_to_store(energy / (entries * 2)); 00186 if (low_bits > 15) 00187 low_bits = 15; 00188 00189 put_bits(pb, 4, low_bits); 00190 } 00191 00192 for (i = 0; i < entries; i++) 00193 { 00194 put_bits(pb, low_bits, abs(buf[i])); 00195 copy[i] = abs(buf[i]) >> low_bits; 00196 if (copy[i] > max) 00197 max = abs(copy[i]); 00198 } 00199 00200 bits = av_mallocz(4* entries*max); 00201 if (!bits) 00202 { 00203 // av_free(copy); 00204 return -1; 00205 } 00206 00207 for (i = 0; i <= max; i++) 00208 { 00209 for (j = 0; j < entries; j++) 00210 if (copy[j] >= i) 00211 bits[x++] = copy[j] > i; 00212 } 00213 00214 // store bitstream 00215 while (pos < x) 00216 { 00217 int steplet = step >> 8; 00218 00219 if (pos + steplet > x) 00220 steplet = x - pos; 00221 00222 for (i = 0; i < steplet; i++) 00223 if (bits[i+pos] != dominant) 00224 any = 1; 00225 00226 put_bits(pb, 1, any); 00227 00228 if (!any) 00229 { 00230 pos += steplet; 00231 step += step / ADAPT_LEVEL; 00232 } 00233 else 00234 { 00235 int interloper = 0; 00236 00237 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant)) 00238 interloper++; 00239 00240 // note change 00241 write_uint_max(pb, interloper, (step >> 8) - 1); 00242 00243 pos += interloper + 1; 00244 step -= step / ADAPT_LEVEL; 00245 } 00246 00247 if (step < 256) 00248 { 00249 step = 65536 / step; 00250 dominant = !dominant; 00251 } 00252 } 00253 00254 // store signs 00255 for (i = 0; i < entries; i++) 00256 if (buf[i]) 00257 put_bits(pb, 1, buf[i] < 0); 00258 00259 // av_free(bits); 00260 // av_free(copy); 00261 00262 return 0; 00263 } 00264 00265 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) 00266 { 00267 int i, low_bits = 0, x = 0; 00268 int n_zeros = 0, step = 256, dominant = 0; 00269 int pos = 0, 
level = 0; 00270 int *bits = av_mallocz(4* entries); 00271 00272 if (!bits) 00273 return -1; 00274 00275 if (base_2_part) 00276 { 00277 low_bits = get_bits(gb, 4); 00278 00279 if (low_bits) 00280 for (i = 0; i < entries; i++) 00281 buf[i] = get_bits(gb, low_bits); 00282 } 00283 00284 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits); 00285 00286 while (n_zeros < entries) 00287 { 00288 int steplet = step >> 8; 00289 00290 if (!get_bits1(gb)) 00291 { 00292 for (i = 0; i < steplet; i++) 00293 bits[x++] = dominant; 00294 00295 if (!dominant) 00296 n_zeros += steplet; 00297 00298 step += step / ADAPT_LEVEL; 00299 } 00300 else 00301 { 00302 int actual_run = read_uint_max(gb, steplet-1); 00303 00304 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run); 00305 00306 for (i = 0; i < actual_run; i++) 00307 bits[x++] = dominant; 00308 00309 bits[x++] = !dominant; 00310 00311 if (!dominant) 00312 n_zeros += actual_run; 00313 else 00314 n_zeros++; 00315 00316 step -= step / ADAPT_LEVEL; 00317 } 00318 00319 if (step < 256) 00320 { 00321 step = 65536 / step; 00322 dominant = !dominant; 00323 } 00324 } 00325 00326 // reconstruct unsigned values 00327 n_zeros = 0; 00328 for (i = 0; n_zeros < entries; i++) 00329 { 00330 while(1) 00331 { 00332 if (pos >= entries) 00333 { 00334 pos = 0; 00335 level += 1 << low_bits; 00336 } 00337 00338 if (buf[pos] >= level) 00339 break; 00340 00341 pos++; 00342 } 00343 00344 if (bits[i]) 00345 buf[pos] += 1 << low_bits; 00346 else 00347 n_zeros++; 00348 00349 pos++; 00350 } 00351 // av_free(bits); 00352 00353 // read signs 00354 for (i = 0; i < entries; i++) 00355 if (buf[i] && get_bits1(gb)) 00356 buf[i] = -buf[i]; 00357 00358 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos); 00359 00360 return 0; 00361 } 00362 #endif 00363 00364 static void predictor_init_state(int *k, int *state, int order) 00365 { 00366 int i; 00367 00368 for (i = order-2; i >= 0; i--) 00369 { 00370 int j, p, x = state[i]; 
00371 00372 for (j = 0, p = i+1; p < order; j++,p++) 00373 { 00374 int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT); 00375 state[p] += shift_down(k[j]*x, LATTICE_SHIFT); 00376 x = tmp; 00377 } 00378 } 00379 } 00380 00381 static int predictor_calc_error(int *k, int *state, int order, int error) 00382 { 00383 int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT); 00384 00385 #if 1 00386 int *k_ptr = &(k[order-2]), 00387 *state_ptr = &(state[order-2]); 00388 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--) 00389 { 00390 int k_value = *k_ptr, state_value = *state_ptr; 00391 x -= shift_down(k_value * state_value, LATTICE_SHIFT); 00392 state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT); 00393 } 00394 #else 00395 for (i = order-2; i >= 0; i--) 00396 { 00397 x -= shift_down(k[i] * state[i], LATTICE_SHIFT); 00398 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT); 00399 } 00400 #endif 00401 00402 // don't drift too far, to avoid overflows 00403 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16); 00404 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16); 00405 00406 state[0] = x; 00407 00408 return x; 00409 } 00410 00411 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER 00412 // Heavily modified Levinson-Durbin algorithm which 00413 // copes better with quantization, and calculates the 00414 // actual whitened result as it goes. 
00415 00416 static void modified_levinson_durbin(int *window, int window_entries, 00417 int *out, int out_entries, int channels, int *tap_quant) 00418 { 00419 int i; 00420 int *state = av_mallocz(4* window_entries); 00421 00422 memcpy(state, window, 4* window_entries); 00423 00424 for (i = 0; i < out_entries; i++) 00425 { 00426 int step = (i+1)*channels, k, j; 00427 double xx = 0.0, xy = 0.0; 00428 #if 1 00429 int *x_ptr = &(window[step]), *state_ptr = &(state[0]); 00430 j = window_entries - step; 00431 for (;j>=0;j--,x_ptr++,state_ptr++) 00432 { 00433 double x_value = *x_ptr, state_value = *state_ptr; 00434 xx += state_value*state_value; 00435 xy += x_value*state_value; 00436 } 00437 #else 00438 for (j = 0; j <= (window_entries - step); j++); 00439 { 00440 double stepval = window[step+j], stateval = window[j]; 00441 // xx += (double)window[j]*(double)window[j]; 00442 // xy += (double)window[step+j]*(double)window[j]; 00443 xx += stateval*stateval; 00444 xy += stepval*stateval; 00445 } 00446 #endif 00447 if (xx == 0.0) 00448 k = 0; 00449 else 00450 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5)); 00451 00452 if (k > (LATTICE_FACTOR/tap_quant[i])) 00453 k = LATTICE_FACTOR/tap_quant[i]; 00454 if (-k > (LATTICE_FACTOR/tap_quant[i])) 00455 k = -(LATTICE_FACTOR/tap_quant[i]); 00456 00457 out[i] = k; 00458 k *= tap_quant[i]; 00459 00460 #if 1 00461 x_ptr = &(window[step]); 00462 state_ptr = &(state[0]); 00463 j = window_entries - step; 00464 for (;j>=0;j--,x_ptr++,state_ptr++) 00465 { 00466 int x_value = *x_ptr, state_value = *state_ptr; 00467 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT); 00468 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT); 00469 } 00470 #else 00471 for (j=0; j <= (window_entries - step); j++) 00472 { 00473 int stepval = window[step+j], stateval=state[j]; 00474 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT); 00475 state[j] += shift_down(k * stepval, LATTICE_SHIFT); 00476 } 00477 
#endif 00478 } 00479 00480 av_free(state); 00481 } 00482 00483 static inline int code_samplerate(int samplerate) 00484 { 00485 switch (samplerate) 00486 { 00487 case 44100: return 0; 00488 case 22050: return 1; 00489 case 11025: return 2; 00490 case 96000: return 3; 00491 case 48000: return 4; 00492 case 32000: return 5; 00493 case 24000: return 6; 00494 case 16000: return 7; 00495 case 8000: return 8; 00496 } 00497 return -1; 00498 } 00499 00500 static av_cold int sonic_encode_init(AVCodecContext *avctx) 00501 { 00502 SonicContext *s = avctx->priv_data; 00503 PutBitContext pb; 00504 int i, version = 0; 00505 00506 if (avctx->channels > MAX_CHANNELS) 00507 { 00508 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); 00509 return -1; /* only stereo or mono for now */ 00510 } 00511 00512 if (avctx->channels == 2) 00513 s->decorrelation = MID_SIDE; 00514 00515 if (avctx->codec->id == CODEC_ID_SONIC_LS) 00516 { 00517 s->lossless = 1; 00518 s->num_taps = 32; 00519 s->downsampling = 1; 00520 s->quantization = 0.0; 00521 } 00522 else 00523 { 00524 s->num_taps = 128; 00525 s->downsampling = 2; 00526 s->quantization = 1.0; 00527 } 00528 00529 // max tap 2048 00530 if ((s->num_taps < 32) || (s->num_taps > 1024) || 00531 ((s->num_taps>>5)<<5 != s->num_taps)) 00532 { 00533 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n"); 00534 return -1; 00535 } 00536 00537 // generate taps 00538 s->tap_quant = av_mallocz(4* s->num_taps); 00539 for (i = 0; i < s->num_taps; i++) 00540 s->tap_quant[i] = (int)(sqrt(i+1)); 00541 00542 s->channels = avctx->channels; 00543 s->samplerate = avctx->sample_rate; 00544 00545 s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling; 00546 s->frame_size = s->channels*s->block_align*s->downsampling; 00547 00548 s->tail = av_mallocz(4* s->num_taps*s->channels); 00549 if (!s->tail) 00550 return -1; 00551 s->tail_size = s->num_taps*s->channels; 00552 00553 s->predictor_k = av_mallocz(4 * s->num_taps); 00554 if 
(!s->predictor_k) 00555 return -1; 00556 00557 for (i = 0; i < s->channels; i++) 00558 { 00559 s->coded_samples[i] = av_mallocz(4* s->block_align); 00560 if (!s->coded_samples[i]) 00561 return -1; 00562 } 00563 00564 s->int_samples = av_mallocz(4* s->frame_size); 00565 00566 s->window_size = ((2*s->tail_size)+s->frame_size); 00567 s->window = av_mallocz(4* s->window_size); 00568 if (!s->window) 00569 return -1; 00570 00571 avctx->extradata = av_mallocz(16); 00572 if (!avctx->extradata) 00573 return -1; 00574 init_put_bits(&pb, avctx->extradata, 16*8); 00575 00576 put_bits(&pb, 2, version); // version 00577 if (version == 1) 00578 { 00579 put_bits(&pb, 2, s->channels); 00580 put_bits(&pb, 4, code_samplerate(s->samplerate)); 00581 } 00582 put_bits(&pb, 1, s->lossless); 00583 if (!s->lossless) 00584 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision 00585 put_bits(&pb, 2, s->decorrelation); 00586 put_bits(&pb, 2, s->downsampling); 00587 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024 00588 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table 00589 00590 flush_put_bits(&pb); 00591 avctx->extradata_size = put_bits_count(&pb)/8; 00592 00593 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", 00594 version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); 00595 00596 avctx->coded_frame = avcodec_alloc_frame(); 00597 if (!avctx->coded_frame) 00598 return AVERROR(ENOMEM); 00599 avctx->coded_frame->key_frame = 1; 00600 avctx->frame_size = s->block_align*s->downsampling; 00601 00602 return 0; 00603 } 00604 00605 static av_cold int sonic_encode_close(AVCodecContext *avctx) 00606 { 00607 SonicContext *s = avctx->priv_data; 00608 int i; 00609 00610 av_freep(&avctx->coded_frame); 00611 00612 for (i = 0; i < s->channels; i++) 00613 av_free(s->coded_samples[i]); 00614 00615 av_free(s->predictor_k); 00616 av_free(s->tail); 00617 av_free(s->tap_quant); 00618 
av_free(s->window); 00619 av_free(s->int_samples); 00620 00621 return 0; 00622 } 00623 00624 static int sonic_encode_frame(AVCodecContext *avctx, 00625 uint8_t *buf, int buf_size, void *data) 00626 { 00627 SonicContext *s = avctx->priv_data; 00628 PutBitContext pb; 00629 int i, j, ch, quant = 0, x = 0; 00630 short *samples = data; 00631 00632 init_put_bits(&pb, buf, buf_size*8); 00633 00634 // short -> internal 00635 for (i = 0; i < s->frame_size; i++) 00636 s->int_samples[i] = samples[i]; 00637 00638 if (!s->lossless) 00639 for (i = 0; i < s->frame_size; i++) 00640 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT; 00641 00642 switch(s->decorrelation) 00643 { 00644 case MID_SIDE: 00645 for (i = 0; i < s->frame_size; i += s->channels) 00646 { 00647 s->int_samples[i] += s->int_samples[i+1]; 00648 s->int_samples[i+1] -= shift(s->int_samples[i], 1); 00649 } 00650 break; 00651 case LEFT_SIDE: 00652 for (i = 0; i < s->frame_size; i += s->channels) 00653 s->int_samples[i+1] -= s->int_samples[i]; 00654 break; 00655 case RIGHT_SIDE: 00656 for (i = 0; i < s->frame_size; i += s->channels) 00657 s->int_samples[i] -= s->int_samples[i+1]; 00658 break; 00659 } 00660 00661 memset(s->window, 0, 4* s->window_size); 00662 00663 for (i = 0; i < s->tail_size; i++) 00664 s->window[x++] = s->tail[i]; 00665 00666 for (i = 0; i < s->frame_size; i++) 00667 s->window[x++] = s->int_samples[i]; 00668 00669 for (i = 0; i < s->tail_size; i++) 00670 s->window[x++] = 0; 00671 00672 for (i = 0; i < s->tail_size; i++) 00673 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i]; 00674 00675 // generate taps 00676 modified_levinson_durbin(s->window, s->window_size, 00677 s->predictor_k, s->num_taps, s->channels, s->tap_quant); 00678 if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0) 00679 return -1; 00680 00681 for (ch = 0; ch < s->channels; ch++) 00682 { 00683 x = s->tail_size+ch; 00684 for (i = 0; i < s->block_align; i++) 00685 { 00686 int sum = 0; 00687 for (j = 0; j < 
s->downsampling; j++, x += s->channels) 00688 sum += s->window[x]; 00689 s->coded_samples[ch][i] = sum; 00690 } 00691 } 00692 00693 // simple rate control code 00694 if (!s->lossless) 00695 { 00696 double energy1 = 0.0, energy2 = 0.0; 00697 for (ch = 0; ch < s->channels; ch++) 00698 { 00699 for (i = 0; i < s->block_align; i++) 00700 { 00701 double sample = s->coded_samples[ch][i]; 00702 energy2 += sample*sample; 00703 energy1 += fabs(sample); 00704 } 00705 } 00706 00707 energy2 = sqrt(energy2/(s->channels*s->block_align)); 00708 energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align); 00709 00710 // increase bitrate when samples are like a gaussian distribution 00711 // reduce bitrate when samples are like a two-tailed exponential distribution 00712 00713 if (energy2 > energy1) 00714 energy2 += (energy2-energy1)*RATE_VARIATION; 00715 00716 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR); 00717 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2); 00718 00719 if (quant < 1) 00720 quant = 1; 00721 if (quant > 65535) 00722 quant = 65535; 00723 00724 set_ue_golomb(&pb, quant); 00725 00726 quant *= SAMPLE_FACTOR; 00727 } 00728 00729 // write out coded samples 00730 for (ch = 0; ch < s->channels; ch++) 00731 { 00732 if (!s->lossless) 00733 for (i = 0; i < s->block_align; i++) 00734 s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant); 00735 00736 if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0) 00737 return -1; 00738 } 00739 00740 // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8); 00741 00742 flush_put_bits(&pb); 00743 return (put_bits_count(&pb)+7)/8; 00744 } 00745 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */ 00746 00747 #if CONFIG_SONIC_DECODER 00748 static const int samplerate_table[] = 00749 { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 }; 00750 00751 static av_cold int sonic_decode_init(AVCodecContext *avctx) 00752 { 00753 
SonicContext *s = avctx->priv_data; 00754 GetBitContext gb; 00755 int i, version; 00756 00757 s->channels = avctx->channels; 00758 s->samplerate = avctx->sample_rate; 00759 00760 if (!avctx->extradata) 00761 { 00762 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n"); 00763 return -1; 00764 } 00765 00766 init_get_bits(&gb, avctx->extradata, avctx->extradata_size); 00767 00768 version = get_bits(&gb, 2); 00769 if (version > 1) 00770 { 00771 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n"); 00772 return -1; 00773 } 00774 00775 if (version == 1) 00776 { 00777 s->channels = get_bits(&gb, 2); 00778 s->samplerate = samplerate_table[get_bits(&gb, 4)]; 00779 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n", 00780 s->channels, s->samplerate); 00781 } 00782 00783 if (s->channels > MAX_CHANNELS) 00784 { 00785 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); 00786 return -1; 00787 } 00788 00789 s->lossless = get_bits1(&gb); 00790 if (!s->lossless) 00791 skip_bits(&gb, 3); // XXX FIXME 00792 s->decorrelation = get_bits(&gb, 2); 00793 00794 s->downsampling = get_bits(&gb, 2); 00795 s->num_taps = (get_bits(&gb, 5)+1)<<5; 00796 if (get_bits1(&gb)) // XXX FIXME 00797 av_log(avctx, AV_LOG_INFO, "Custom quant table\n"); 00798 00799 s->block_align = (int)(2048.0*(s->samplerate/44100))/s->downsampling; 00800 s->frame_size = s->channels*s->block_align*s->downsampling; 00801 // avctx->frame_size = s->block_align; 00802 00803 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", 00804 version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); 00805 00806 // generate taps 00807 s->tap_quant = av_mallocz(4* s->num_taps); 00808 for (i = 0; i < s->num_taps; i++) 00809 s->tap_quant[i] = (int)(sqrt(i+1)); 00810 00811 s->predictor_k = av_mallocz(4* s->num_taps); 00812 00813 for (i = 0; i < s->channels; i++) 00814 { 00815 
s->predictor_state[i] = av_mallocz(4* s->num_taps); 00816 if (!s->predictor_state[i]) 00817 return -1; 00818 } 00819 00820 for (i = 0; i < s->channels; i++) 00821 { 00822 s->coded_samples[i] = av_mallocz(4* s->block_align); 00823 if (!s->coded_samples[i]) 00824 return -1; 00825 } 00826 s->int_samples = av_mallocz(4* s->frame_size); 00827 00828 avctx->sample_fmt = SAMPLE_FMT_S16; 00829 return 0; 00830 } 00831 00832 static av_cold int sonic_decode_close(AVCodecContext *avctx) 00833 { 00834 SonicContext *s = avctx->priv_data; 00835 int i; 00836 00837 av_free(s->int_samples); 00838 av_free(s->tap_quant); 00839 av_free(s->predictor_k); 00840 00841 for (i = 0; i < s->channels; i++) 00842 { 00843 av_free(s->predictor_state[i]); 00844 av_free(s->coded_samples[i]); 00845 } 00846 00847 return 0; 00848 } 00849 00850 static int sonic_decode_frame(AVCodecContext *avctx, 00851 void *data, int *data_size, 00852 AVPacket *avpkt) 00853 { 00854 const uint8_t *buf = avpkt->data; 00855 int buf_size = avpkt->size; 00856 SonicContext *s = avctx->priv_data; 00857 GetBitContext gb; 00858 int i, quant, ch, j; 00859 short *samples = data; 00860 00861 if (buf_size == 0) return 0; 00862 00863 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size); 00864 00865 init_get_bits(&gb, buf, buf_size*8); 00866 00867 intlist_read(&gb, s->predictor_k, s->num_taps, 0); 00868 00869 // dequantize 00870 for (i = 0; i < s->num_taps; i++) 00871 s->predictor_k[i] *= s->tap_quant[i]; 00872 00873 if (s->lossless) 00874 quant = 1; 00875 else 00876 quant = get_ue_golomb(&gb) * SAMPLE_FACTOR; 00877 00878 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant); 00879 00880 for (ch = 0; ch < s->channels; ch++) 00881 { 00882 int x = ch; 00883 00884 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps); 00885 00886 intlist_read(&gb, s->coded_samples[ch], s->block_align, 1); 00887 00888 for (i = 0; i < s->block_align; i++) 00889 { 00890 for (j = 0; j < s->downsampling - 1; j++) 00891 { 00892 
s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0); 00893 x += s->channels; 00894 } 00895 00896 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant); 00897 x += s->channels; 00898 } 00899 00900 for (i = 0; i < s->num_taps; i++) 00901 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels]; 00902 } 00903 00904 switch(s->decorrelation) 00905 { 00906 case MID_SIDE: 00907 for (i = 0; i < s->frame_size; i += s->channels) 00908 { 00909 s->int_samples[i+1] += shift(s->int_samples[i], 1); 00910 s->int_samples[i] -= s->int_samples[i+1]; 00911 } 00912 break; 00913 case LEFT_SIDE: 00914 for (i = 0; i < s->frame_size; i += s->channels) 00915 s->int_samples[i+1] += s->int_samples[i]; 00916 break; 00917 case RIGHT_SIDE: 00918 for (i = 0; i < s->frame_size; i += s->channels) 00919 s->int_samples[i] += s->int_samples[i+1]; 00920 break; 00921 } 00922 00923 if (!s->lossless) 00924 for (i = 0; i < s->frame_size; i++) 00925 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT); 00926 00927 // internal -> short 00928 for (i = 0; i < s->frame_size; i++) 00929 samples[i] = av_clip_int16(s->int_samples[i]); 00930 00931 align_get_bits(&gb); 00932 00933 *data_size = s->frame_size * 2; 00934 00935 return (get_bits_count(&gb)+7)/8; 00936 } 00937 00938 AVCodec sonic_decoder = { 00939 "sonic", 00940 AVMEDIA_TYPE_AUDIO, 00941 CODEC_ID_SONIC, 00942 sizeof(SonicContext), 00943 sonic_decode_init, 00944 NULL, 00945 sonic_decode_close, 00946 sonic_decode_frame, 00947 .long_name = NULL_IF_CONFIG_SMALL("Sonic"), 00948 }; 00949 #endif /* CONFIG_SONIC_DECODER */ 00950 00951 #if CONFIG_SONIC_ENCODER 00952 AVCodec sonic_encoder = { 00953 "sonic", 00954 AVMEDIA_TYPE_AUDIO, 00955 CODEC_ID_SONIC, 00956 sizeof(SonicContext), 00957 sonic_encode_init, 00958 sonic_encode_frame, 00959 sonic_encode_close, 00960 NULL, 00961 .long_name = 
NULL_IF_CONFIG_SMALL("Sonic"), 00962 }; 00963 #endif 00964 00965 #if CONFIG_SONIC_LS_ENCODER 00966 AVCodec sonic_ls_encoder = { 00967 "sonicls", 00968 AVMEDIA_TYPE_AUDIO, 00969 CODEC_ID_SONIC_LS, 00970 sizeof(SonicContext), 00971 sonic_encode_init, 00972 sonic_encode_frame, 00973 sonic_encode_close, 00974 NULL, 00975 .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"), 00976 }; 00977 #endif