libavcodec/g722enc.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) CMU 1993 Computer Science, Speech Group
00003  *                        Chengxiang Lu and Alex Hauptmann
00004  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
00005  * Copyright (c) 2009 Kenan Gillet
00006  * Copyright (c) 2010 Martin Storsjo
00007  *
00008  * This file is part of Libav.
00009  *
00010  * Libav is free software; you can redistribute it and/or
00011  * modify it under the terms of the GNU Lesser General Public
00012  * License as published by the Free Software Foundation; either
00013  * version 2.1 of the License, or (at your option) any later version.
00014  *
00015  * Libav is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018  * Lesser General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU Lesser General Public
00021  * License along with Libav; if not, write to the Free Software
00022  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00023  */
00024 
00030 #include "avcodec.h"
00031 #include "g722.h"
00032 
/* Number of trellis steps after which the best path found so far is written
 * to the output and the path storage is reused. The trellis path buffer is
 * sized as (frontier * FREEZE_INTERVAL) entries. */
#define FREEZE_INTERVAL 128

/* Upper bound for avctx->frame_size. The value itself is arbitrary; it only
 * exists because very large frame sizes lead to strange problems. */
#define MAX_FRAME_SIZE  32768

/* Allowed range for avctx->trellis. Clipping to this range prevents data
 * type overflows and undefined behavior; larger values would be extremely
 * slow anyway. */
#define MIN_TRELLIS     0
#define MAX_TRELLIS     16
00043 
00044 static av_cold int g722_encode_init(AVCodecContext * avctx)
00045 {
00046     G722Context *c = avctx->priv_data;
00047 
00048     if (avctx->channels != 1) {
00049         av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
00050         return AVERROR_INVALIDDATA;
00051     }
00052 
00053     c->band[0].scale_factor = 8;
00054     c->band[1].scale_factor = 2;
00055     c->prev_samples_pos = 22;
00056 
00057     if (avctx->trellis) {
00058         int frontier = 1 << avctx->trellis;
00059         int max_paths = frontier * FREEZE_INTERVAL;
00060         int i;
00061         for (i = 0; i < 2; i++) {
00062             c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
00063             c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
00064             c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
00065         }
00066     }
00067 
00068     if (avctx->frame_size) {
00069         /* validate frame size */
00070         if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
00071             int new_frame_size;
00072 
00073             if (avctx->frame_size == 1)
00074                 new_frame_size = 2;
00075             else if (avctx->frame_size > MAX_FRAME_SIZE)
00076                 new_frame_size = MAX_FRAME_SIZE;
00077             else
00078                 new_frame_size = avctx->frame_size - 1;
00079 
00080             av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
00081                    "allowed. Using %d instead of %d\n", new_frame_size,
00082                    avctx->frame_size);
00083             avctx->frame_size = new_frame_size;
00084         }
00085     } else {
00086         /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
00087            a common packet size for VoIP applications */
00088         avctx->frame_size = 320;
00089     }
00090 
00091     if (avctx->trellis) {
00092         /* validate trellis */
00093         if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
00094             int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
00095             av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
00096                    "allowed. Using %d instead of %d\n", new_trellis,
00097                    avctx->trellis);
00098             avctx->trellis = new_trellis;
00099         }
00100     }
00101 
00102     return 0;
00103 }
00104 
00105 static av_cold int g722_encode_close(AVCodecContext *avctx)
00106 {
00107     G722Context *c = avctx->priv_data;
00108     int i;
00109     for (i = 0; i < 2; i++) {
00110         av_freep(&c->paths[i]);
00111         av_freep(&c->node_buf[i]);
00112         av_freep(&c->nodep_buf[i]);
00113     }
00114     return 0;
00115 }
00116 
/* Decision thresholds used by encode_low() for the low band. Only the first
 * 29 entries are initialized explicitly and only those are scanned by
 * encode_low(); the remaining entries of the 33-element array are zero. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,
     190,  233,  276,  323,
     370,  422,  473,  530,
     587,  650,  714,  786,
     858,  940, 1023, 1121,
    1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557,
    2919
};
00123 
00124 static inline void filter_samples(G722Context *c, const int16_t *samples,
00125                                   int *xlow, int *xhigh)
00126 {
00127     int xout1, xout2;
00128     c->prev_samples[c->prev_samples_pos++] = samples[0];
00129     c->prev_samples[c->prev_samples_pos++] = samples[1];
00130     ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
00131     *xlow  = xout1 + xout2 >> 14;
00132     *xhigh = xout1 - xout2 >> 14;
00133     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
00134         memmove(c->prev_samples,
00135                 c->prev_samples + c->prev_samples_pos - 22,
00136                 22 * sizeof(c->prev_samples[0]));
00137         c->prev_samples_pos = 22;
00138     }
00139 }
00140 
00141 static inline int encode_high(const struct G722Band *state, int xhigh)
00142 {
00143     int diff = av_clip_int16(xhigh - state->s_predictor);
00144     int pred = 141 * state->scale_factor >> 8;
00145            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
00146     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
00147 }
00148 
00149 static inline int encode_low(const struct G722Band* state, int xlow)
00150 {
00151     int diff  = av_clip_int16(xlow - state->s_predictor);
00152            /* = diff >= 0 ? diff : -(diff + 1) */
00153     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
00154     int i = 0;
00155     limit = limit + 1 << 10;
00156     if (limit > low_quant[8] * state->scale_factor)
00157         i = 9;
00158     while (i < 29 && limit > low_quant[i] * state->scale_factor)
00159         i++;
00160     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
00161 }
00162 
00163 static void g722_encode_trellis(G722Context *c, int trellis,
00164                                 uint8_t *dst, int nb_samples,
00165                                 const int16_t *samples)
00166 {
00167     int i, j, k;
00168     int frontier = 1 << trellis;
00169     struct TrellisNode **nodes[2];
00170     struct TrellisNode **nodes_next[2];
00171     int pathn[2] = {0, 0}, froze = -1;
00172     struct TrellisPath *p[2];
00173 
00174     for (i = 0; i < 2; i++) {
00175         nodes[i] = c->nodep_buf[i];
00176         nodes_next[i] = c->nodep_buf[i] + frontier;
00177         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
00178         nodes[i][0] = c->node_buf[i] + frontier;
00179         nodes[i][0]->ssd = 0;
00180         nodes[i][0]->path = 0;
00181         nodes[i][0]->state = c->band[i];
00182     }
00183 
00184     for (i = 0; i < nb_samples >> 1; i++) {
00185         int xlow, xhigh;
00186         struct TrellisNode *next[2];
00187         int heap_pos[2] = {0, 0};
00188 
00189         for (j = 0; j < 2; j++) {
00190             next[j] = c->node_buf[j] + frontier*(i & 1);
00191             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
00192         }
00193 
00194         filter_samples(c, &samples[2*i], &xlow, &xhigh);
00195 
00196         for (j = 0; j < frontier && nodes[0][j]; j++) {
00197             /* Only k >> 2 affects the future adaptive state, therefore testing
00198              * small steps that don't change k >> 2 is useless, the original
00199              * value from encode_low is better than them. Since we step k
00200              * in steps of 4, make sure range is a multiple of 4, so that
00201              * we don't miss the original value from encode_low. */
00202             int range = j < frontier/2 ? 4 : 0;
00203             struct TrellisNode *cur_node = nodes[0][j];
00204 
00205             int ilow = encode_low(&cur_node->state, xlow);
00206 
00207             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
00208                 int decoded, dec_diff, pos;
00209                 uint32_t ssd;
00210                 struct TrellisNode* node;
00211 
00212                 if (k < 0)
00213                     continue;
00214 
00215                 decoded = av_clip((cur_node->state.scale_factor *
00216                                   ff_g722_low_inv_quant6[k] >> 10)
00217                                 + cur_node->state.s_predictor, -16384, 16383);
00218                 dec_diff = xlow - decoded;
00219 
00220 #define STORE_NODE(index, UPDATE, VALUE)\
00221                 ssd = cur_node->ssd + dec_diff*dec_diff;\
00222                 /* Check for wraparound. Using 64 bit ssd counters would \
00223                  * be simpler, but is slower on x86 32 bit. */\
00224                 if (ssd < cur_node->ssd)\
00225                     continue;\
00226                 if (heap_pos[index] < frontier) {\
00227                     pos = heap_pos[index]++;\
00228                     assert(pathn[index] < FREEZE_INTERVAL * frontier);\
00229                     node = nodes_next[index][pos] = next[index]++;\
00230                     node->path = pathn[index]++;\
00231                 } else {\
00232                     /* Try to replace one of the leaf nodes with the new \
00233                      * one, but not always testing the same leaf position */\
00234                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
00235                     if (ssd >= nodes_next[index][pos]->ssd)\
00236                         continue;\
00237                     heap_pos[index]++;\
00238                     node = nodes_next[index][pos];\
00239                 }\
00240                 node->ssd = ssd;\
00241                 node->state = cur_node->state;\
00242                 UPDATE;\
00243                 c->paths[index][node->path].value = VALUE;\
00244                 c->paths[index][node->path].prev = cur_node->path;\
00245                 /* Sift the newly inserted node up in the heap to restore \
00246                  * the heap property */\
00247                 while (pos > 0) {\
00248                     int parent = (pos - 1) >> 1;\
00249                     if (nodes_next[index][parent]->ssd <= ssd)\
00250                         break;\
00251                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
00252                                                 nodes_next[index][pos]);\
00253                     pos = parent;\
00254                 }
00255                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
00256             }
00257         }
00258 
00259         for (j = 0; j < frontier && nodes[1][j]; j++) {
00260             int ihigh;
00261             struct TrellisNode *cur_node = nodes[1][j];
00262 
00263             /* We don't try to get any initial guess for ihigh via
00264              * encode_high - since there's only 4 possible values, test
00265              * them all. Testing all of these gives a much, much larger
00266              * gain than testing a larger range around ilow. */
00267             for (ihigh = 0; ihigh < 4; ihigh++) {
00268                 int dhigh, decoded, dec_diff, pos;
00269                 uint32_t ssd;
00270                 struct TrellisNode* node;
00271 
00272                 dhigh = cur_node->state.scale_factor *
00273                         ff_g722_high_inv_quant[ihigh] >> 10;
00274                 decoded = av_clip(dhigh + cur_node->state.s_predictor,
00275                                   -16384, 16383);
00276                 dec_diff = xhigh - decoded;
00277 
00278                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
00279             }
00280         }
00281 
00282         for (j = 0; j < 2; j++) {
00283             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
00284 
00285             if (nodes[j][0]->ssd > (1 << 16)) {
00286                 for (k = 1; k < frontier && nodes[j][k]; k++)
00287                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
00288                 nodes[j][0]->ssd = 0;
00289             }
00290         }
00291 
00292         if (i == froze + FREEZE_INTERVAL) {
00293             p[0] = &c->paths[0][nodes[0][0]->path];
00294             p[1] = &c->paths[1][nodes[1][0]->path];
00295             for (j = i; j > froze; j--) {
00296                 dst[j] = p[1]->value << 6 | p[0]->value;
00297                 p[0] = &c->paths[0][p[0]->prev];
00298                 p[1] = &c->paths[1][p[1]->prev];
00299             }
00300             froze = i;
00301             pathn[0] = pathn[1] = 0;
00302             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
00303             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
00304         }
00305     }
00306 
00307     p[0] = &c->paths[0][nodes[0][0]->path];
00308     p[1] = &c->paths[1][nodes[1][0]->path];
00309     for (j = i; j > froze; j--) {
00310         dst[j] = p[1]->value << 6 | p[0]->value;
00311         p[0] = &c->paths[0][p[0]->prev];
00312         p[1] = &c->paths[1][p[1]->prev];
00313     }
00314     c->band[0] = nodes[0][0]->state;
00315     c->band[1] = nodes[1][0]->state;
00316 }
00317 
00318 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
00319                                          const int16_t *samples)
00320 {
00321     int xlow, xhigh, ilow, ihigh;
00322     filter_samples(c, samples, &xlow, &xhigh);
00323     ihigh = encode_high(&c->band[1], xhigh);
00324     ilow  = encode_low (&c->band[0], xlow);
00325     ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
00326                                 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
00327     ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
00328     *dst = ihigh << 6 | ilow;
00329 }
00330 
00331 static void g722_encode_no_trellis(G722Context *c,
00332                                    uint8_t *dst, int nb_samples,
00333                                    const int16_t *samples)
00334 {
00335     int i;
00336     for (i = 0; i < nb_samples; i += 2)
00337         encode_byte(c, dst++, &samples[i]);
00338 }
00339 
00340 static int g722_encode_frame(AVCodecContext *avctx,
00341                              uint8_t *dst, int buf_size, void *data)
00342 {
00343     G722Context *c = avctx->priv_data;
00344     const int16_t *samples = data;
00345     int nb_samples;
00346 
00347     nb_samples = avctx->frame_size - (avctx->frame_size & 1);
00348 
00349     if (avctx->trellis)
00350         g722_encode_trellis(c, avctx->trellis, dst, nb_samples, samples);
00351     else
00352         g722_encode_no_trellis(c, dst, nb_samples, samples);
00353 
00354     /* handle last frame with odd frame_size */
00355     if (nb_samples < avctx->frame_size) {
00356         int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
00357         encode_byte(c, &dst[nb_samples >> 1], last_samples);
00358     }
00359 
00360     return (avctx->frame_size + 1) >> 1;
00361 }
00362 
00363 AVCodec ff_adpcm_g722_encoder = {
00364     .name           = "g722",
00365     .type           = AVMEDIA_TYPE_AUDIO,
00366     .id             = CODEC_ID_ADPCM_G722,
00367     .priv_data_size = sizeof(G722Context),
00368     .init           = g722_encode_init,
00369     .close          = g722_encode_close,
00370     .encode         = g722_encode_frame,
00371     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
00372     .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
00373     .sample_fmts    = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
00374 };