libavcodec/psymodel.h
Go to the documentation of this file.
00001 /*
00002  * audio encoder psychoacoustic model
00003  * Copyright (C) 2008 Konstantin Shishkov
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00022 #ifndef AVCODEC_PSYMODEL_H
00023 #define AVCODEC_PSYMODEL_H
00024 
00025 #include "avcodec.h"
00026 
/* Capacity, in bands, of the fixed per-band arrays declared in this header
 * (FFPsyChannel.psy_bands and FFPsyChannelGroup.coupling). */
00028 #define PSY_MAX_BANDS 128
00029 
/* Capacity, in channels, of the FFPsyChannelGroup.ch pointer array. */
00030 #define PSY_MAX_CHANS 20
00031 
/**
 * Psychoacoustic data kept for a single band; FFPsyChannel stores an array
 * of these.
 *
 * NOTE(review): the per-field notes are inferred from the field names only;
 * units, scaling and who fills each field are not visible in this header.
 */
00035 typedef struct FFPsyBand {
00036     int   bits;               /**< presumably a bit count associated with the band -- confirm */
00037     float energy;             /**< presumably the band's signal energy -- confirm */
00038     float threshold;          /**< presumably the band's masking threshold -- confirm */
00039     float distortion;         /**< presumably a distortion measure for the band -- confirm */
00040     float perceptual_weight;  /**< presumably a relative importance weight -- confirm */
00041 } FFPsyBand;
00042 
/**
 * Psychoacoustic data for a single channel.
 */
00046 typedef struct FFPsyChannel {
00047     FFPsyBand psy_bands[PSY_MAX_BANDS]; /**< per-band data, PSY_MAX_BANDS slots */
00048     float     entropy;                  /**< NOTE(review): likely a perceptual-entropy total for the channel -- confirm */
00049 } FFPsyChannel;
00050 
/**
 * Psychoacoustic data for a group of channels; ff_psy_find_group() returns a
 * pointer to one of these for a given channel.
 */
00054 typedef struct FFPsyChannelGroup {
00055     FFPsyChannel *ch[PSY_MAX_CHANS];  /**< pointers to member channels, at most PSY_MAX_CHANS */
00056     uint8_t num_ch;                   /**< presumably the number of valid entries in ch -- confirm */
00057     uint8_t coupling[PSY_MAX_BANDS];  /**< one byte per band; presumably a per-band coupling flag -- confirm */
00058 } FFPsyChannelGroup;
00059 
/**
 * Windowing description: returned by value from FFPsyModel.window and passed
 * by const pointer to FFPsyModel.analyze.
 */
00063 typedef struct FFPsyWindowInfo {
00064     int window_type[3];               /**< three window-type values; NOTE(review): slot meaning (e.g. current/previous/next) is not shown here -- confirm */
00065     int window_shape;                 /**< window shape selector; value set is defined outside this header */
00066     int num_windows;                  /**< presumably the number of windows in one frame -- confirm */
00067     int grouping[8];                  /**< eight grouping values; presumably one per window -- confirm */
00068     int *window_sizes;                /**< pointer to window sizes; NOTE(review): length and ownership are not visible here */
00069 } FFPsyWindowInfo;
00070 
/**
 * State shared between an encoder and the psychoacoustic model; it is the
 * first argument of ff_psy_init(), ff_psy_find_group(), ff_psy_end() and of
 * every FFPsyModel callback.
 *
 * NOTE(review): ff_psy_init() takes parameters named num_lens, bands,
 * num_bands and num_groups; presumably they populate the same-named members
 * below -- confirm against the implementation.
 */
00074 typedef struct FFPsyContext {
00075     AVCodecContext *avctx;            /**< codec context this model instance is tied to */
00076     const struct FFPsyModel *model;   /**< function table of the model implementation in use */
00077 
00078     FFPsyChannel      *ch;            /**< channel data; presumably an array with one entry per channel -- confirm */
00079     FFPsyChannelGroup *group;         /**< channel-group data; presumably num_groups entries -- confirm */
00080     int num_groups;                   /**< number of channel groups */
00081 
00082     uint8_t **bands;                  /**< band tables; presumably one table per entry counted by num_lens -- confirm */
00083     int     *num_bands;               /**< presumably the band count of each table in bands -- confirm */
00084     int num_lens;                     /**< presumably the number of entries in bands and num_bands -- confirm */
00085 
00086     struct {
00087         int size;                     /**< NOTE(review): likely the bit-reservoir size -- confirm units */
00088         int bits;                     /**< NOTE(review): likely the bits currently accounted for in the reservoir -- confirm */
00089     } bitres;
00090 
00091     void* model_priv_data;            /**< opaque pointer; presumably private state of the model implementation -- confirm ownership */
00092 } FFPsyContext;
00093 
/**
 * Function table through which a psychoacoustic model implementation is
 * reached; FFPsyContext.model points at one of these.
 */
00097 typedef struct FFPsyModel {
00098     const char *name;    /**< model name string */
          /**
           * Set-up hook for the model.
           *
           * @param apc model context
           * @return int status; NOTE(review): the success/failure convention
           *         is not visible in this header -- confirm
           */
00099     int  (*init)   (FFPsyContext *apc);
00100 
          /**
           * Produce window information for one channel.
           *
           * @param ctx       model context
           * @param audio     16-bit input samples (read-only)
           * @param la        second 16-bit sample buffer (read-only);
           *                  NOTE(review): presumably look-ahead samples, and
           *                  whether NULL is allowed is not shown here -- confirm
           * @param channel   channel index
           * @param prev_type NOTE(review): presumably the previous window type
           *                  of this channel -- confirm
           * @return window information, returned by value
           */
00112     FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type);
00113 
          /**
           * Run the analysis step of the model.
           *
           * @param ctx     model context
           * @param channel channel index; NOTE(review): may denote the first
           *                channel of a group -- confirm
           * @param coeffs  array of pointers to float coefficients (read-only);
           *                NOTE(review): presumably one pointer per channel -- confirm
           * @param wi      window information (read-only);
           *                NOTE(review): single item or array is not visible here
           */
00122     void (*analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi);
00123 
          /**
           * Tear-down hook for the model.
           *
           * @param apc model context
           */
00124     void (*end)    (FFPsyContext *apc);
00125 } FFPsyModel;
00126 
/**
 * Initialise a psychoacoustic model context.
 *
 * @param ctx        model context to set up
 * @param avctx      codec context the model is attached to
 * @param num_lens   NOTE(review): presumably the number of entries in bands
 *                   and num_bands -- confirm
 * @param bands      array of pointers to byte tables describing bands (read-only)
 * @param num_bands  array of band counts (read-only); presumably one per
 *                   table in bands -- confirm
 * @param num_groups number of channel groups
 * @param group_map  byte array describing the groups (read-only);
 *                   NOTE(review): its encoding is not visible in this header
 * @return int status; NOTE(review): presumably 0 on success and a negative
 *         value on failure -- confirm against the implementation
 */
00140 av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
00141                         const uint8_t **bands, const int* num_bands,
00142                         int num_groups, const uint8_t *group_map);
00143 
/**
 * Look up the channel group associated with a channel.
 *
 * @param ctx     model context
 * @param channel channel index
 * @return pointer to a channel group; NOTE(review): behaviour for an
 *         out-of-range channel is not visible in this header
 */
00152 FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel);
00153 
/**
 * Counterpart of ff_psy_init(): finish using a model context.
 *
 * NOTE(review): presumably releases what ff_psy_init() set up; exactly what
 * is freed is not visible in this header.
 *
 * @param ctx model context
 */
00159 av_cold void ff_psy_end(FFPsyContext *ctx);
00160 
00161 
00162 /**************************************************************************
00163  *                       Audio preprocessing stuff.                       *
00164  *       This should be moved into some audio filter eventually.          *
00165  **************************************************************************/
/* Forward declaration only: the layout is not defined in this header, so the
 * declarations here refer to it purely through pointers. */
00166 struct FFPsyPreprocessContext;
00167 
/**
 * Set up audio preprocessing for a codec context.
 *
 * @param avctx codec context
 * @return pointer to a preprocessing context; NOTE(review): presumably newly
 *         allocated and to be released with ff_psy_preprocess_end(), and
 *         whether NULL signals failure is not visible here -- confirm
 */
00171 av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx);
00172 
/**
 * Preprocess 16-bit audio from one buffer into another.
 *
 * @param ctx      preprocessing context
 * @param audio    input samples (read-only)
 * @param dest     output buffer for the processed samples
 * @param tag      NOTE(review): presumably the index of the first channel to
 *                 process -- confirm
 * @param channels NOTE(review): presumably the number of channels to
 *                 process -- confirm
 */
00182 void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx,
00183                        const int16_t *audio, int16_t *dest,
00184                        int tag, int channels);
00185 
/**
 * Counterpart of ff_psy_preprocess_init(): finish using a preprocessing
 * context.
 *
 * NOTE(review): presumably frees the context; whether a NULL ctx is accepted
 * is not visible in this header.
 *
 * @param ctx preprocessing context
 */
00189 av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx);
00190 
00191 #endif /* AVCODEC_PSYMODEL_H */