libavcodec/x86/ac3dsp_mmx.c
Go to the documentation of this file.
00001 /*
00002  * x86-optimized AC-3 DSP utils
00003  * Copyright (c) 2011 Justin Ruggles
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00022 #include "libavutil/x86_cpu.h"
00023 #include "dsputil_mmx.h"
00024 #include "libavcodec/ac3dsp.h"
00025 
/*
 * Prototypes of the instruction-set-specific routines that
 * ff_ac3dsp_init_x86() installs into an AC3DSPContext. None of them is
 * defined in this file, and they are referenced only when HAVE_YASM is set.
 * Each group lists the candidates for one AC3DSPContext field.
 */

/* Candidates for AC3DSPContext.ac3_exponent_min. */
void ff_ac3_exponent_min_mmx(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_ac3_exponent_min_sse2(uint8_t *exp, int num_reuse_blocks, int nb_coefs);

/* Candidates for AC3DSPContext.ac3_max_msb_abs_int16. */
int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
int ff_ac3_max_msb_abs_int16_sse2(const int16_t *src, int len);
int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);

/* Candidates for AC3DSPContext.ac3_lshift_int16. */
void ff_ac3_lshift_int16_mmx(int16_t *src, unsigned len, unsigned shift);
void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned len, unsigned shift);

/* Candidates for AC3DSPContext.ac3_rshift_int32. */
void ff_ac3_rshift_int32_mmx(int32_t *src, unsigned len, unsigned shift);
void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned len, unsigned shift);

/* Candidates for AC3DSPContext.float_to_fixed24. */
void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned len);
void ff_float_to_fixed24_sse(int32_t *dst, const float *src, unsigned len);
void ff_float_to_fixed24_sse2(int32_t *dst, const float *src, unsigned len);

/* Candidate for AC3DSPContext.compute_mantissa_size. */
int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);

/* Candidates for AC3DSPContext.extract_exponents. */
void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_ac3_extract_exponents_sse2(uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
00049 
00050 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
00051 {
00052 #if HAVE_YASM
00053     int mm_flags = av_get_cpu_flags();
00054 
00055     if (mm_flags & AV_CPU_FLAG_MMX) {
00056         c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
00057         c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
00058         c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
00059     }
00060     if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
00061         c->extract_exponents = ff_ac3_extract_exponents_3dnow;
00062         if (!bit_exact) {
00063             c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
00064         }
00065     }
00066     if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
00067         c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
00068         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
00069     }
00070     if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
00071         c->float_to_fixed24 = ff_float_to_fixed24_sse;
00072     }
00073     if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
00074         c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
00075         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
00076         c->float_to_fixed24 = ff_float_to_fixed24_sse2;
00077         c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
00078         c->extract_exponents = ff_ac3_extract_exponents_sse2;
00079         if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
00080             c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
00081             c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
00082         }
00083     }
00084     if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
00085         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
00086         if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
00087             c->extract_exponents = ff_ac3_extract_exponents_ssse3;
00088         }
00089     }
00090 #endif
00091 }