Libav 0.7.1
|
00001 /* 00002 * MMI optimized DSP utils 00003 * Copyright (c) 2000, 2001 Fabrice Bellard 00004 * 00005 * MMI optimization by Leon van Stuivenberg 00006 * clear_blocks_mmi() by BroadQ 00007 * 00008 * This file is part of Libav. 00009 * 00010 * Libav is free software; you can redistribute it and/or 00011 * modify it under the terms of the GNU Lesser General Public 00012 * License as published by the Free Software Foundation; either 00013 * version 2.1 of the License, or (at your option) any later version. 00014 * 00015 * Libav is distributed in the hope that it will be useful, 00016 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 * Lesser General Public License for more details. 00019 * 00020 * You should have received a copy of the GNU Lesser General Public 00021 * License along with Libav; if not, write to the Free Software 00022 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00023 */ 00024 00025 #include "libavcodec/dsputil.h" 00026 #include "mmi.h" 00027 00028 void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block); 00029 void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block); 00030 void ff_mmi_idct(DCTELEM *block); 00031 00032 static void clear_blocks_mmi(DCTELEM * blocks) 00033 { 00034 __asm__ volatile( 00035 ".set noreorder \n" 00036 "addiu $9, %0, 768 \n" 00037 "nop \n" 00038 "1: \n" 00039 "sq $0, 0(%0) \n" 00040 "move $8, %0 \n" 00041 "addi %0, %0, 64 \n" 00042 "sq $0, 16($8) \n" 00043 "slt $10, %0, $9 \n" 00044 "sq $0, 32($8) \n" 00045 "bnez $10, 1b \n" 00046 "sq $0, 48($8) \n" 00047 ".set reorder \n" 00048 : "+r" (blocks) :: "$8", "$9", "memory" ); 00049 } 00050 00051 00052 static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size) 00053 { 00054 __asm__ volatile( 00055 ".set push \n\t" 00056 ".set mips3 \n\t" 00057 "ld $8, 0(%0) \n\t" 00058 "add %0, %0, %2 \n\t" 00059 "ld $9, 0(%0) \n\t" 00060 "add %0, %0, %2 \n\t" 00061 "ld $10, 0(%0) \n\t" 00062 "pextlb $8, $0, $8 \n\t" 00063 "sq $8, 0(%1) \n\t" 00064 "add %0, %0, %2 \n\t" 00065 "ld $8, 0(%0) \n\t" 00066 "pextlb $9, $0, $9 \n\t" 00067 "sq $9, 16(%1) \n\t" 00068 "add %0, %0, %2 \n\t" 00069 "ld $9, 0(%0) \n\t" 00070 "pextlb $10, $0, $10 \n\t" 00071 "sq $10, 32(%1) \n\t" 00072 "add %0, %0, %2 \n\t" 00073 "ld $10, 0(%0) \n\t" 00074 "pextlb $8, $0, $8 \n\t" 00075 "sq $8, 48(%1) \n\t" 00076 "add %0, %0, %2 \n\t" 00077 "ld $8, 0(%0) \n\t" 00078 "pextlb $9, $0, $9 \n\t" 00079 "sq $9, 64(%1) \n\t" 00080 "add %0, %0, %2 \n\t" 00081 "ld $9, 0(%0) \n\t" 00082 "pextlb $10, $0, $10 \n\t" 00083 "sq $10, 80(%1) \n\t" 00084 "pextlb $8, $0, $8 \n\t" 00085 "sq $8, 96(%1) \n\t" 00086 "pextlb $9, $0, $9 \n\t" 00087 "sq $9, 112(%1) \n\t" 00088 ".set pop \n\t" 00089 : "+r" (pixels) : "r" (block), "r" (line_size) : "$8", "$9", "$10", "memory" ); 00090 } 00091 00092 00093 static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h) 00094 { 00095 __asm__ volatile( 00096 ".set push \n\t" 00097 ".set mips3 \n\t" 00098 "1: \n\t" 00099 "ldr $8, 0(%1) \n\t" 00100 "addiu %2, %2, -1 \n\t" 00101 "ldl $8, 7(%1) \n\t" 00102 "add %1, %1, %3 \n\t" 00103 "sd $8, 0(%0) \n\t" 00104 "add %0, %0, %3 \n\t" 00105 "bgtz %2, 1b \n\t" 00106 ".set pop \n\t" 00107 : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size) 00108 : "$8", "memory" ); 00109 } 00110 00111 00112 static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h) 00113 { 00114 __asm__ volatile ( 00115 ".set push \n\t" 00116 ".set mips3 \n\t" 00117 "1: \n\t" 00118 "ldr $8, 0(%1) \n\t" 00119 "add $11, %1, %3 \n\t" 00120 "ldl $8, 7(%1) \n\t" 00121 "add $10, %0, %3 \n\t" 00122 "ldr $9, 8(%1) \n\t" 00123 "ldl $9, 15(%1) \n\t" 00124 "ldr $12, 0($11) \n\t" 00125 "add %1, $11, %3 \n\t" 00126 "ldl $12, 7($11) \n\t" 00127 "pcpyld $8, $9, $8 \n\t" 00128 "sq $8, 0(%0) \n\t" 00129 "ldr $13, 8($11) \n\t" 00130 "addiu %2, %2, -2 \n\t" 00131 "ldl $13, 15($11) \n\t" 00132 "add %0, $10, %3 \n\t" 00133 "pcpyld $12, $13, $12 \n\t" 00134 "sq $12, 0($10) \n\t" 00135 "bgtz %2, 1b \n\t" 00136 ".set pop \n\t" 00137 : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size) 00138 : "$8", "$9", "$10", "$11", "$12", "$13", "memory" ); 00139 } 00140 00141 00142 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx) 00143 { 00144 const int idct_algo= avctx->idct_algo; 00145 const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; 00146 00147 if (!high_bit_depth) { 00148 c->clear_blocks = clear_blocks_mmi; 00149 00150 c->put_pixels_tab[1][0] = put_pixels8_mmi; 00151 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi; 00152 00153 c->put_pixels_tab[0][0] = put_pixels16_mmi; 00154 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi; 00155 } 00156 00157 c->get_pixels = get_pixels_mmi; 00158 00159 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){ 00160 c->idct_put= ff_mmi_idct_put; 00161 c->idct_add= ff_mmi_idct_add; 00162 c->idct = ff_mmi_idct; 00163 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 00164 } 00165 } 00166