• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/x86/dsputil_h264_template_ssse3.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2008 Loren Merritt
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
/**
 * 8-pixel-wide chroma motion compensation with 1/8-pel bilinear
 * interpolation, written with SSSE3 (pmaddubsw) inline assembly.
 *
 * H264_CHROMA_MC8_TMPL, H264_CHROMA_MC8_MV0 and AVG_OP are macros supplied by
 * the file that includes this template. AVG_OP(...) wraps the instructions
 * that blend the result with the existing destination (pavgb); presumably it
 * expands to its argument for the "avg" variant and to nothing for the "put"
 * variant -- confirm against the including file.
 *
 * With A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy the general case computes, per
 * output byte,
 *     (A*s[i] + B*s[i+1] + C*s[i+stride] + D*s[i+stride+1] + round) >> 6.
 * When exactly one of x/y is zero a 2-tap filter with weights (8-k, k),
 * k = x+y, and a shift of 3 is used instead.
 *
 * @param dst    destination block (annotated "align 8" in the signature)
 * @param src    source block (annotated "align 1"); one extra column is read,
 *               and one extra row whenever y != 0
 * @param stride byte distance between rows, used for both src and dst
 * @param h      number of rows; two rows are produced per loop iteration and
 *               the loops run until h <= 0 (so h is presumably even)
 * @param x      horizontal fraction, asserted to be in 0..7
 * @param y      vertical fraction, asserted to be in 0..7
 * @param rnd    non-zero selects ff_pw_4 / ff_pw_32 as rounding constant,
 *               zero selects ff_pw_3 / ff_pw_28
 *
 * The constant-setup asm statements leave their results in xmm5..xmm7 and the
 * following loop asm statements rely on those registers still holding them;
 * no xmm registers are declared as clobbers. The loop statements read src and
 * write dst through register operands only, so they declare a "memory"
 * clobber to tell the compiler that memory is accessed behind its back.
 */
00027 static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, int rnd)
00028 {
00029     if(y==0 && x==0) {
00030         /* no filter needed */
00031         H264_CHROMA_MC8_MV0(dst, src, stride, h);
00032         return;
00033     }
00034 
00035     assert(x<8 && y<8 && x>=0 && y>=0);
00036 
00037     if(y==0 || x==0)
00038     {
00039         /* 1 dimensional filter only */
00040         __asm__ volatile(
00041             "movd %0, %%xmm7 \n\t" /* 16-bit word: low byte 8-k, high byte k (k = x+y) */
00042             "movq %1, %%xmm6 \n\t" /* 8 bytes of the rounding constant */
00043             "pshuflw $0, %%xmm7, %%xmm7 \n\t" /* broadcast word 0 to the low four words */
00044             "movlhps %%xmm6, %%xmm6 \n\t" /* duplicate low half into high half */
00045             "movlhps %%xmm7, %%xmm7 \n\t" /* xmm7 = weight pair in all eight words */
00046             :: "r"(255*(x+y)+8), "m"(*(rnd?&ff_pw_4:&ff_pw_3))
00047         );
00048 
00049         if(x) {
                /* horizontal 2-tap filter: pairs (s[i], s[i+1]) on two rows per pass */
00050             __asm__ volatile(
00051                 "1: \n\t"
00052                 "movq (%1), %%xmm0 \n\t" /* row 0, bytes 0..7 */
00053                 "movq 1(%1), %%xmm1 \n\t" /* row 0, bytes 1..8 */
00054                 "movq (%1,%3), %%xmm2 \n\t" /* row 1, bytes 0..7 */
00055                 "movq 1(%1,%3), %%xmm3 \n\t" /* row 1, bytes 1..8 */
00056                 "punpcklbw %%xmm1, %%xmm0 \n\t" /* interleave into (s[i], s[i+1]) byte pairs */
00057                 "punpcklbw %%xmm3, %%xmm2 \n\t"
00058                 "pmaddubsw %%xmm7, %%xmm0 \n\t" /* word = s[i]*(8-k) + s[i+1]*k */
00059                 "pmaddubsw %%xmm7, %%xmm2 \n\t"
00060          AVG_OP("movq (%0), %%xmm4 \n\t") /* existing dst row 0 */
00061          AVG_OP("movhps (%0,%3), %%xmm4 \n\t") /* existing dst row 1 */
00062                 "paddw %%xmm6, %%xmm0 \n\t" /* add rounding constant */
00063                 "paddw %%xmm6, %%xmm2 \n\t"
00064                 "psrlw $3, %%xmm0 \n\t" /* weights sum to 8 */
00065                 "psrlw $3, %%xmm2 \n\t"
00066                 "packuswb %%xmm2, %%xmm0 \n\t" /* row 0 in low 8 bytes, row 1 in high 8 bytes */
00067          AVG_OP("pavgb %%xmm4, %%xmm0 \n\t")
00068                 "movq %%xmm0, (%0) \n\t"
00069                 "movhps %%xmm0, (%0,%3) \n\t"
00070                 "sub $2, %2 \n\t" /* sets the flags tested by jg below */
00071                 "lea (%1,%3,2), %1 \n\t" /* lea advances pointers without touching flags */
00072                 "lea (%0,%3,2), %0 \n\t"
00073                 "jg 1b \n\t" /* loop while h > 0 */
00074                 :"+r"(dst), "+r"(src), "+r"(h)
00075                 :"r"((x86_reg)stride) :"memory"
00076             );
00077         } else {
                /* vertical 2-tap filter: pairs (row n, row n+1) for two output rows per pass */
00078             __asm__ volatile(
00079                 "1: \n\t"
00080                 "movq (%1), %%xmm0 \n\t" /* row 0 */
00081                 "movq (%1,%3), %%xmm1 \n\t" /* row 1 */
00082                 "movdqa %%xmm1, %%xmm2 \n\t" /* row 1 is also the upper tap of output row 1 */
00083                 "movq (%1,%3,2), %%xmm3 \n\t" /* row 2 */
00084                 "punpcklbw %%xmm1, %%xmm0 \n\t" /* (row0[i], row1[i]) byte pairs */
00085                 "punpcklbw %%xmm3, %%xmm2 \n\t" /* (row1[i], row2[i]) byte pairs */
00086                 "pmaddubsw %%xmm7, %%xmm0 \n\t"
00087                 "pmaddubsw %%xmm7, %%xmm2 \n\t"
00088          AVG_OP("movq (%0), %%xmm4 \n\t")
00089          AVG_OP("movhps (%0,%3), %%xmm4 \n\t")
00090                 "paddw %%xmm6, %%xmm0 \n\t"
00091                 "paddw %%xmm6, %%xmm2 \n\t"
00092                 "psrlw $3, %%xmm0 \n\t"
00093                 "psrlw $3, %%xmm2 \n\t"
00094                 "packuswb %%xmm2, %%xmm0 \n\t"
00095          AVG_OP("pavgb %%xmm4, %%xmm0 \n\t")
00096                 "movq %%xmm0, (%0) \n\t"
00097                 "movhps %%xmm0, (%0,%3) \n\t"
00098                 "sub $2, %2 \n\t"
00099                 "lea (%1,%3,2), %1 \n\t"
00100                 "lea (%0,%3,2), %0 \n\t"
00101                 "jg 1b \n\t"
00102                 :"+r"(dst), "+r"(src), "+r"(h)
00103                 :"r"((x86_reg)stride) :"memory"
00104             );
00105         }
00106         return;
00107     }
00108 
00109     /* general case, bilinear */
00110     __asm__ volatile(
00111         "movd %0, %%xmm7 \n\t" /* upper-row weights: low byte (8-x)(8-y), high byte x(8-y) */
00112         "movd %1, %%xmm6 \n\t" /* lower-row weights: low byte (8-x)y, high byte xy */
00113         "movdqa %2, %%xmm5 \n\t" /* 16-byte rounding constant (aligned load) */
00114         "pshuflw $0, %%xmm7, %%xmm7 \n\t"
00115         "pshuflw $0, %%xmm6, %%xmm6 \n\t"
00116         "movlhps %%xmm7, %%xmm7 \n\t"
00117         "movlhps %%xmm6, %%xmm6 \n\t"
00118         :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28))
00119     );
00120 
00121     __asm__ volatile(
00122         "movq (%1), %%xmm0 \n\t" /* prime xmm0 with the interleaved first row */
00123         "movq 1(%1), %%xmm1 \n\t"
00124         "punpcklbw %%xmm1, %%xmm0 \n\t"
00125         "add %3, %1 \n\t"
00126         "1: \n\t"
00127         "movq (%1), %%xmm1 \n\t" /* next row */
00128         "movq 1(%1), %%xmm2 \n\t"
00129         "movq (%1,%3), %%xmm3 \n\t" /* row after that */
00130         "movq 1(%1,%3), %%xmm4 \n\t"
00131         "lea (%1,%3,2), %1 \n\t"
00132         "punpcklbw %%xmm2, %%xmm1 \n\t"
00133         "punpcklbw %%xmm4, %%xmm3 \n\t"
00134         "movdqa %%xmm1, %%xmm2 \n\t" /* middle row is lower tap of output 0 and upper tap of output 1 */
00135         "movdqa %%xmm3, %%xmm4 \n\t" /* keep an unmultiplied copy for the next iteration */
00136         "pmaddubsw %%xmm7, %%xmm0 \n\t" /* upper taps, output row 0 */
00137         "pmaddubsw %%xmm6, %%xmm1 \n\t" /* lower taps, output row 0 */
00138         "pmaddubsw %%xmm7, %%xmm2 \n\t" /* upper taps, output row 1 */
00139         "pmaddubsw %%xmm6, %%xmm3 \n\t" /* lower taps, output row 1 */
00140         "paddw %%xmm5, %%xmm0 \n\t" /* add rounding constant once per output row */
00141         "paddw %%xmm5, %%xmm2 \n\t"
00142         "paddw %%xmm0, %%xmm1 \n\t" /* xmm1 = output row 0 before shift */
00143         "paddw %%xmm2, %%xmm3 \n\t" /* xmm3 = output row 1 before shift */
00144         "movdqa %%xmm4, %%xmm0 \n\t" /* last loaded row becomes the first row of the next pass */
00145         "psrlw $6, %%xmm1 \n\t" /* weights sum to 64 */
00146         "psrlw $6, %%xmm3 \n\t"
00147  AVG_OP("movq (%0), %%xmm2 \n\t")
00148  AVG_OP("movhps (%0,%3), %%xmm2 \n\t")
00149         "packuswb %%xmm3, %%xmm1 \n\t"
00150  AVG_OP("pavgb %%xmm2, %%xmm1 \n\t")
00151         "movq %%xmm1, (%0)\n\t"
00152         "movhps %%xmm1, (%0,%3)\n\t"
00153         "sub $2, %2 \n\t" /* sets the flags tested by jg; lea below leaves them intact */
00154         "lea (%0,%3,2), %0 \n\t"
00155         "jg 1b \n\t"
00156         :"+r"(dst), "+r"(src), "+r"(h)
00157         :"r"((x86_reg)stride) :"memory"
00158     );
00159 }
00160 
/**
 * 4-pixel-wide chroma motion compensation with 1/8-pel bilinear
 * interpolation, using MMX registers and the SSSE3 pmaddubsw instruction.
 *
 * H264_CHROMA_MC4_TMPL and AVG_OP are macros supplied by the file that
 * includes this template. AVG_OP(...) wraps the pavgb instructions that blend
 * with the existing destination; presumably it expands to nothing for the
 * "put" variant -- confirm against the including file.
 *
 * With A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy each output byte is
 *     (A*s[i] + B*s[i+1] + C*s[i+stride] + D*s[i+stride+1] + ff_pw_32) >> 6.
 * Unlike the 8-wide version there is no special case for x==0 or y==0 and no
 * rounding selector; the full bilinear path with ff_pw_32 is always taken.
 *
 * @param dst    destination block (annotated "align 4" in the signature)
 * @param src    source block (annotated "align 1"); one extra row and one
 *               extra column beyond the 4 x h block are read
 * @param stride byte distance between rows, used for both src and dst
 * @param h      number of rows; two rows are produced per loop iteration and
 *               the loop runs until h <= 0 (so h is presumably even)
 * @param x      horizontal fraction (presumably 0..7 -- not checked here)
 * @param y      vertical fraction (presumably 0..7 -- not checked here)
 *
 * The first asm statement leaves its constants in mm5..mm7 and the loop
 * statement relies on them still being there. No emms is issued in this
 * function. The loop reads src and writes dst through register operands
 * only, so it declares a "memory" clobber.
 */
00161 static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
00162 {
00163     __asm__ volatile(
00164         "movd %0, %%mm7 \n\t" /* upper-row weights: low byte (8-x)(8-y), high byte x(8-y) */
00165         "movd %1, %%mm6 \n\t" /* lower-row weights: low byte (8-x)y, high byte xy */
00166         "movq %2, %%mm5 \n\t" /* rounding constant */
00167         "pshufw $0, %%mm7, %%mm7 \n\t" /* broadcast word 0 to all four words */
00168         "pshufw $0, %%mm6, %%mm6 \n\t"
00169         :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32)
00170     );
00171 
00172     __asm__ volatile(
00173         "movd (%1), %%mm0 \n\t" /* prime mm0 with the interleaved first row */
00174         "punpcklbw 1(%1), %%mm0 \n\t" /* (s[i], s[i+1]) byte pairs */
00175         "add %3, %1 \n\t"
00176         "1: \n\t"
00177         "movd (%1), %%mm1 \n\t" /* next row */
00178         "movd (%1,%3), %%mm3 \n\t" /* row after that */
00179         "punpcklbw 1(%1), %%mm1 \n\t"
00180         "punpcklbw 1(%1,%3), %%mm3 \n\t"
00181         "lea (%1,%3,2), %1 \n\t"
00182         "movq %%mm1, %%mm2 \n\t" /* middle row is lower tap of output 0 and upper tap of output 1 */
00183         "movq %%mm3, %%mm4 \n\t" /* keep an unmultiplied copy for the next iteration */
00184         "pmaddubsw %%mm7, %%mm0 \n\t" /* upper taps, output row 0 */
00185         "pmaddubsw %%mm6, %%mm1 \n\t" /* lower taps, output row 0 */
00186         "pmaddubsw %%mm7, %%mm2 \n\t" /* upper taps, output row 1 */
00187         "pmaddubsw %%mm6, %%mm3 \n\t" /* lower taps, output row 1 */
00188         "paddw %%mm5, %%mm0 \n\t" /* add rounding constant once per output row */
00189         "paddw %%mm5, %%mm2 \n\t"
00190         "paddw %%mm0, %%mm1 \n\t" /* mm1 = output row 0 before shift */
00191         "paddw %%mm2, %%mm3 \n\t" /* mm3 = output row 1 before shift */
00192         "movq %%mm4, %%mm0 \n\t" /* last loaded row becomes the first row of the next pass */
00193         "psrlw $6, %%mm1 \n\t" /* weights sum to 64 */
00194         "psrlw $6, %%mm3 \n\t"
00195         "packuswb %%mm1, %%mm1 \n\t"
00196         "packuswb %%mm3, %%mm3 \n\t"
         /* NOTE(review): pavgb with an MMX register takes a 64-bit memory
          * operand, so these read 8 bytes from dst although only 4 are
          * stored below -- confirm callers tolerate the wider read. */
00197  AVG_OP("pavgb (%0), %%mm1 \n\t")
00198  AVG_OP("pavgb (%0,%3), %%mm3 \n\t")
00199         "movd %%mm1, (%0)\n\t"
00200         "movd %%mm3, (%0,%3)\n\t"
00201         "sub $2, %2 \n\t" /* sets the flags tested by jg; lea below leaves them intact */
00202         "lea (%0,%3,2), %0 \n\t"
00203         "jg 1b \n\t" /* loop while h > 0 */
00204         :"+r"(dst), "+r"(src), "+r"(h)
00205         :"r"((x86_reg)stride) :"memory"
00206     );
00207 }
00208 

Generated on Fri Sep 16 2011 17:17:46 for FFmpeg by  doxygen 1.7.1