00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
/*
 * Register-width abstraction for the filter template below.
 *
 * With COMPILE_TEMPLATE_SSE defined the template is built on the 128-bit
 * xmm registers and handles STEP = 8 pixels per loop iteration; otherwise
 * it is built on the 64-bit mm registers and handles STEP = 4 pixels.
 *
 *   MM     register name prefix (the doubled percent sign is the escape
 *          needed inside an extended asm template).
 *   MOV    move of STEP bytes (movq: 8 bytes, movd: 4 bytes).
 *   MOVQ   full-register move; on SSE this is movdqa, whose memory
 *          operands must be 16-byte aligned.
 *   MOVQU  full-register move that tolerates unaligned memory operands.
 *   LOAD   load STEP bytes from mem into dst and widen them to 16-bit
 *          words by interleaving with register 7, which therefore has to
 *          hold zero whenever LOAD is used.
 *   PSRL1, PSRL2
 *          shift the whole register right by one / two bytes.
 *   PSHUF(src,dst)
 *          despite the parameter names, the register passed as src is the
 *          one that is written: it receives the contents of dst moved
 *          down by two bytes. The SSE form copies and shifts; the MMX
 *          form uses pshufw with selector 9 (words 1,2,0,0), so only the
 *          low two words are a shifted copy.
 */
00021 #ifdef COMPILE_TEMPLATE_SSE
00022 #define MM "%%xmm"
00023 #define MOV "movq"
00024 #define MOVQ "movdqa"
00025 #define MOVQU "movdqu"
00026 #define STEP 8
00027 #define LOAD(mem,dst) \
00028 MOV" "mem", "dst" \n\t"\
00029 "punpcklbw "MM"7, "dst" \n\t"
00030 #define PSRL1(reg) "psrldq $1, "reg" \n\t"
00031 #define PSRL2(reg) "psrldq $2, "reg" \n\t"
00032 #define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
00033 "psrldq $2, "src" \n\t"
00034 #else
00035 #define MM "%%mm"
00036 #define MOV "movd"
00037 #define MOVQ "movq"
00038 #define MOVQU "movq"
00039 #define STEP 4
00040 #define LOAD(mem,dst) \
00041 MOV" "mem", "dst" \n\t"\
00042 "punpcklbw "MM"7, "dst" \n\t"
00043 #define PSRL1(reg) "psrlq $8, "reg" \n\t"
00044 #define PSRL2(reg) "psrlq $16, "reg" \n\t"
00045 #define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
00046 #endif
00047
/*
 * PABS(tmp,dst): replace every signed 16-bit word of dst by its absolute
 * value.
 *
 * With COMPILE_TEMPLATE_SSSE3 this is a single pabsw and tmp is unused.
 * Otherwise tmp is overwritten: it is cleared, set to 0 - dst, and dst
 * becomes the signed maximum of dst and tmp.
 */
00048 #ifdef COMPILE_TEMPLATE_SSSE3
00049 #define PABS(tmp,dst) \
00050 "pabsw "dst", "dst" \n\t"
00051 #else
00052 #define PABS(tmp,dst) \
00053 "pxor "tmp", "tmp" \n\t"\
00054 "psubw "dst", "tmp" \n\t"\
00055 "pmaxsw "tmp", "dst" \n\t"
00056 #endif
00057
/*
 * CHECK(pj,mj): evaluate one candidate pair of byte runs.
 *
 * Let a[] be the bytes at cur + mrefs + pj and b[] the bytes at
 * cur + prefs + mj (pj and mj are pasted into the asm text as
 * displacements). On exit:
 *
 *   MM5 = (a[k+1] + b[k+1]) >> 1 as 16-bit words. pavgb rounds up, so the
 *         low bit of a XOR b is subtracted afterwards to round down
 *         (assumes pb_1, defined outside this file, holds the byte value 1
 *         in every lane - TODO confirm).
 *   MM2 = |a[k] - b[k]| + |a[k+1] - b[k+1]| + |a[k+2] - b[k+2]| as 16-bit
 *         words, i.e. the score of this candidate.
 *
 * MM3 and MM4 are used as scratch. MM7 must be zero because it is the
 * interleave partner of every punpcklbw.
 */
00058 #define CHECK(pj,mj) \
00059 MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* a[] */\
00060 MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* b[] */\
00061 MOVQ" "MM"2, "MM"4 \n\t"\
00062 MOVQ" "MM"2, "MM"5 \n\t"\
00063 "pxor "MM"3, "MM"4 \n\t"\
00064 "pavgb "MM"3, "MM"5 \n\t"\
00065 "pand "MANGLE(pb_1)", "MM"4 \n\t"\
00066 "psubusb "MM"4, "MM"5 \n\t"\
00067 PSRL1(MM"5") /* drop byte 0 so that lane k holds index k+1 */\
00068 "punpcklbw "MM"7, "MM"5 \n\t" /* MM5 = (a[k+1] + b[k+1]) >> 1 */\
00069 MOVQ" "MM"2, "MM"4 \n\t"\
00070 "psubusb "MM"3, "MM"2 \n\t"\
00071 "psubusb "MM"4, "MM"3 \n\t"\
00072 "pmaxub "MM"3, "MM"2 \n\t"\
00073 MOVQ" "MM"2, "MM"3 \n\t"\
00074 MOVQ" "MM"2, "MM"4 \n\t" /* MM2 = |a[k] - b[k]| */\
00075 PSRL1(MM"3") /* MM3 = |a[k+1] - b[k+1]| */\
00076 PSRL2(MM"4") /* MM4 = |a[k+2] - b[k+2]| */\
00077 "punpcklbw "MM"7, "MM"2 \n\t"\
00078 "punpcklbw "MM"7, "MM"3 \n\t"\
00079 "punpcklbw "MM"7, "MM"4 \n\t"\
00080 "paddw "MM"3, "MM"2 \n\t"\
00081 "paddw "MM"4, "MM"2 \n\t"
00082
/*
 * CHECK1: per 16-bit lane, accept the candidate left by CHECK if it beats
 * the current best.
 *
 * Inputs:  MM0 = best score so far, MM1 = value belonging to that score,
 *          MM2 = candidate score,   MM5 = candidate value.
 * Outputs: MM0 = min(MM0, MM2) (signed),
 *          MM1 = MM5 in lanes where MM2 < MM0 held, unchanged elsewhere,
 *          MM6 = the comparison mask (all ones where the candidate won).
 * MM3 is scratch; MM5 is masked in place.
 */
00083 #define CHECK1 \
00084 MOVQ" "MM"0, "MM"3 \n\t"\
00085 "pcmpgtw "MM"2, "MM"3 \n\t" /* mask = candidate score < best score */\
00086 "pminsw "MM"2, "MM"0 \n\t" /* best score = min(best, candidate) */\
00087 MOVQ" "MM"3, "MM"6 \n\t"\
00088 "pand "MM"3, "MM"5 \n\t"\
00089 "pandn "MM"1, "MM"3 \n\t"\
00090 "por "MM"5, "MM"3 \n\t"\
00091 MOVQ" "MM"3, "MM"1 \n\t"
00092
/*
 * CHECK2: like CHECK1, but the candidate is first penalised in every lane
 * where the preceding CHECK1 did not accept its candidate.
 *
 * MM6 is expected to hold the mask left by CHECK1 (all ones where that
 * candidate won). Adding pw_1 and shifting left by 14 turns the mask into
 * 0 for winning lanes and 0x4000 for losing lanes (assumes pw_1, defined
 * outside this file, holds the word value 1 in every lane - TODO confirm).
 * That amount is added with signed saturation to the candidate score in
 * MM2 before the usual compare-and-select:
 *
 *   MM0 = min(MM0, MM2), MM1 = MM5 where MM2 < MM0 held, else unchanged.
 *
 * MM3 is scratch; MM5 and MM6 are modified in place. Unlike CHECK1 the
 * comparison mask is not saved.
 *
 * The define line must end in a backslash: without it the macro expands
 * to nothing and the instruction strings below are left at file scope.
 */
00093 #define CHECK2 \
00094 /* penalty: 0 where the previous check won, 0x4000 elsewhere */\
00095 "paddw "MANGLE(pw_1)", "MM"6 \n\t"\
00096 "psllw $14, "MM"6 \n\t"\
00097 "paddsw "MM"6, "MM"2 \n\t"\
00098 MOVQ" "MM"0, "MM"3 \n\t"\
00099 "pcmpgtw "MM"2, "MM"3 \n\t"\
00100 "pminsw "MM"2, "MM"0 \n\t"\
00101 "pand "MM"3, "MM"5 \n\t"\
00102 "pandn "MM"1, "MM"3 \n\t"\
00103 "por "MM"5, "MM"3 \n\t"\
00104 MOVQ" "MM"3, "MM"1 \n\t"
00105
/**
 * Filter one line of pixels with MMX/SSE inline assembly, STEP pixels per
 * loop iteration.
 *
 * Notation used in the comments of this function, for the pixel at x:
 *   c = cur[x + mrefs], e = cur[x + prefs],
 *   (prev2, next2) = (prev, cur) when parity is non-zero, (cur, next)
 *   otherwise,
 *   d = (prev2[x] + next2[x]) >> 1.
 *
 * Per pixel the code
 *   1. computes diff = max(|prev2[x] - next2[x]| >> 1,
 *                          (|prev[x+mrefs] - c| + |prev[x+prefs] - e|) >> 1,
 *                          (|next[x+mrefs] - c| + |next[x+prefs] - e|) >> 1);
 *   2. starts from the prediction (c + e) >> 1 with score
 *      |c - e| + |cur[x+mrefs-1] - cur[x+prefs-1]|
 *              + |cur[x+mrefs+1] - cur[x+prefs+1]| - pw_1
 *      and runs four CHECK / CHECK1 / CHECK2 rounds that replace the
 *      prediction by a diagonal average whenever its score is lower;
 *   3. if mode < 2, raises diff using the rows at 2*mrefs and 2*prefs of
 *      prev2 and next2;
 *   4. clamps the prediction to [d - diff, d + diff], packs it to bytes
 *      with unsigned saturation and stores STEP bytes to dst.
 *
 * @param dst    output line; STEP bytes are stored per iteration
 * @param prev   input line read at offsets mrefs and prefs (and at 0 and
 *               twice those offsets when it is selected as prev2)
 * @param cur    input line read at offsets mrefs and prefs, with byte
 *               displacements from -3 upwards
 * @param next   input line, used like prev
 * @param w      number of pixels; the loop runs while x < w in steps of
 *               STEP, so the processed width is w rounded up to a
 *               multiple of STEP
 * @param prefs  byte offset used as an index register; presumably the
 *               distance to the following line - confirm with the caller
 * @param mrefs  byte offset used as an index register; presumably the
 *               distance to the preceding line - confirm with the caller
 * @param parity selects which two of prev/cur/next act as prev2/next2
 * @param mode   values below 2 enable step 3
 *
 * NOTE(review): no clobber list is declared on either asm statement, and
 * the second statement relies on register 1 still holding the result of
 * the first. No emms instruction is issued here.
 * NOTE(review): when w is not a multiple of STEP the last iteration stores
 * bytes beyond dst + w - confirm that callers provide the padding.
 */
00106 void RENAME(ff_yadif_filter_line)(uint8_t *dst,
00107 uint8_t *prev, uint8_t *cur, uint8_t *next,
00108 int w, int prefs, int mrefs, int parity, int mode)
00109 {
/* 16-byte aligned spill slots written and read back by the asm below */
00110 DECLARE_ALIGNED(16, uint8_t, tmp0[16]);
00111 DECLARE_ALIGNED(16, uint8_t, tmp1[16]);
00112 DECLARE_ALIGNED(16, uint8_t, tmp2[16]);
00113 DECLARE_ALIGNED(16, uint8_t, tmp3[16]);
00114 int x;
00115
/*
 * FILTER is the complete per-line loop. It is expanded twice further down
 * with different string definitions of prev2 and next2, which are pasted
 * into the operand names of the asm template.
 */
00116 #define FILTER\
00117 for(x=0; x<w; x+=STEP){\
00118 __asm__ volatile(\
00119 "pxor "MM"7, "MM"7 \n\t"\
00120 LOAD("(%[cur],%[mrefs])", MM"0") /* c */\
00121 LOAD("(%[cur],%[prefs])", MM"1") /* e */\
00122 LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
00123 LOAD("(%["next2"])", MM"3") /* next2[x] */\
00124 MOVQ" "MM"3, "MM"4 \n\t"\
00125 "paddw "MM"2, "MM"3 \n\t"\
00126 "psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x]) >> 1 */\
00127 MOVQ" "MM"0, %[tmp0] \n\t" /* tmp0 = c */\
00128 MOVQ" "MM"3, %[tmp1] \n\t" /* tmp1 = d */\
00129 MOVQ" "MM"1, %[tmp2] \n\t" /* tmp2 = e */\
00130 "psubw "MM"4, "MM"2 \n\t"\
00131 PABS( MM"4", MM"2") /* |prev2[x] - next2[x]| */\
00132 LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x + mrefs] */\
00133 LOAD("(%[prev],%[prefs])", MM"4") /* prev[x + prefs] */\
00134 "psubw "MM"0, "MM"3 \n\t"\
00135 "psubw "MM"1, "MM"4 \n\t"\
00136 PABS( MM"5", MM"3")\
00137 PABS( MM"5", MM"4")\
00138 "paddw "MM"4, "MM"3 \n\t" /* |prev[x+mrefs] - c| + |prev[x+prefs] - e| */\
00139 "psrlw $1, "MM"2 \n\t"\
00140 "psrlw $1, "MM"3 \n\t"\
00141 "pmaxsw "MM"3, "MM"2 \n\t"\
00142 LOAD("(%[next],%[mrefs])", MM"3") /* next[x + mrefs] */\
00143 LOAD("(%[next],%[prefs])", MM"4") /* next[x + prefs] */\
00144 "psubw "MM"0, "MM"3 \n\t"\
00145 "psubw "MM"1, "MM"4 \n\t"\
00146 PABS( MM"5", MM"3")\
00147 PABS( MM"5", MM"4")\
00148 "paddw "MM"4, "MM"3 \n\t" /* |next[x+mrefs] - c| + |next[x+prefs] - e| */\
00149 "psrlw $1, "MM"3 \n\t"\
00150 "pmaxsw "MM"3, "MM"2 \n\t"\
00151 MOVQ" "MM"2, %[tmp3] \n\t" /* tmp3 = diff */\
00152 \
00153 "paddw "MM"0, "MM"1 \n\t"\
00154 "paddw "MM"0, "MM"0 \n\t"\
00155 "psubw "MM"1, "MM"0 \n\t"\
00156 "psrlw $1, "MM"1 \n\t" /* prediction = (c + e) >> 1 */\
00157 PABS( MM"2", MM"0") /* |c - e| */\
00158 \
00159 MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* bytes from cur[x + mrefs - 1] */\
00160 MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* bytes from cur[x + prefs - 1] */\
00161 MOVQ" "MM"2, "MM"4 \n\t"\
00162 "psubusb "MM"3, "MM"2 \n\t"\
00163 "psubusb "MM"4, "MM"3 \n\t"\
00164 "pmaxub "MM"3, "MM"2 \n\t"\
00165 PSHUF(MM"3", MM"2") /* MM3 = the byte differences moved down by two */\
00166 "punpcklbw "MM"7, "MM"2 \n\t" /* |cur[x+mrefs-1] - cur[x+prefs-1]| */\
00167 "punpcklbw "MM"7, "MM"3 \n\t" /* |cur[x+mrefs+1] - cur[x+prefs+1]| */\
00168 "paddw "MM"2, "MM"0 \n\t"\
00169 "paddw "MM"3, "MM"0 \n\t"\
00170 "psubw "MANGLE(pw_1)", "MM"0 \n\t" /* MM0 = initial score */\
00171 \
00172 CHECK(-2,0)\
00173 CHECK1\
00174 CHECK(-3,1)\
00175 CHECK2\
00176 CHECK(0,-2)\
00177 CHECK1\
00178 CHECK(1,-3)\
00179 CHECK2\
00180 /* the section up to label 1 is skipped when mode >= 2 */\
00181 \
00182 MOVQ" %[tmp3], "MM"6 \n\t" /* MM6 = diff */\
00183 "cmpl $2, %[mode] \n\t"\
00184 "jge 1f \n\t"\
00185 LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x + 2*mrefs] */\
00186 LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x + 2*mrefs] */\
00187 LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x + 2*prefs] */\
00188 LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x + 2*prefs] */\
00189 "paddw "MM"4, "MM"2 \n\t"\
00190 "paddw "MM"5, "MM"3 \n\t"\
00191 "psrlw $1, "MM"2 \n\t" /* b = average of the two rows at 2*mrefs */\
00192 "psrlw $1, "MM"3 \n\t" /* f = average of the two rows at 2*prefs */\
00193 MOVQ" %[tmp0], "MM"4 \n\t" /* c */\
00194 MOVQ" %[tmp1], "MM"5 \n\t" /* d */\
00195 MOVQ" %[tmp2], "MM"7 \n\t" /* e; MM7 is no longer zero from here on */\
00196 "psubw "MM"4, "MM"2 \n\t" /* b - c */\
00197 "psubw "MM"7, "MM"3 \n\t" /* f - e */\
00198 MOVQ" "MM"5, "MM"0 \n\t"\
00199 "psubw "MM"4, "MM"5 \n\t" /* d - c */\
00200 "psubw "MM"7, "MM"0 \n\t" /* d - e */\
00201 MOVQ" "MM"2, "MM"4 \n\t"\
00202 "pminsw "MM"3, "MM"2 \n\t"\
00203 "pmaxsw "MM"4, "MM"3 \n\t"\
00204 "pmaxsw "MM"5, "MM"2 \n\t"\
00205 "pminsw "MM"5, "MM"3 \n\t"\
00206 "pmaxsw "MM"0, "MM"2 \n\t" /* MM2 = max(d-c, d-e, min(b-c, f-e)) */\
00207 "pminsw "MM"0, "MM"3 \n\t" /* MM3 = min(d-c, d-e, max(b-c, f-e)) */\
00208 "pxor "MM"4, "MM"4 \n\t"\
00209 "pmaxsw "MM"3, "MM"6 \n\t"\
00210 "psubw "MM"2, "MM"4 \n\t" /* MM4 = -MM2 */\
00211 "pmaxsw "MM"4, "MM"6 \n\t" /* diff = max(diff, MM3, -MM2) */\
00212 "1: \n\t"\
00213 \
00214 MOVQ" %[tmp1], "MM"2 \n\t" /* d */\
00215 MOVQ" "MM"2, "MM"3 \n\t"\
00216 "psubw "MM"6, "MM"2 \n\t" /* d - diff */\
00217 "paddw "MM"6, "MM"3 \n\t" /* d + diff */\
00218 "pmaxsw "MM"2, "MM"1 \n\t"\
00219 "pminsw "MM"3, "MM"1 \n\t" /* prediction clamped to [d - diff, d + diff] */\
00220 "packuswb "MM"1, "MM"1 \n\t"\
00221 \
00222 :[tmp0]"=m"(tmp0),\
00223 [tmp1]"=m"(tmp1),\
00224 [tmp2]"=m"(tmp2),\
00225 [tmp3]"=m"(tmp3)\
00226 :[prev] "r"(prev),\
00227 [cur] "r"(cur),\
00228 [next] "r"(next),\
00229 [prefs]"r"((x86_reg)prefs),\
00230 [mrefs]"r"((x86_reg)mrefs),\
00231 [mode] "g"(mode)\
00232 );\
00233 __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
00234 dst += STEP;\
00235 prev+= STEP;\
00236 cur += STEP;\
00237 next+= STEP;\
00238 }
00239
/* Expand the loop once per parity with the matching pair of lines. */
00240 if (parity) {
00241 #define prev2 "prev"
00242 #define next2 "cur"
00243 FILTER
00244 #undef prev2
00245 #undef next2
00246 } else {
00247 #define prev2 "cur"
00248 #define next2 "next"
00249 FILTER
00250 #undef prev2
00251 #undef next2
00252 }
00253 }
/*
 * Remove every helper macro defined in this file, presumably so that the
 * template can be included again with a different COMPILE_TEMPLATE_* and
 * RENAME configuration without redefinition conflicts.
 */
00254 #undef STEP
00255 #undef MM
00256 #undef MOV
00257 #undef MOVQ
00258 #undef MOVQU
00259 #undef PSHUF
00260 #undef PSRL1
00261 #undef PSRL2
00262 #undef LOAD
00263 #undef PABS
00264 #undef CHECK
00265 #undef CHECK1
00266 #undef CHECK2
00267 #undef FILTER
00268