00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
/*
 * Instruction-selection helpers for this template. Each macro is #undef'd
 * before being redefined; presumably this file is included more than once
 * with different COMPILE_TEMPLATE_* settings -- TODO confirm in the includer.
 */
00021 #undef REAL_MOVNTQ
00022 #undef MOVNTQ
00023 #undef PREFETCH
00024
/*
 * PREFETCH: the mnemonic "prefetchnta" for MMX2 builds, otherwise the text
 * " # nop". NOTE(review): presumably '#' starts an assembler comment so the
 * fallback assembles to nothing -- confirm for the assemblers in use.
 */
00025 #if COMPILE_TEMPLATE_MMX2
00026 #define PREFETCH "prefetchnta"
00027 #else
00028 #define PREFETCH " # nop"
00029 #endif
00030
/*
 * REAL_MOVNTQ(a,b): stringifies a quadword store of a to b, using "movntq"
 * for MMX2 builds and plain "movq" otherwise.
 */
00031 #if COMPILE_TEMPLATE_MMX2
00032 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
00033 #else
00034 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
00035 #endif
/*
 * MOVNTQ forwards to REAL_MOVNTQ so that macro arguments are fully expanded
 * before the '#' operator stringifies them.
 */
00036 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
00037
/*
 * YSCALEYUV2PACKEDX_UV: opens an `__asm__ volatile(` statement (it is closed
 * by YSCALEYUV2PACKEDX_END or by an explicit operand list at the use site)
 * and emits the chroma half of the multi-tap vertical filter.
 *
 * - REG_a is zeroed and serves as the running index; label "1:" is the top
 *   of the outer loop that the WRITE* macros jump back to.
 * - REG_d walks a filter list starting at CHR_MMX_FILTER_OFFSET(%0). Each
 *   entry is 16 bytes: a source pointer at offset 0 and a coefficient
 *   quadword at offset 8. A null source pointer ends inner loop "2:".
 * - mm3 and mm4 start from the quadword at VROUNDER_OFFSET(%0). Each tap
 *   adds pmulhw(sample, coeff): mm3 accumulates the samples at src + index,
 *   mm4 those at src + %6 + index (%6 is uv_off in the operand lists used
 *   in this file).
 *
 * On exit mm3/mm4 hold the filtered chroma words (consumed as U and V by
 * YSCALEYUV2RGBX); mm0, mm2, mm5, REG_d and REG_S are scratch.
 */
00038 #define YSCALEYUV2PACKEDX_UV \
00039 __asm__ volatile(\
00040 "xor %%"REG_a", %%"REG_a" \n\t"\
00041 ".p2align 4 \n\t"\
00042 "nop \n\t"\
00043 "1: \n\t"\
00044 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00045 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00046 "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
00047 "movq %%mm3, %%mm4 \n\t"\
00048 ".p2align 4 \n\t"\
00049 "2: \n\t"\
00050 "movq 8(%%"REG_d"), %%mm0 \n\t" \
00051 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00052 "add %6, %%"REG_S" \n\t" \
00053 "movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" \
00054 "add $16, %%"REG_d" \n\t"\
00055 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00056 "pmulhw %%mm0, %%mm2 \n\t"\
00057 "pmulhw %%mm0, %%mm5 \n\t"\
00058 "paddw %%mm2, %%mm3 \n\t"\
00059 "paddw %%mm5, %%mm4 \n\t"\
00060 "test %%"REG_S", %%"REG_S" \n\t"\
00061 " jnz 2b \n\t"\
00062
/*
 * YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2): asm fragment
 * running the same multi-tap vertical filter over a plane whose filter list
 * starts at offset(%0). In this file it is used for luma and for alpha.
 *
 * The list layout matches the chroma pass (16-byte entries, pointer at 0,
 * coefficient at 8, null pointer terminates). The index in REG_a is scaled
 * by 2, and two adjacent quadwords are read per tap: src1 from the source
 * pointer and src2 from 8 bytes further on.
 * dst1/dst2 start from the rounder at VROUNDER_OFFSET(%0) and accumulate
 * pmulhw(src, coeff). coeff, src1 and src2 name scratch MMX registers.
 *
 * Reuses local label "2:"; it must be placed inside an asm statement that
 * has already set up REG_a and operand %0.
 */
00063 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
00064 "lea "offset"(%0), %%"REG_d" \n\t"\
00065 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00066 "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
00067 "movq "#dst1", "#dst2" \n\t"\
00068 ".p2align 4 \n\t"\
00069 "2: \n\t"\
00070 "movq 8(%%"REG_d"), "#coeff" \n\t" \
00071 "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" \
00072 "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" \
00073 "add $16, %%"REG_d" \n\t"\
00074 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00075 "pmulhw "#coeff", "#src1" \n\t"\
00076 "pmulhw "#coeff", "#src2" \n\t"\
00077 "paddw "#src1", "#dst1" \n\t"\
00078 "paddw "#src2", "#dst2" \n\t"\
00079 "test %%"REG_S", %%"REG_S" \n\t"\
00080 " jnz 2b \n\t"\
00081
/*
 * YSCALEYUV2PACKEDX: chroma pass followed by the luma pass using the filter
 * list at LUM_MMX_FILTER_OFFSET. Afterwards mm3/mm4 hold the filtered chroma
 * words and mm1/mm7 the two filtered luma quadwords; mm0, mm2 and mm5 were
 * used as scratch.
 */
00082 #define YSCALEYUV2PACKEDX \
00083 YSCALEYUV2PACKEDX_UV \
00084 YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
00085
/*
 * YSCALEYUV2PACKEDX_END: operand/clobber list that closes the asm statement
 * opened by YSCALEYUV2PACKEDX_UV / YSCALEYUV2PACKEDX_ACCURATE_UV.
 *
 *   %0      &c->redDither  (base register for all *_OFFSET displacements)
 *   %1..%3  dummy          (placeholders that keep the operand numbering)
 *   %4      dest           (register)
 *   %5      dstW_reg       (memory; loop bound)
 *   %6      uv_off         (memory; added to the chroma source pointer)
 *
 * Relies on locals named c, dummy, dest, dstW_reg and uv_off being in scope
 * at the expansion site.
 *
 * NOTE(review): only REG_a, REG_d and REG_S are declared as clobbers. The
 * MMX registers and "memory" are not listed, although the asm writes
 * through dest -- confirm this is intentional.
 */
00086 #define YSCALEYUV2PACKEDX_END \
00087 :: "r" (&c->redDither), \
00088 "m" (dummy), "m" (dummy), "m" (dummy),\
00089 "r" (dest), "m" (dstW_reg), "m"(uv_off) \
00090 : "%"REG_a, "%"REG_d, "%"REG_S \
00091 );
00092
/*
 * YSCALEYUV2PACKEDX_ACCURATE_UV: higher-precision variant of
 * YSCALEYUV2PACKEDX_UV. Opens the `__asm__ volatile(` statement, zeroes the
 * index in REG_a and defines outer-loop label "1:".
 *
 * Two filter taps are handled per inner iteration. Each list entry supplies
 * a first source pointer at offset 0, a second at APCK_PTR2 and a packed
 * coefficient pair at APCK_COEF; entries are APCK_SIZE bytes apart and a
 * null first pointer in the next entry ends loop "2:".
 * Samples from the two sources are interleaved with punpck{l,h}wd and fed
 * to pmaddwd, so products are summed in 32-bit lanes: mm4/mm5 for the plane
 * at src + index, mm6/mm7 for the plane at src + %6 + index.
 *
 * After the loop the sums are shifted right by 16, packed back to 16-bit
 * words with signed saturation, offset by the rounder at
 * VROUNDER_OFFSET(%0) and stored to U_TEMP(%0) and V_TEMP(%0).
 * mm0-mm7 are all overwritten.
 */
00093 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
00094 __asm__ volatile(\
00095 "xor %%"REG_a", %%"REG_a" \n\t"\
00096 ".p2align 4 \n\t"\
00097 "nop \n\t"\
00098 "1: \n\t"\
00099 "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
00100 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00101 "pxor %%mm4, %%mm4 \n\t"\
00102 "pxor %%mm5, %%mm5 \n\t"\
00103 "pxor %%mm6, %%mm6 \n\t"\
00104 "pxor %%mm7, %%mm7 \n\t"\
00105 ".p2align 4 \n\t"\
00106 "2: \n\t"\
00107 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" \
00108 "add %6, %%"REG_S" \n\t" \
00109 "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" \
00110 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00111 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" \
00112 "movq %%mm0, %%mm3 \n\t"\
00113 "punpcklwd %%mm1, %%mm0 \n\t"\
00114 "punpckhwd %%mm1, %%mm3 \n\t"\
00115 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" \
00116 "pmaddwd %%mm1, %%mm0 \n\t"\
00117 "pmaddwd %%mm1, %%mm3 \n\t"\
00118 "paddd %%mm0, %%mm4 \n\t"\
00119 "paddd %%mm3, %%mm5 \n\t"\
00120 "add %6, %%"REG_S" \n\t" \
00121 "movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" \
00122 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00123 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00124 "test %%"REG_S", %%"REG_S" \n\t"\
00125 "movq %%mm2, %%mm0 \n\t"\
00126 "punpcklwd %%mm3, %%mm2 \n\t"\
00127 "punpckhwd %%mm3, %%mm0 \n\t"\
00128 "pmaddwd %%mm1, %%mm2 \n\t"\
00129 "pmaddwd %%mm1, %%mm0 \n\t"\
00130 "paddd %%mm2, %%mm6 \n\t"\
00131 "paddd %%mm0, %%mm7 \n\t"\
00132 " jnz 2b \n\t"\
00133 "psrad $16, %%mm4 \n\t"\
00134 "psrad $16, %%mm5 \n\t"\
00135 "psrad $16, %%mm6 \n\t"\
00136 "psrad $16, %%mm7 \n\t"\
00137 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00138 "packssdw %%mm5, %%mm4 \n\t"\
00139 "packssdw %%mm7, %%mm6 \n\t"\
00140 "paddw %%mm0, %%mm4 \n\t"\
00141 "paddw %%mm0, %%mm6 \n\t"\
00142 "movq %%mm4, "U_TEMP"(%0) \n\t"\
00143 "movq %%mm6, "V_TEMP"(%0) \n\t"\
00144
/*
 * YSCALEYUV2PACKEDX_ACCURATE_YA(offset): higher-precision vertical filter
 * for the plane whose filter list starts at offset(%0). In this file it is
 * used for luma and for alpha.
 *
 * Same two-taps-per-iteration pmaddwd scheme as the ACCURATE chroma pass,
 * but the index in REG_a is scaled by 2 and two adjacent quadwords are read
 * from each source (offsets 0 and 8). 32-bit sums live in mm1/mm5 for the
 * first quadword and mm7/mm6 for the second.
 *
 * After the loop the sums are shifted right by 16, packed with signed
 * saturation into mm1 and mm7, and offset by the rounder at
 * VROUNDER_OFFSET(%0). The macro finally reloads mm3 from U_TEMP(%0) and
 * mm4 from V_TEMP(%0), so callers must have stored whatever they need in
 * those two slots beforehand. mm0-mm7 are all overwritten.
 */
00145 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
00146 "lea "offset"(%0), %%"REG_d" \n\t"\
00147 "mov (%%"REG_d"), %%"REG_S" \n\t"\
00148 "pxor %%mm1, %%mm1 \n\t"\
00149 "pxor %%mm5, %%mm5 \n\t"\
00150 "pxor %%mm7, %%mm7 \n\t"\
00151 "pxor %%mm6, %%mm6 \n\t"\
00152 ".p2align 4 \n\t"\
00153 "2: \n\t"\
00154 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" \
00155 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" \
00156 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
00157 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" \
00158 "movq %%mm0, %%mm3 \n\t"\
00159 "punpcklwd %%mm4, %%mm0 \n\t"\
00160 "punpckhwd %%mm4, %%mm3 \n\t"\
00161 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" \
00162 "pmaddwd %%mm4, %%mm0 \n\t"\
00163 "pmaddwd %%mm4, %%mm3 \n\t"\
00164 "paddd %%mm0, %%mm1 \n\t"\
00165 "paddd %%mm3, %%mm5 \n\t"\
00166 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" \
00167 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
00168 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
00169 "test %%"REG_S", %%"REG_S" \n\t"\
00170 "movq %%mm2, %%mm0 \n\t"\
00171 "punpcklwd %%mm3, %%mm2 \n\t"\
00172 "punpckhwd %%mm3, %%mm0 \n\t"\
00173 "pmaddwd %%mm4, %%mm2 \n\t"\
00174 "pmaddwd %%mm4, %%mm0 \n\t"\
00175 "paddd %%mm2, %%mm7 \n\t"\
00176 "paddd %%mm0, %%mm6 \n\t"\
00177 " jnz 2b \n\t"\
00178 "psrad $16, %%mm1 \n\t"\
00179 "psrad $16, %%mm5 \n\t"\
00180 "psrad $16, %%mm7 \n\t"\
00181 "psrad $16, %%mm6 \n\t"\
00182 "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
00183 "packssdw %%mm5, %%mm1 \n\t"\
00184 "packssdw %%mm6, %%mm7 \n\t"\
00185 "paddw %%mm0, %%mm1 \n\t"\
00186 "paddw %%mm0, %%mm7 \n\t"\
00187 "movq "U_TEMP"(%0), %%mm3 \n\t"\
00188 "movq "V_TEMP"(%0), %%mm4 \n\t"\
00189
/*
 * YSCALEYUV2PACKEDX_ACCURATE: higher-precision chroma pass followed by the
 * higher-precision luma pass. On exit mm3/mm4 hold the chroma words reloaded
 * from U_TEMP/V_TEMP and mm1/mm7 the two luma quadwords, i.e. the same
 * register layout that YSCALEYUV2PACKEDX produces.
 */
00190 #define YSCALEYUV2PACKEDX_ACCURATE \
00191 YSCALEYUV2PACKEDX_ACCURATE_UV \
00192 YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
00193
/*
 * YSCALEYUV2RGBX: asm fragment converting filtered YUV words to packed
 * 8-bit colour components, using constants addressed relative to %0.
 *
 * In:  mm3 = U words, mm4 = V words, mm1/mm7 = two quadwords of Y words.
 * Steps:
 *   - subtract U_OFFSET / V_OFFSET from the chroma and Y_OFFSET from luma;
 *   - multiply with pmulhw by UG/VG/UB/VR and Y coefficients;
 *   - sum the two green contributions (mm4 = ug + vg);
 *   - duplicate every chroma term with punpck{l,h}wd so each one is added
 *     to two luma samples (low half with mm1, high half with mm7);
 *   - pack to unsigned bytes with saturation.
 * Out: mm2 = UB-derived bytes, mm4 = UG+VG-derived bytes, mm5 = VR-derived
 *      bytes (passed as b, g, r to the WRITE* macros in this file).
 * mm0, mm1, mm3, mm6 and mm7 are overwritten.
 */
00194 #define YSCALEYUV2RGBX \
00195 "psubw "U_OFFSET"(%0), %%mm3 \n\t" \
00196 "psubw "V_OFFSET"(%0), %%mm4 \n\t" \
00197 "movq %%mm3, %%mm2 \n\t" \
00198 "movq %%mm4, %%mm5 \n\t" \
00199 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
00200 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
00201 \
00202 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
00203 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
00204 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" \
00205 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" \
00206 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
00207 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
00208 \
00209 "paddw %%mm3, %%mm4 \n\t"\
00210 "movq %%mm2, %%mm0 \n\t"\
00211 "movq %%mm5, %%mm6 \n\t"\
00212 "movq %%mm4, %%mm3 \n\t"\
00213 "punpcklwd %%mm2, %%mm2 \n\t"\
00214 "punpcklwd %%mm5, %%mm5 \n\t"\
00215 "punpcklwd %%mm4, %%mm4 \n\t"\
00216 "paddw %%mm1, %%mm2 \n\t"\
00217 "paddw %%mm1, %%mm5 \n\t"\
00218 "paddw %%mm1, %%mm4 \n\t"\
00219 "punpckhwd %%mm0, %%mm0 \n\t"\
00220 "punpckhwd %%mm6, %%mm6 \n\t"\
00221 "punpckhwd %%mm3, %%mm3 \n\t"\
00222 "paddw %%mm7, %%mm0 \n\t"\
00223 "paddw %%mm7, %%mm6 \n\t"\
00224 "paddw %%mm7, %%mm3 \n\t"\
00225 \
00226 "packuswb %%mm0, %%mm2 \n\t"\
00227 "packuswb %%mm6, %%mm5 \n\t"\
00228 "packuswb %%mm3, %%mm4 \n\t"\
00229
/*
 * REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t): asm fragment
 * that interleaves four registers of 8 component bytes each (b, g, r, a)
 * into 8 pixels of 4 bytes in memory order b, g, r, a.
 *
 * q0, q2, q3 and t are scratch MMX registers; b and r are overwritten as
 * well. Four quadwords (32 bytes) are stored at dst + index*4 through
 * MOVNTQ. The macro then advances index by 8, compares it against dstw and
 * jumps back to label "1:" (defined by the preceding scale macro) while
 * index is below dstw (unsigned compare).
 */
00230 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
00231 "movq "#b", "#q2" \n\t" \
00232 "movq "#r", "#t" \n\t" \
00233 "punpcklbw "#g", "#b" \n\t" \
00234 "punpcklbw "#a", "#r" \n\t" \
00235 "punpckhbw "#g", "#q2" \n\t" \
00236 "punpckhbw "#a", "#t" \n\t" \
00237 "movq "#b", "#q0" \n\t" \
00238 "movq "#q2", "#q3" \n\t" \
00239 "punpcklwd "#r", "#q0" \n\t" \
00240 "punpckhwd "#r", "#b" \n\t" \
00241 "punpcklwd "#t", "#q2" \n\t" \
00242 "punpckhwd "#t", "#q3" \n\t" \
00243 \
00244 MOVNTQ( q0, (dst, index, 4))\
00245 MOVNTQ( b, 8(dst, index, 4))\
00246 MOVNTQ( q2, 16(dst, index, 4))\
00247 MOVNTQ( q3, 24(dst, index, 4))\
00248 \
00249 "add $8, "#index" \n\t"\
00250 "cmp "#dstw", "#index" \n\t"\
00251 " jb 1b \n\t"
/* Indirection so that macro arguments are expanded before stringification. */
00252 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
00253
/*
 * Multi-tap vertical scale plus YUV->RGB conversion writing 4 bytes per
 * pixel, using the higher-precision YSCALEYUV2PACKEDX_ACCURATE path.
 *
 * Each branch is one inline-asm statement assembled from the macros above.
 * The filter and source-pointer parameters are not referenced directly in
 * this body: the asm reads its inputs relative to &c->redDither (operand
 * %0). dest, dstW and c->uv_off_byte are passed as operands %4, %5 and %6
 * through YSCALEYUV2PACKEDX_END.
 *
 * When CONFIG_SWSCALE_ALPHA is set and c->alpPixBuf is non-null, the alpha
 * plane is filtered too; otherwise the fourth byte of every pixel is 0xFF.
 */
00254 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
00255 const int16_t **lumSrc, int lumFilterSize,
00256 const int16_t *chrFilter, const int16_t **chrUSrc,
00257 const int16_t **chrVSrc,
00258 int chrFilterSize, const int16_t **alpSrc,
00259 uint8_t *dest, int dstW, int dstY)
00260 {
00261 x86_reg dummy=0;
00262 x86_reg dstW_reg = dstW;
00263 x86_reg uv_off = c->uv_off_byte;
00264
00265 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00266 YSCALEYUV2PACKEDX_ACCURATE
00267 YSCALEYUV2RGBX
/* Park the packed colour bytes: the alpha filter pass below overwrites every MMX register. */
00268 "movq %%mm2, "U_TEMP"(%0) \n\t"
00269 "movq %%mm4, "V_TEMP"(%0) \n\t"
00270 "movq %%mm5, "Y_TEMP"(%0) \n\t"
/* The pass ends by reloading U_TEMP into mm3 and V_TEMP into mm4, so the first two components come back in mm3/mm4. */
00271 YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
00272 "movq "Y_TEMP"(%0), %%mm5 \n\t"
/* Scale the filtered alpha words down by 3 bits and pack them to unsigned bytes. */
00273 "psraw $3, %%mm1 \n\t"
00274 "psraw $3, %%mm7 \n\t"
00275 "packuswb %%mm7, %%mm1 \n\t"
00276 WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
00277 YSCALEYUV2PACKEDX_END
00278 } else {
00279 YSCALEYUV2PACKEDX_ACCURATE
00280 YSCALEYUV2RGBX
/* No alpha plane: set every bit of mm7 so the alpha byte is 0xFF. */
00281 "pcmpeqd %%mm7, %%mm7 \n\t"
00282 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00283 YSCALEYUV2PACKEDX_END
00284 }
00285 }
00286
/*
 * Multi-tap vertical scale plus YUV->RGB conversion writing 4 bytes per
 * pixel, using the pmulhw-based YSCALEYUV2PACKEDX path (the counterpart of
 * yuv2rgb32_X_ar, which uses the ACCURATE macros).
 *
 * The filter and source-pointer parameters are not referenced directly in
 * this body: the asm reads its inputs relative to &c->redDither (operand
 * %0). dest, dstW and c->uv_off_byte are operands %4, %5 and %6.
 *
 * When CONFIG_SWSCALE_ALPHA is set and c->alpPixBuf is non-null, the alpha
 * plane is filtered as well; otherwise the fourth byte of every pixel is
 * 0xFF.
 */
00287 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
00288 const int16_t **lumSrc, int lumFilterSize,
00289 const int16_t *chrFilter, const int16_t **chrUSrc,
00290 const int16_t **chrVSrc,
00291 int chrFilterSize, const int16_t **alpSrc,
00292 uint8_t *dest, int dstW, int dstY)
00293 {
00294 x86_reg dummy=0;
00295 x86_reg dstW_reg = dstW;
00296 x86_reg uv_off = c->uv_off_byte;
00297
00298 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00299 YSCALEYUV2PACKEDX
00300 YSCALEYUV2RGBX
/* Alpha pass: mm0/mm3/mm6 are scratch and mm1/mm7 receive the result, leaving the colour bytes in mm2/mm4/mm5 untouched. */
00301 YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
00302 "psraw $3, %%mm1 \n\t"
00303 "psraw $3, %%mm7 \n\t"
00304 "packuswb %%mm7, %%mm1 \n\t"
00305 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00306 YSCALEYUV2PACKEDX_END
00307 } else {
00308 YSCALEYUV2PACKEDX
00309 YSCALEYUV2RGBX
/* No alpha plane: set every bit of mm7 so the alpha byte is 0xFF. */
00310 "pcmpeqd %%mm7, %%mm7 \n\t"
00311 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00312 YSCALEYUV2PACKEDX_END
00313 }
00314 }
00315
/*
 * REAL_WRITERGB16(dst, dstw, index): asm fragment packing 8 pixels from
 * component bytes in mm2 / mm4 / mm5 (the b / g / r outputs of the
 * conversion macros) into 16-bit pixels.
 *
 * mm2 and mm5 are masked with the constant bF8 and mm4 with bFC (both
 * defined elsewhere). mm2 is shifted right by 3 and interleaved with mm5,
 * mm4 is widened against mm7 and shifted left by 3, and the two halves are
 * OR-ed together.
 * Requires mm7 == 0 on entry (callers in this file issue pxor mm7,mm7).
 * Two quadwords are stored at dst + index*2; index then advances by 8 and
 * the code jumps back to label "1:" while index is below dstw. mm1 and mm3
 * are scratch.
 */
00316 #define REAL_WRITERGB16(dst, dstw, index) \
00317 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00318 "pand "MANGLE(bFC)", %%mm4 \n\t" \
00319 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00320 "psrlq $3, %%mm2 \n\t"\
00321 \
00322 "movq %%mm2, %%mm1 \n\t"\
00323 "movq %%mm4, %%mm3 \n\t"\
00324 \
00325 "punpcklbw %%mm7, %%mm3 \n\t"\
00326 "punpcklbw %%mm5, %%mm2 \n\t"\
00327 "punpckhbw %%mm7, %%mm4 \n\t"\
00328 "punpckhbw %%mm5, %%mm1 \n\t"\
00329 \
00330 "psllq $3, %%mm3 \n\t"\
00331 "psllq $3, %%mm4 \n\t"\
00332 \
00333 "por %%mm3, %%mm2 \n\t"\
00334 "por %%mm4, %%mm1 \n\t"\
00335 \
00336 MOVNTQ(%%mm2, (dst, index, 2))\
00337 MOVNTQ(%%mm1, 8(dst, index, 2))\
00338 \
00339 "add $8, "#index" \n\t"\
00340 "cmp "#dstw", "#index" \n\t"\
00341 " jb 1b \n\t"
/* Indirection so that macro arguments are expanded before stringification. */
00342 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
00343
/*
 * Multi-tap vertical scale plus YUV->RGB conversion writing 16-bit pixels
 * through WRITERGB16, using the higher-precision ACCURATE filter macros.
 *
 * The filter and source-pointer parameters are not referenced directly in
 * this body: the asm reads its inputs relative to &c->redDither (operand
 * %0). dest, dstW and c->uv_off_byte are operands %4, %5 and %6.
 * When DITHER1XBPP is defined, per-component dither bytes stored in the
 * context are added with unsigned saturation before packing.
 */
00344 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
00345 const int16_t **lumSrc, int lumFilterSize,
00346 const int16_t *chrFilter, const int16_t **chrUSrc,
00347 const int16_t **chrVSrc,
00348 int chrFilterSize, const int16_t **alpSrc,
00349 uint8_t *dest, int dstW, int dstY)
00350 {
00351 x86_reg dummy=0;
00352 x86_reg dstW_reg = dstW;
00353 x86_reg uv_off = c->uv_off_byte;
00354
00355 YSCALEYUV2PACKEDX_ACCURATE
00356 YSCALEYUV2RGBX
/* WRITERGB16 unpacks against mm7 and needs it to be zero. */
00357 "pxor %%mm7, %%mm7 \n\t"
00358
00359 #ifdef DITHER1XBPP
00360 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
00361 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
00362 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
00363 #endif
00364 WRITERGB16(%4, %5, %%REGa)
00365 YSCALEYUV2PACKEDX_END
00366 }
00367
/*
 * Multi-tap vertical scale plus YUV->RGB conversion writing 16-bit pixels
 * through WRITERGB16, using the pmulhw-based YSCALEYUV2PACKEDX filter
 * (the counterpart of yuv2rgb565_X_ar).
 *
 * The filter and source-pointer parameters are not referenced directly in
 * this body: the asm reads its inputs relative to &c->redDither (operand
 * %0). dest, dstW and c->uv_off_byte are operands %4, %5 and %6.
 * When DITHER1XBPP is defined, per-component dither bytes stored in the
 * context are added with unsigned saturation before packing.
 */
00368 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
00369 const int16_t **lumSrc, int lumFilterSize,
00370 const int16_t *chrFilter, const int16_t **chrUSrc,
00371 const int16_t **chrVSrc,
00372 int chrFilterSize, const int16_t **alpSrc,
00373 uint8_t *dest, int dstW, int dstY)
00374 {
00375 x86_reg dummy=0;
00376 x86_reg dstW_reg = dstW;
00377 x86_reg uv_off = c->uv_off_byte;
00378
00379 YSCALEYUV2PACKEDX
00380 YSCALEYUV2RGBX
/* WRITERGB16 unpacks against mm7 and needs it to be zero. */
00381 "pxor %%mm7, %%mm7 \n\t"
00382
00383 #ifdef DITHER1XBPP
00384 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
00385 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
00386 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
00387 #endif
00388 WRITERGB16(%4, %5, %%REGa)
00389 YSCALEYUV2PACKEDX_END
00390 }
00391
/*
 * REAL_WRITERGB15(dst, dstw, index): asm fragment packing 8 pixels from
 * component bytes in mm2 / mm4 / mm5 (b / g / r) into 16-bit pixels.
 *
 * It differs from REAL_WRITERGB16 in three places: all three components
 * are masked with bF8, mm5 is additionally shifted right by 1, and the
 * widened mm4 words are shifted left by 2 instead of 3.
 * Requires mm7 == 0 on entry. Two quadwords are stored at dst + index*2;
 * index then advances by 8 and the code jumps back to label "1:" while
 * index is below dstw. mm1 and mm3 are scratch.
 */
00392 #define REAL_WRITERGB15(dst, dstw, index) \
00393 "pand "MANGLE(bF8)", %%mm2 \n\t" \
00394 "pand "MANGLE(bF8)", %%mm4 \n\t" \
00395 "pand "MANGLE(bF8)", %%mm5 \n\t" \
00396 "psrlq $3, %%mm2 \n\t"\
00397 "psrlq $1, %%mm5 \n\t"\
00398 \
00399 "movq %%mm2, %%mm1 \n\t"\
00400 "movq %%mm4, %%mm3 \n\t"\
00401 \
00402 "punpcklbw %%mm7, %%mm3 \n\t"\
00403 "punpcklbw %%mm5, %%mm2 \n\t"\
00404 "punpckhbw %%mm7, %%mm4 \n\t"\
00405 "punpckhbw %%mm5, %%mm1 \n\t"\
00406 \
00407 "psllq $2, %%mm3 \n\t"\
00408 "psllq $2, %%mm4 \n\t"\
00409 \
00410 "por %%mm3, %%mm2 \n\t"\
00411 "por %%mm4, %%mm1 \n\t"\
00412 \
00413 MOVNTQ(%%mm2, (dst, index, 2))\
00414 MOVNTQ(%%mm1, 8(dst, index, 2))\
00415 \
00416 "add $8, "#index" \n\t"\
00417 "cmp "#dstw", "#index" \n\t"\
00418 " jb 1b \n\t"
/* Indirection so that macro arguments are expanded before stringification. */
00419 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
00420
/*
 * Multi-tap vertical scale plus YUV->RGB conversion writing 16-bit pixels
 * through WRITERGB15, using the higher-precision ACCURATE filter macros.
 *
 * The filter and source-pointer parameters are not referenced directly in
 * this body: the asm reads its inputs relative to &c->redDither (operand
 * %0). dest, dstW and c->uv_off_byte are operands %4, %5 and %6.
 * When DITHER1XBPP is defined, per-component dither bytes stored in the
 * context are added with unsigned saturation before packing.
 */
00421 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
00422 const int16_t **lumSrc, int lumFilterSize,
00423 const int16_t *chrFilter, const int16_t **chrUSrc,
00424 const int16_t **chrVSrc,
00425 int chrFilterSize, const int16_t **alpSrc,
00426 uint8_t *dest, int dstW, int dstY)
00427 {
00428 x86_reg dummy=0;
00429 x86_reg dstW_reg = dstW;
00430 x86_reg uv_off = c->uv_off_byte;
00431
00432 YSCALEYUV2PACKEDX_ACCURATE
00433 YSCALEYUV2RGBX
/* WRITERGB15 unpacks against mm7 and needs it to be zero. */
00434 "pxor %%mm7, %%mm7 \n\t"
00435
00436 #ifdef DITHER1XBPP
00437 "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
00438 "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
00439 "paddusb "RED_DITHER"(%0), %%mm5\n\t"
00440 #endif
00441 WRITERGB15(%4, %5, %%REGa)
00442 YSCALEYUV2PACKEDX_END
00443 }
00444
/*
 * Multi-tap vertical scale plus YUV->RGB conversion writing 16-bit pixels
 * through WRITERGB15, using the pmulhw-based YSCALEYUV2PACKEDX filter
 * (the counterpart of yuv2rgb555_X_ar).
 *
 * The filter and source-pointer parameters are not referenced directly in
 * this body: the asm reads its inputs relative to &c->redDither (operand
 * %0). dest, dstW and c->uv_off_byte are operands %4, %5 and %6.
 * When DITHER1XBPP is defined, per-component dither bytes stored in the
 * context are added with unsigned saturation before packing.
 */
00445 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
00446 const int16_t **lumSrc, int lumFilterSize,
00447 const int16_t *chrFilter, const int16_t **chrUSrc,
00448 const int16_t **chrVSrc,
00449 int chrFilterSize, const int16_t **alpSrc,
00450 uint8_t *dest, int dstW, int dstY)
00451 {
00452 x86_reg dummy=0;
00453 x86_reg dstW_reg = dstW;
00454 x86_reg uv_off = c->uv_off_byte;
00455
00456 YSCALEYUV2PACKEDX
00457 YSCALEYUV2RGBX
/* WRITERGB15 unpacks against mm7 and needs it to be zero. */
00458 "pxor %%mm7, %%mm7 \n\t"
00459
00460 #ifdef DITHER1XBPP
00461 "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
00462 "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
00463 "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
00464 #endif
00465 WRITERGB15(%4, %5, %%REGa)
00466 YSCALEYUV2PACKEDX_END
00467 }
00468
/*
 * WRITEBGR24MMX(dst, dstw, index): asm fragment writing 8 pixels as 24
 * bytes (3 bytes per pixel) using only shift/unpack/or instructions.
 *
 * In: mm2 / mm4 / mm5 = b / g / r bytes. mm7 must be zero: it is
 * interleaved with mm5 to provide the empty fourth byte of each
 * intermediate 4-byte pixel.
 * The components are first interleaved into four quadwords of two 4-byte
 * pixels each. The psllq $40 / punpckhdq pairs then squeeze the empty byte
 * out of every quadword, and the remaining shifts and ORs splice the
 * results into three contiguous quadwords stored at dst, dst+8 and dst+16.
 *
 * Unlike the other writers, dst is not indexed: it is a register advanced
 * by 24 per iteration. index advances by 8 and the code jumps back to
 * label "1:" while index is below dstw. All of mm0-mm7 are overwritten.
 */
00469 #define WRITEBGR24MMX(dst, dstw, index) \
00470 \
00471 "movq %%mm2, %%mm1 \n\t" \
00472 "movq %%mm5, %%mm6 \n\t" \
00473 "punpcklbw %%mm4, %%mm2 \n\t" \
00474 "punpcklbw %%mm7, %%mm5 \n\t" \
00475 "punpckhbw %%mm4, %%mm1 \n\t" \
00476 "punpckhbw %%mm7, %%mm6 \n\t" \
00477 "movq %%mm2, %%mm0 \n\t" \
00478 "movq %%mm1, %%mm3 \n\t" \
00479 "punpcklwd %%mm5, %%mm0 \n\t" \
00480 "punpckhwd %%mm5, %%mm2 \n\t" \
00481 "punpcklwd %%mm6, %%mm1 \n\t" \
00482 "punpckhwd %%mm6, %%mm3 \n\t" \
00483 \
00484 "movq %%mm0, %%mm4 \n\t" \
00485 "movq %%mm2, %%mm6 \n\t" \
00486 "movq %%mm1, %%mm5 \n\t" \
00487 "movq %%mm3, %%mm7 \n\t" \
00488 \
00489 "psllq $40, %%mm0 \n\t" \
00490 "psllq $40, %%mm2 \n\t" \
00491 "psllq $40, %%mm1 \n\t" \
00492 "psllq $40, %%mm3 \n\t" \
00493 \
00494 "punpckhdq %%mm4, %%mm0 \n\t" \
00495 "punpckhdq %%mm6, %%mm2 \n\t" \
00496 "punpckhdq %%mm5, %%mm1 \n\t" \
00497 "punpckhdq %%mm7, %%mm3 \n\t" \
00498 \
00499 "psrlq $8, %%mm0 \n\t" \
00500 "movq %%mm2, %%mm6 \n\t" \
00501 "psllq $40, %%mm2 \n\t" \
00502 "por %%mm2, %%mm0 \n\t" \
00503 MOVNTQ(%%mm0, (dst))\
00504 \
00505 "psrlq $24, %%mm6 \n\t" \
00506 "movq %%mm1, %%mm5 \n\t" \
00507 "psllq $24, %%mm1 \n\t" \
00508 "por %%mm1, %%mm6 \n\t" \
00509 MOVNTQ(%%mm6, 8(dst))\
00510 \
00511 "psrlq $40, %%mm5 \n\t" \
00512 "psllq $8, %%mm3 \n\t" \
00513 "por %%mm3, %%mm5 \n\t" \
00514 MOVNTQ(%%mm5, 16(dst))\
00515 \
00516 "add $24, "#dst" \n\t"\
00517 \
00518 "add $8, "#index" \n\t"\
00519 "cmp "#dstw", "#index" \n\t"\
00520 " jb 1b \n\t"
00521
/*
 * WRITEBGR24MMX2(dst, dstw, index): pshufw-based variant of WRITEBGR24MMX,
 * writing 8 pixels as 24 bytes (3 bytes per pixel).
 *
 * In: mm2 / mm4 / mm5 = b / g / r bytes. For each of the three output
 * quadwords, pshufw replicates the relevant words of every component. The
 * byte-select masks ff_M24A / ff_M24B / ff_M24C (defined elsewhere) keep
 * the bytes that belong in that quadword, and the pieces are OR-ed
 * together. mm4 is shifted by 8 (psllq on its shuffled copy for the first
 * quadword, psrlq on mm4 itself afterwards) to line it up with the other
 * two components.
 *
 * mm0 and mm7 hold ff_M24A and ff_M24C for the whole fragment, so mm7 does
 * not need to be zero on entry. mm1, mm3, mm4 and mm6 are overwritten.
 * dst is advanced by 24 and index by 8; the code jumps back to label "1:"
 * while index is below dstw.
 */
00522 #define WRITEBGR24MMX2(dst, dstw, index) \
00523 \
00524 "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
00525 "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
00526 "pshufw $0x50, %%mm2, %%mm1 \n\t" \
00527 "pshufw $0x50, %%mm4, %%mm3 \n\t" \
00528 "pshufw $0x00, %%mm5, %%mm6 \n\t" \
00529 \
00530 "pand %%mm0, %%mm1 \n\t" \
00531 "pand %%mm0, %%mm3 \n\t" \
00532 "pand %%mm7, %%mm6 \n\t" \
00533 \
00534 "psllq $8, %%mm3 \n\t" \
00535 "por %%mm1, %%mm6 \n\t"\
00536 "por %%mm3, %%mm6 \n\t"\
00537 MOVNTQ(%%mm6, (dst))\
00538 \
00539 "psrlq $8, %%mm4 \n\t" \
00540 "pshufw $0xA5, %%mm2, %%mm1 \n\t" \
00541 "pshufw $0x55, %%mm4, %%mm3 \n\t" \
00542 "pshufw $0xA5, %%mm5, %%mm6 \n\t" \
00543 \
00544 "pand "MANGLE(ff_M24B)", %%mm1 \n\t" \
00545 "pand %%mm7, %%mm3 \n\t" \
00546 "pand %%mm0, %%mm6 \n\t" \
00547 \
00548 "por %%mm1, %%mm3 \n\t" \
00549 "por %%mm3, %%mm6 \n\t"\
00550 MOVNTQ(%%mm6, 8(dst))\
00551 \
00552 "pshufw $0xFF, %%mm2, %%mm1 \n\t" \
00553 "pshufw $0xFA, %%mm4, %%mm3 \n\t" \
00554 "pshufw $0xFA, %%mm5, %%mm6 \n\t" \
00555 \
00556 "pand %%mm7, %%mm1 \n\t" \
00557 "pand %%mm0, %%mm3 \n\t" \
00558 "pand "MANGLE(ff_M24B)", %%mm6 \n\t" \
00559 \
00560 "por %%mm1, %%mm3 \n\t"\
00561 "por %%mm3, %%mm6 \n\t"\
00562 MOVNTQ(%%mm6, 16(dst))\
00563 \
00564 "add $24, "#dst" \n\t"\
00565 \
00566 "add $8, "#index" \n\t"\
00567 "cmp "#dstw", "#index" \n\t"\
00568 " jb 1b \n\t"
00569
/*
 * WRITEBGR24: selects the 24-bit writer for this template instance, the
 * pshufw-based WRITEBGR24MMX2 when COMPILE_TEMPLATE_MMX2 is set and the
 * plain-MMX WRITEBGR24MMX otherwise. Any earlier definition is dropped first.
 */
00570 #if COMPILE_TEMPLATE_MMX2
00571 #undef WRITEBGR24
00572 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
00573 #else
00574 #undef WRITEBGR24
00575 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
00576 #endif
00577
/*
 * Multi-tap vertical scale plus YUV->RGB conversion writing 3 bytes per
 * pixel through WRITEBGR24, using the higher-precision ACCURATE filter
 * macros.
 *
 * The filter and source-pointer parameters are not referenced directly in
 * this body: the asm reads its inputs relative to &c->redDither (operand
 * %0). The operand list is spelled out here instead of using
 * YSCALEYUV2PACKEDX_END because REG_c is used as the output pointer and
 * must be added to the clobbers.
 */
00578 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
00579 const int16_t **lumSrc, int lumFilterSize,
00580 const int16_t *chrFilter, const int16_t **chrUSrc,
00581 const int16_t **chrVSrc,
00582 int chrFilterSize, const int16_t **alpSrc,
00583 uint8_t *dest, int dstW, int dstY)
00584 {
00585 x86_reg dummy=0;
00586 x86_reg dstW_reg = dstW;
00587 x86_reg uv_off = c->uv_off_byte;
00588
00589 YSCALEYUV2PACKEDX_ACCURATE
00590 YSCALEYUV2RGBX
/* mm7 = 0 is required by the plain-MMX writer. */
00591 "pxor %%mm7, %%mm7 \n\t"
/* REG_c = dest + 3*index, recomputed on every pass of loop "1:". */
00592 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t"
00593 "add %4, %%"REG_c" \n\t"
00594 WRITEBGR24(%%REGc, %5, %%REGa)
00595 :: "r" (&c->redDither),
00596 "m" (dummy), "m" (dummy), "m" (dummy),
00597 "r" (dest), "m" (dstW_reg), "m"(uv_off)
00598 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
00599 );
00600 }
00601
/*
 * Multi-tap vertical scale plus YUV->RGB conversion writing 3 bytes per
 * pixel through WRITEBGR24, using the pmulhw-based YSCALEYUV2PACKEDX filter
 * (the counterpart of yuv2bgr24_X_ar).
 *
 * The filter and source-pointer parameters are not referenced directly in
 * this body: the asm reads its inputs relative to &c->redDither (operand
 * %0). The operand list is spelled out here instead of using
 * YSCALEYUV2PACKEDX_END because REG_c is used as the output pointer and
 * must be added to the clobbers.
 */
00602 static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
00603 const int16_t **lumSrc, int lumFilterSize,
00604 const int16_t *chrFilter, const int16_t **chrUSrc,
00605 const int16_t **chrVSrc,
00606 int chrFilterSize, const int16_t **alpSrc,
00607 uint8_t *dest, int dstW, int dstY)
00608 {
00609 x86_reg dummy=0;
00610 x86_reg dstW_reg = dstW;
00611 x86_reg uv_off = c->uv_off_byte;
00612
00613 YSCALEYUV2PACKEDX
00614 YSCALEYUV2RGBX
/* mm7 = 0 is required by the plain-MMX writer. */
00615 "pxor %%mm7, %%mm7 \n\t"
/* REG_c = dest + 3*index, recomputed on every pass of loop "1:". */
00616 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t"
00617 "add %4, %%"REG_c" \n\t"
00618 WRITEBGR24(%%REGc, %5, %%REGa)
00619 :: "r" (&c->redDither),
00620 "m" (dummy), "m" (dummy), "m" (dummy),
00621 "r" (dest), "m" (dstW_reg), "m"(uv_off)
00622 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
00623 );
00624 }
00625
/*
 * REAL_WRITEYUY2(dst, dstw, index): asm fragment writing 8 pixels of packed
 * 4:2:2 output (16 bytes).
 *
 * In: mm3 = U words, mm4 = V words, mm1/mm7 = two quadwords of Y words, all
 * already scaled to byte range by the caller.
 * The words are packed to unsigned bytes with saturation. U and V bytes are
 * interleaved (U first), and the Y bytes are then interleaved with that
 * chroma stream, giving the byte order Y U Y V. Two quadwords are stored at
 * dst + index*2.
 * index advances by 8 and the code jumps back to label "1:" while index is
 * below dstw. mm1, mm3, mm4 and mm7 are overwritten.
 */
00626 #define REAL_WRITEYUY2(dst, dstw, index) \
00627 "packuswb %%mm3, %%mm3 \n\t"\
00628 "packuswb %%mm4, %%mm4 \n\t"\
00629 "packuswb %%mm7, %%mm1 \n\t"\
00630 "punpcklbw %%mm4, %%mm3 \n\t"\
00631 "movq %%mm1, %%mm7 \n\t"\
00632 "punpcklbw %%mm3, %%mm1 \n\t"\
00633 "punpckhbw %%mm3, %%mm7 \n\t"\
00634 \
00635 MOVNTQ(%%mm1, (dst, index, 2))\
00636 MOVNTQ(%%mm7, 8(dst, index, 2))\
00637 \
00638 "add $8, "#index" \n\t"\
00639 "cmp "#dstw", "#index" \n\t"\
00640 " jb 1b \n\t"
/* Indirection so that macro arguments are expanded before stringification. */
00641 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
00642
/*
 * Multi-tap vertical scale writing packed 4:2:2 output through WRITEYUY2,
 * using the higher-precision ACCURATE filter macros. No colour-space
 * conversion is performed.
 *
 * The filter and source-pointer parameters are not referenced directly in
 * this body: the asm reads its inputs relative to &c->redDither (operand
 * %0). dest, dstW and c->uv_off_byte are operands %4, %5 and %6.
 */
00643 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
00644 const int16_t **lumSrc, int lumFilterSize,
00645 const int16_t *chrFilter, const int16_t **chrUSrc,
00646 const int16_t **chrVSrc,
00647 int chrFilterSize, const int16_t **alpSrc,
00648 uint8_t *dest, int dstW, int dstY)
00649 {
00650 x86_reg dummy=0;
00651 x86_reg dstW_reg = dstW;
00652 x86_reg uv_off = c->uv_off_byte;
00653
00654 YSCALEYUV2PACKEDX_ACCURATE
00655
/* Drop 3 fractional bits from the filtered U, V and Y words before WRITEYUY2 packs them to bytes. */
00656 "psraw $3, %%mm3 \n\t"
00657 "psraw $3, %%mm4 \n\t"
00658 "psraw $3, %%mm1 \n\t"
00659 "psraw $3, %%mm7 \n\t"
00660 WRITEYUY2(%4, %5, %%REGa)
00661 YSCALEYUV2PACKEDX_END
00662 }
00663
/*
 * Multi-tap vertical scale writing packed 4:2:2 output through WRITEYUY2,
 * using the pmulhw-based YSCALEYUV2PACKEDX filter (the counterpart of
 * yuv2yuyv422_X_ar). No colour-space conversion is performed.
 *
 * The filter and source-pointer parameters are not referenced directly in
 * this body: the asm reads its inputs relative to &c->redDither (operand
 * %0). dest, dstW and c->uv_off_byte are operands %4, %5 and %6.
 */
00664 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
00665 const int16_t **lumSrc, int lumFilterSize,
00666 const int16_t *chrFilter, const int16_t **chrUSrc,
00667 const int16_t **chrVSrc,
00668 int chrFilterSize, const int16_t **alpSrc,
00669 uint8_t *dest, int dstW, int dstY)
00670 {
00671 x86_reg dummy=0;
00672 x86_reg dstW_reg = dstW;
00673 x86_reg uv_off = c->uv_off_byte;
00674
00675 YSCALEYUV2PACKEDX
00676
/* Drop 3 fractional bits from the filtered U, V and Y words before WRITEYUY2 packs them to bytes. */
00677 "psraw $3, %%mm3 \n\t"
00678 "psraw $3, %%mm4 \n\t"
00679 "psraw $3, %%mm1 \n\t"
00680 "psraw $3, %%mm7 \n\t"
00681 WRITEYUY2(%4, %5, %%REGa)
00682 YSCALEYUV2PACKEDX_END
00683 }
00684
/*
 * REAL_YSCALEYUV2RGB_UV(index, c): chroma half of the two-line blend used
 * by the *_2 output functions. Zeroes index and defines loop label "1:".
 *
 * Operands %2 and %3 are the two chroma line buffers; c is the register
 * holding the context base (&c->redDither at the call sites here).
 * Four words are loaded from each buffer at index, and four more from each
 * after temporarily adding the value at UV_OFF_BYTE(c) to index.
 * For each pair the macro computes
 *     (line1 >> 4) + pmulhw(line0 - line1, coeff)
 * with coeff read from CHR_MMX_FILTER_OFFSET+8(c), leaving the results in
 * mm3 (first plane) and mm4 (second plane).
 * It then subtracts U_OFFSET / V_OFFSET, copies the centred values to
 * mm2 / mm5 and multiplies mm3 / mm4 by UG_COEFF / VG_COEFF, so the state
 * is ready for REAL_YSCALEYUV2RGB_COEFF. mm0 is scratch.
 */
00685 #define REAL_YSCALEYUV2RGB_UV(index, c) \
00686 "xor "#index", "#index" \n\t"\
00687 ".p2align 4 \n\t"\
00688 "1: \n\t"\
00689 "movq (%2, "#index"), %%mm2 \n\t" \
00690 "movq (%3, "#index"), %%mm3 \n\t" \
00691 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00692 "movq (%2, "#index"), %%mm5 \n\t" \
00693 "movq (%3, "#index"), %%mm4 \n\t" \
00694 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00695 "psubw %%mm3, %%mm2 \n\t" \
00696 "psubw %%mm4, %%mm5 \n\t" \
00697 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00698 "pmulhw %%mm0, %%mm2 \n\t" \
00699 "pmulhw %%mm0, %%mm5 \n\t" \
00700 "psraw $4, %%mm3 \n\t" \
00701 "psraw $4, %%mm4 \n\t" \
00702 "paddw %%mm2, %%mm3 \n\t" \
00703 "paddw %%mm5, %%mm4 \n\t" \
00704 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00705 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00706 "movq %%mm3, %%mm2 \n\t" \
00707 "movq %%mm4, %%mm5 \n\t" \
00708 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00709 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00710 \
00711
/*
 * REAL_YSCALEYUV2RGB_YA(index, c, b1, b2): two-line blend of a full-width
 * plane. In this file it is used for luma and, with other buffers, for
 * alpha.
 *
 * b1 and b2 are registers holding the two line buffers. Eight words are
 * read from each at index*2 (two quadwords) and blended as
 *     (b2 >> 4) + pmulhw(b1 - b2, coeff)
 * with coeff read from LUM_MMX_FILTER_OFFSET+8(c), including when the
 * macro is applied to alpha buffers.
 * Results: mm1 (first four samples) and mm7 (next four). mm0 and mm6 are
 * scratch.
 */
00712 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
00713 "movq ("#b1", "#index", 2), %%mm0 \n\t" \
00714 "movq ("#b2", "#index", 2), %%mm1 \n\t" \
00715 "movq 8("#b1", "#index", 2), %%mm6 \n\t" \
00716 "movq 8("#b2", "#index", 2), %%mm7 \n\t" \
00717 "psubw %%mm1, %%mm0 \n\t" \
00718 "psubw %%mm7, %%mm6 \n\t" \
00719 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00720 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00721 "psraw $4, %%mm1 \n\t" \
00722 "psraw $4, %%mm7 \n\t" \
00723 "paddw %%mm0, %%mm1 \n\t" \
00724 "paddw %%mm6, %%mm7 \n\t" \
00725
/*
 * REAL_YSCALEYUV2RGB_COEFF(c): final colour-matrix step for the two-line
 * path, with constants addressed relative to register c.
 *
 * In:  mm2 / mm5 = centred U / V words, mm3 / mm4 = the U and V green
 *      products (as left by REAL_YSCALEYUV2RGB_UV), mm1 / mm7 = blended Y.
 * Steps: multiply mm2 by UB_COEFF and mm5 by VR_COEFF; subtract Y_OFFSET
 * from luma and multiply it by Y_COEFF; sum the green terms; duplicate
 * every chroma term for two luma samples; add luma; pack to unsigned bytes
 * with saturation.
 * Out: mm2 / mm4 / mm5 = b / g / r bytes, the same layout YSCALEYUV2RGBX
 *      produces.
 * mm0, mm1, mm3, mm6 and mm7 are overwritten.
 */
00726 #define REAL_YSCALEYUV2RGB_COEFF(c) \
00727 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00728 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00729 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00730 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00731 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00732 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00733 \
00734 "paddw %%mm3, %%mm4 \n\t"\
00735 "movq %%mm2, %%mm0 \n\t"\
00736 "movq %%mm5, %%mm6 \n\t"\
00737 "movq %%mm4, %%mm3 \n\t"\
00738 "punpcklwd %%mm2, %%mm2 \n\t"\
00739 "punpcklwd %%mm5, %%mm5 \n\t"\
00740 "punpcklwd %%mm4, %%mm4 \n\t"\
00741 "paddw %%mm1, %%mm2 \n\t"\
00742 "paddw %%mm1, %%mm5 \n\t"\
00743 "paddw %%mm1, %%mm4 \n\t"\
00744 "punpckhwd %%mm0, %%mm0 \n\t"\
00745 "punpckhwd %%mm6, %%mm6 \n\t"\
00746 "punpckhwd %%mm3, %%mm3 \n\t"\
00747 "paddw %%mm7, %%mm0 \n\t"\
00748 "paddw %%mm7, %%mm6 \n\t"\
00749 "paddw %%mm7, %%mm3 \n\t"\
00750 \
00751 "packuswb %%mm0, %%mm2 \n\t"\
00752 "packuswb %%mm6, %%mm5 \n\t"\
00753 "packuswb %%mm3, %%mm4 \n\t"\
00754
/* Indirection so that macro arguments are expanded before stringification. */
00755 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
00756
/*
 * YSCALEYUV2RGB(index, c): complete two-line blend plus colour conversion.
 * It runs the chroma blend, the luma blend over operands %0 / %1, and then
 * the colour matrix. Expects %0/%1 = the two luma lines, %2/%3 = the two
 * chroma lines, and c = the context base register. Leaves b / g / r bytes
 * in mm2 / mm4 / mm5 with loop label "1:" open for a WRITE* macro to close.
 */
00757 #define YSCALEYUV2RGB(index, c) \
00758 REAL_YSCALEYUV2RGB_UV(index, c) \
00759 REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
00760 REAL_YSCALEYUV2RGB_COEFF(c)
00761
/*
 * Two-line vertical blend plus YUV->RGB conversion writing 4 bytes per
 * pixel.
 *
 * Only element 0/1 of buf and ubuf (and of abuf when alpha is active) are
 * used. vbuf, dstW, yalpha, uvalpha and y are not referenced in this body:
 * the blend coefficients and the loop bound are read from the context,
 * relative to &c->redDither (operand %5).
 *
 * NOTE(review): the loop bound is the hard-coded displacement 8280(%5).
 * Presumably it addresses the context field holding the destination width;
 * confirm it matches the SwsContext layout and consider a named offset
 * macro.
 *
 * Three asm variants:
 *  - alpha, x86-64: r8 is the pixel index, and the alpha lines arrive as
 *    extra register operands %6 / %7.
 *  - alpha, otherwise: there are no spare registers, so the alpha line
 *    pointers are stashed in c->u_temp / c->v_temp and temporarily loaded
 *    into the registers of %0 / %1 (saved with push/pop).
 *  - no alpha: the alpha byte is forced to 0xFF.
 * In the last two variants REG_b holds dest and REG_BP the pixel index.
 * Both are saved and restored by hand (REG_b in the context slot at
 * ESP_OFFSET, REG_BP on the stack) instead of being declared as clobbers.
 */
00765 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
00766 const int16_t *ubuf[2], const int16_t *vbuf[2],
00767 const int16_t *abuf[2], uint8_t *dest,
00768 int dstW, int yalpha, int uvalpha, int y)
00769 {
00770 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00771 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00772
00773 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
00774 const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
00775 #if ARCH_X86_64
00776 __asm__ volatile(
00777 YSCALEYUV2RGB(%%r8, %5)
/* Blend the two alpha lines into mm1/mm7, then scale and pack them to bytes. */
00778 YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
00779 "psraw $3, %%mm1 \n\t"
00780 "psraw $3, %%mm7 \n\t"
00781 "packuswb %%mm7, %%mm1 \n\t"
00782 WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00783 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
00784 "a" (&c->redDither),
00785 "r" (abuf0), "r" (abuf1)
00786 : "%r8"
00787 );
00788 #else
/* Stash the alpha line pointers in the context so the asm can fetch them via U_TEMP / V_TEMP. */
00789 *(const uint16_t **)(&c->u_temp)=abuf0;
00790 *(const uint16_t **)(&c->v_temp)=abuf1;
00791 __asm__ volatile(
/* Save REG_b in the context, load dest into it, and free REG_BP for use as the index. */
00792 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00793 "mov %4, %%"REG_b" \n\t"
00794 "push %%"REG_BP" \n\t"
00795 YSCALEYUV2RGB(%%REGBP, %5)
/* Borrow the luma-pointer registers for the alpha lines; they are restored by the pops below. */
00796 "push %0 \n\t"
00797 "push %1 \n\t"
00798 "mov "U_TEMP"(%5), %0 \n\t"
00799 "mov "V_TEMP"(%5), %1 \n\t"
00800 YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
00801 "psraw $3, %%mm1 \n\t"
00802 "psraw $3, %%mm7 \n\t"
00803 "packuswb %%mm7, %%mm1 \n\t"
00804 "pop %1 \n\t"
00805 "pop %0 \n\t"
00806 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
00807 "pop %%"REG_BP" \n\t"
00808 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00809 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00810 "a" (&c->redDither)
00811 );
00812 #endif
00813 } else {
00814 __asm__ volatile(
/* Save REG_b in the context, load dest into it, and free REG_BP for use as the index. */
00815 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00816 "mov %4, %%"REG_b" \n\t"
00817 "push %%"REG_BP" \n\t"
00818 YSCALEYUV2RGB(%%REGBP, %5)
/* No alpha plane: set every bit of mm7 so the alpha byte is 0xFF. */
00819 "pcmpeqd %%mm7, %%mm7 \n\t"
00820 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
00821 "pop %%"REG_BP" \n\t"
00822 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00823 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00824 "a" (&c->redDither)
00825 );
00826 }
00827 }
00828
/*
 * Two-line vertical blend plus YUV->RGB conversion writing 3 bytes per
 * pixel through WRITEBGR24.
 *
 * Only element 0/1 of buf and ubuf are used. vbuf, abuf, dstW, yalpha,
 * uvalpha and y are not referenced in this body: the blend coefficients
 * and the loop bound are read from the context, relative to &c->redDither
 * (operand %5).
 * REG_b holds the output pointer and REG_BP the pixel index. Both are
 * saved and restored by hand (REG_b in the context slot at ESP_OFFSET,
 * REG_BP on the stack).
 * NOTE(review): the loop bound is the hard-coded displacement 8280(%5) --
 * confirm it matches the SwsContext layout.
 */
00829 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
00830 const int16_t *ubuf[2], const int16_t *vbuf[2],
00831 const int16_t *abuf[2], uint8_t *dest,
00832 int dstW, int yalpha, int uvalpha, int y)
00833 {
00834 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00835 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00836
00837
00838 __asm__ volatile(
00839 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00840 "mov %4, %%"REG_b" \n\t"
00841 "push %%"REG_BP" \n\t"
00842 YSCALEYUV2RGB(%%REGBP, %5)
/* mm7 = 0 is required by the plain-MMX 24-bit writer. */
00843 "pxor %%mm7, %%mm7 \n\t"
00844 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
00845 "pop %%"REG_BP" \n\t"
00846 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00847 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00848 "a" (&c->redDither)
00849 );
00850 }
00851
/*
 * Two-line vertical blend plus YUV->RGB conversion writing 16-bit pixels
 * through WRITERGB15.
 *
 * Only element 0/1 of buf and ubuf are used. vbuf, abuf, dstW, yalpha,
 * uvalpha and y are not referenced in this body: the blend coefficients
 * and the loop bound are read from the context, relative to &c->redDither
 * (operand %5).
 * REG_b holds dest and REG_BP the pixel index. Both are saved and restored
 * by hand (REG_b in the context slot at ESP_OFFSET, REG_BP on the stack).
 * When DITHER1XBPP is defined, dither bytes from the context are added
 * with unsigned saturation before packing.
 * NOTE(review): the loop bound is the hard-coded displacement 8280(%5) --
 * confirm it matches the SwsContext layout.
 */
00852 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
00853 const int16_t *ubuf[2], const int16_t *vbuf[2],
00854 const int16_t *abuf[2], uint8_t *dest,
00855 int dstW, int yalpha, int uvalpha, int y)
00856 {
00857 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00858 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00859
00860
00861 __asm__ volatile(
00862 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00863 "mov %4, %%"REG_b" \n\t"
00864 "push %%"REG_BP" \n\t"
00865 YSCALEYUV2RGB(%%REGBP, %5)
/* WRITERGB15 unpacks against mm7 and needs it to be zero. */
00866 "pxor %%mm7, %%mm7 \n\t"
00867
00868 #ifdef DITHER1XBPP
00869 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
00870 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
00871 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
00872 #endif
00873 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
00874 "pop %%"REG_BP" \n\t"
00875 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00876 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00877 "a" (&c->redDither)
00878 );
00879 }
00880
/*
 * Two-line vertical blend plus YUV->RGB conversion writing 16-bit pixels
 * through WRITERGB16.
 *
 * Only element 0/1 of buf and ubuf are used. vbuf, abuf, dstW, yalpha,
 * uvalpha and y are not referenced in this body: the blend coefficients
 * and the loop bound are read from the context, relative to &c->redDither
 * (operand %5).
 * REG_b holds dest and REG_BP the pixel index. Both are saved and restored
 * by hand (REG_b in the context slot at ESP_OFFSET, REG_BP on the stack).
 * When DITHER1XBPP is defined, dither bytes from the context are added
 * with unsigned saturation before packing.
 * NOTE(review): the loop bound is the hard-coded displacement 8280(%5) --
 * confirm it matches the SwsContext layout.
 */
00881 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
00882 const int16_t *ubuf[2], const int16_t *vbuf[2],
00883 const int16_t *abuf[2], uint8_t *dest,
00884 int dstW, int yalpha, int uvalpha, int y)
00885 {
00886 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00887 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00888
00889
00890 __asm__ volatile(
00891 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00892 "mov %4, %%"REG_b" \n\t"
00893 "push %%"REG_BP" \n\t"
00894 YSCALEYUV2RGB(%%REGBP, %5)
/* WRITERGB16 unpacks against mm7 and needs it to be zero. */
00895 "pxor %%mm7, %%mm7 \n\t"
00896
00897 #ifdef DITHER1XBPP
00898 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
00899 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
00900 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
00901 #endif
00902 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
00903 "pop %%"REG_BP" \n\t"
00904 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00905 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00906 "a" (&c->redDither)
00907 );
00908 }
00909
/*
 * REAL_YSCALEYUV2PACKED(index, c): two-line blend of chroma and luma for
 * packed YUV output (no colour conversion).
 *
 * Side effect: before the loop, the coefficient quadwords stored in the
 * context at CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c) are
 * shifted right by 3 and written back, so the context is modified in
 * place.
 *
 * The loop body mirrors the RGB two-line blend, but the second-line
 * samples are shifted right by 7 instead of 4:
 *     (line1 >> 7) + pmulhw(line0 - line1, coeff)
 * %0/%1 are the two luma lines and %2/%3 the two chroma lines. The second
 * chroma plane is reached by temporarily adding UV_OFF_BYTE(c) to index.
 * Out: mm3 / mm4 = blended chroma, mm1 / mm7 = blended luma, with loop
 * label "1:" left open for WRITEYUY2. mm0, mm2, mm5 and mm6 are scratch.
 */
00910 #define REAL_YSCALEYUV2PACKED(index, c) \
00911 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00912 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
00913 "psraw $3, %%mm0 \n\t"\
00914 "psraw $3, %%mm1 \n\t"\
00915 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00916 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
00917 "xor "#index", "#index" \n\t"\
00918 ".p2align 4 \n\t"\
00919 "1: \n\t"\
00920 "movq (%2, "#index"), %%mm2 \n\t" \
00921 "movq (%3, "#index"), %%mm3 \n\t" \
00922 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00923 "movq (%2, "#index"), %%mm5 \n\t" \
00924 "movq (%3, "#index"), %%mm4 \n\t" \
00925 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00926 "psubw %%mm3, %%mm2 \n\t" \
00927 "psubw %%mm4, %%mm5 \n\t" \
00928 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
00929 "pmulhw %%mm0, %%mm2 \n\t" \
00930 "pmulhw %%mm0, %%mm5 \n\t" \
00931 "psraw $7, %%mm3 \n\t" \
00932 "psraw $7, %%mm4 \n\t" \
00933 "paddw %%mm2, %%mm3 \n\t" \
00934 "paddw %%mm5, %%mm4 \n\t" \
00935 "movq (%0, "#index", 2), %%mm0 \n\t" \
00936 "movq (%1, "#index", 2), %%mm1 \n\t" \
00937 "movq 8(%0, "#index", 2), %%mm6 \n\t" \
00938 "movq 8(%1, "#index", 2), %%mm7 \n\t" \
00939 "psubw %%mm1, %%mm0 \n\t" \
00940 "psubw %%mm7, %%mm6 \n\t" \
00941 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" \
00942 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" \
00943 "psraw $7, %%mm1 \n\t" \
00944 "psraw $7, %%mm7 \n\t" \
00945 "paddw %%mm0, %%mm1 \n\t" \
00946 "paddw %%mm6, %%mm7 \n\t" \
00947
/* Indirection so that macro arguments are expanded before stringification. */
00948 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
00949
/*
 * Two-line vertical blend writing packed 4:2:2 output through WRITEYUY2.
 *
 * Only element 0/1 of buf and ubuf are used. vbuf, abuf, dstW, yalpha,
 * uvalpha and y are not referenced in this body: the blend coefficients
 * and the loop bound are read from the context, relative to &c->redDither
 * (operand %5).
 * YSCALEYUV2PACKED rewrites the stored blend coefficients in the context
 * (shifted right by 3) on every call.
 * REG_b holds dest and REG_BP the pixel index. Both are saved and restored
 * by hand (REG_b in the context slot at ESP_OFFSET, REG_BP on the stack).
 * NOTE(review): the loop bound is the hard-coded displacement 8280(%5) --
 * confirm it matches the SwsContext layout.
 */
00950 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
00951 const int16_t *ubuf[2], const int16_t *vbuf[2],
00952 const int16_t *abuf[2], uint8_t *dest,
00953 int dstW, int yalpha, int uvalpha, int y)
00954 {
00955 const int16_t *buf0 = buf[0], *buf1 = buf[1],
00956 *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
00957
00958
00959 __asm__ volatile(
00960 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
00961 "mov %4, %%"REG_b" \n\t"
00962 "push %%"REG_BP" \n\t"
00963 YSCALEYUV2PACKED(%%REGBP, %5)
00964 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
00965 "pop %%"REG_BP" \n\t"
00966 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
00967 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
00968 "a" (&c->redDither)
00969 );
00970 }
00971
/*
 * REAL_YSCALEYUV2RGB1(index, c): single-line (no blending) YUV->RGB
 * conversion. Zeroes index and defines loop label "1:".
 *
 * Only operand %2 (one chroma line) and operand %0 (one luma line) are
 * read. The second chroma plane is reached by temporarily adding
 * UV_OFF_BYTE(c) to index.
 * Samples are shifted right by 4, and the rest is the same colour matrix
 * as REAL_YSCALEYUV2RGB_COEFF: subtract U/V/Y offsets, multiply by the
 * UG/VG/UB/VR/Y coefficients at c, sum the green terms, duplicate chroma
 * for two luma samples, and pack to unsigned bytes with saturation.
 * Out: mm2 / mm4 / mm5 = b / g / r bytes. mm0, mm1, mm3, mm6 and mm7 are
 * overwritten.
 */
00972 #define REAL_YSCALEYUV2RGB1(index, c) \
00973 "xor "#index", "#index" \n\t"\
00974 ".p2align 4 \n\t"\
00975 "1: \n\t"\
00976 "movq (%2, "#index"), %%mm3 \n\t" \
00977 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00978 "movq (%2, "#index"), %%mm4 \n\t" \
00979 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
00980 "psraw $4, %%mm3 \n\t" \
00981 "psraw $4, %%mm4 \n\t" \
00982 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
00983 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
00984 "movq %%mm3, %%mm2 \n\t" \
00985 "movq %%mm4, %%mm5 \n\t" \
00986 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
00987 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
00988 \
00989 "movq (%0, "#index", 2), %%mm1 \n\t" \
00990 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
00991 "psraw $4, %%mm1 \n\t" \
00992 "psraw $4, %%mm7 \n\t" \
00993 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
00994 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
00995 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
00996 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
00997 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
00998 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
00999 \
01000 "paddw %%mm3, %%mm4 \n\t"\
01001 "movq %%mm2, %%mm0 \n\t"\
01002 "movq %%mm5, %%mm6 \n\t"\
01003 "movq %%mm4, %%mm3 \n\t"\
01004 "punpcklwd %%mm2, %%mm2 \n\t"\
01005 "punpcklwd %%mm5, %%mm5 \n\t"\
01006 "punpcklwd %%mm4, %%mm4 \n\t"\
01007 "paddw %%mm1, %%mm2 \n\t"\
01008 "paddw %%mm1, %%mm5 \n\t"\
01009 "paddw %%mm1, %%mm4 \n\t"\
01010 "punpckhwd %%mm0, %%mm0 \n\t"\
01011 "punpckhwd %%mm6, %%mm6 \n\t"\
01012 "punpckhwd %%mm3, %%mm3 \n\t"\
01013 "paddw %%mm7, %%mm0 \n\t"\
01014 "paddw %%mm7, %%mm6 \n\t"\
01015 "paddw %%mm7, %%mm3 \n\t"\
01016 \
01017 "packuswb %%mm0, %%mm2 \n\t"\
01018 "packuswb %%mm6, %%mm5 \n\t"\
01019 "packuswb %%mm3, %%mm4 \n\t"\
01020
/* Indirection so that macro arguments are expanded before stringification. */
01021 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
01022
01023
/*
 * Asm fragment: same structure as REAL_YSCALEYUV2RGB1, but the chroma input
 * is the sum of two lines.
 *
 *   index  register used as running offset; zeroed by this fragment.
 *   c      base address expression for the *_OFFSET / *_COEFF memory operands.
 *
 * Difference in the chroma front end: 4 words are loaded from both
 * (%2 + index) and (%3 + index), and again from both bases after index has
 * been advanced by the value stored at UV_OFF_BYTE(c).  Each pair is added
 * with paddw and the sum is shifted right *logically* by 5 (psrlw), i.e. one
 * bit more than the single-line variant's shift of 4.
 * NOTE(review): paddw wraps on overflow, so the 16-bit sum of the two lines
 * can wrap before the shift -- confirm the input range makes this harmless.
 *
 * From the U_OFFSET / V_OFFSET subtraction onwards the instruction stream is
 * identical to REAL_YSCALEYUV2RGB1; the packed results are left in mm2, mm4
 * and mm5, and the loop opened at "1:" is closed by whatever follows.
 *
 * YSCALEYUV2RGB1b is the pass-through wrapper that lets the arguments be
 * macro-expanded before they are stringified.
 */
01024 #define REAL_YSCALEYUV2RGB1b(index, c) \
01025 "xor "#index", "#index" \n\t"\
01026 ".p2align 4 \n\t"\
01027 "1: \n\t"\
01028 "movq (%2, "#index"), %%mm2 \n\t" \
01029 "movq (%3, "#index"), %%mm3 \n\t" \
01030 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01031 "movq (%2, "#index"), %%mm5 \n\t" \
01032 "movq (%3, "#index"), %%mm4 \n\t" \
01033 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01034 "paddw %%mm2, %%mm3 \n\t" \
01035 "paddw %%mm5, %%mm4 \n\t" \
01036 "psrlw $5, %%mm3 \n\t" \
01037 "psrlw $5, %%mm4 \n\t" \
01038 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" \
01039 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" \
01040 "movq %%mm3, %%mm2 \n\t" \
01041 "movq %%mm4, %%mm5 \n\t" \
01042 "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
01043 "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
01044 \
01045 "movq (%0, "#index", 2), %%mm1 \n\t" \
01046 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01047 "psraw $4, %%mm1 \n\t" \
01048 "psraw $4, %%mm7 \n\t" \
01049 "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
01050 "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
01051 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" \
01052 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" \
01053 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
01054 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
01055 \
01056 "paddw %%mm3, %%mm4 \n\t"\
01057 "movq %%mm2, %%mm0 \n\t"\
01058 "movq %%mm5, %%mm6 \n\t"\
01059 "movq %%mm4, %%mm3 \n\t"\
01060 "punpcklwd %%mm2, %%mm2 \n\t"\
01061 "punpcklwd %%mm5, %%mm5 \n\t"\
01062 "punpcklwd %%mm4, %%mm4 \n\t"\
01063 "paddw %%mm1, %%mm2 \n\t"\
01064 "paddw %%mm1, %%mm5 \n\t"\
01065 "paddw %%mm1, %%mm4 \n\t"\
01066 "punpckhwd %%mm0, %%mm0 \n\t"\
01067 "punpckhwd %%mm6, %%mm6 \n\t"\
01068 "punpckhwd %%mm3, %%mm3 \n\t"\
01069 "paddw %%mm7, %%mm0 \n\t"\
01070 "paddw %%mm7, %%mm6 \n\t"\
01071 "paddw %%mm7, %%mm3 \n\t"\
01072 \
01073 "packuswb %%mm0, %%mm2 \n\t"\
01074 "packuswb %%mm6, %%mm5 \n\t"\
01075 "packuswb %%mm3, %%mm4 \n\t"\
01076
01077 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
01078
/*
 * Asm fragment: fetch 8 words from (%1 + 2*index), shift each right
 * arithmetically by 7 and pack them to 8 unsigned-saturated bytes in mm7.
 * mm1 is used as scratch.  The callers in this file bind operand %1 to abuf0
 * whenever they use this fragment.
 *
 * YSCALEYUV2RGB1_ALPHA is the pass-through wrapper that lets the argument be
 * macro-expanded before it is stringified.
 */
01079 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
01080 "movq (%1, "#index", 2), %%mm7 \n\t" \
01081 "movq 8(%1, "#index", 2), %%mm1 \n\t" \
01082 "psraw $7, %%mm7 \n\t" \
01083 "psraw $7, %%mm1 \n\t" \
01084 "packuswb %%mm1, %%mm7 \n\t"
01085 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
01086
/*
 * Produce one 32-bit-per-pixel output line from a single buf line (buf0).
 *
 * Chroma source, chosen by uvalpha:
 *   uvalpha <  2048  YSCALEYUV2RGB1   reads only ubuf[0]          (operand %2)
 *   uvalpha >= 2048  YSCALEYUV2RGB1b  adds ubuf[0] and ubuf[1]    (%2 and %3)
 * Fourth channel, chosen by CONFIG_SWSCALE_ALPHA && c->alpPixBuf:
 *   true   operand %1 is abuf0 and YSCALEYUV2RGB1_ALPHA packs it into mm7
 *   false  operand %1 is buf1 (an alias of buf0) and mm7 is set to all ones
 *
 * bguf, dstW and y are not referenced by the C code of this function.
 * NOTE(review): 8280(%5) is an offset from &c->redDither handed to
 * WRITEBGR32; presumably it addresses the destination width -- confirm.
 * NOTE(review): none of the asm statements declares outputs or clobbers;
 * REG_b and REG_BP are saved and restored by hand.
 */
01090 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
01091 const int16_t *ubuf[2], const int16_t *bguf[2],
01092 const int16_t *abuf0, uint8_t *dest,
01093 int dstW, int uvalpha, int y)
01094 {
01095 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
/* buf1 only exists so that operand %1 has a valid pointer in the non-alpha variants. */
01096 const int16_t *buf1= buf0;
01097
01098 if (uvalpha < 2048) {
01099 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
/* Single chroma line, alpha read from abuf0. */
01100 __asm__ volatile(
01101 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01102 "mov %4, %%"REG_b" \n\t"
01103 "push %%"REG_BP" \n\t"
01104 YSCALEYUV2RGB1(%%REGBP, %5)
01105 YSCALEYUV2RGB1_ALPHA(%%REGBP)
01106 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01107 "pop %%"REG_BP" \n\t"
01108 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01109 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01110 "a" (&c->redDither)
01111 );
01112 } else {
/* Single chroma line, fourth channel forced to 0xFF. */
01113 __asm__ volatile(
01114 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01115 "mov %4, %%"REG_b" \n\t"
01116 "push %%"REG_BP" \n\t"
01117 YSCALEYUV2RGB1(%%REGBP, %5)
01118 "pcmpeqd %%mm7, %%mm7 \n\t"
01119 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01120 "pop %%"REG_BP" \n\t"
01121 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01122 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01123 "a" (&c->redDither)
01124 );
01125 }
01126 } else {
01127 if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
/* Two chroma lines summed, alpha read from abuf0. */
01128 __asm__ volatile(
01129 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01130 "mov %4, %%"REG_b" \n\t"
01131 "push %%"REG_BP" \n\t"
01132 YSCALEYUV2RGB1b(%%REGBP, %5)
01133 YSCALEYUV2RGB1_ALPHA(%%REGBP)
01134 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01135 "pop %%"REG_BP" \n\t"
01136 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01137 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01138 "a" (&c->redDither)
01139 );
01140 } else {
/* Two chroma lines summed, fourth channel forced to 0xFF. */
01141 __asm__ volatile(
01142 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01143 "mov %4, %%"REG_b" \n\t"
01144 "push %%"REG_BP" \n\t"
01145 YSCALEYUV2RGB1b(%%REGBP, %5)
01146 "pcmpeqd %%mm7, %%mm7 \n\t"
01147 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
01148 "pop %%"REG_BP" \n\t"
01149 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01150 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01151 "a" (&c->redDither)
01152 );
01153 }
01154 }
01155 }
01156
/*
 * Produce one output line through WRITEBGR24 from a single buf line (buf0).
 *
 * uvalpha <  2048 uses YSCALEYUV2RGB1  (chroma from ubuf[0] only);
 * uvalpha >= 2048 uses YSCALEYUV2RGB1b (ubuf[0] and ubuf[1] summed).
 * In both variants mm7 is cleared before WRITEBGR24 runs.
 *
 * bguf, abuf0, dstW and y are not referenced by the C code of this function.
 * NOTE(review): the asm statements declare no outputs or clobbers; REG_b and
 * REG_BP are saved and restored by hand.
 */
01157 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
01158 const int16_t *ubuf[2], const int16_t *bguf[2],
01159 const int16_t *abuf0, uint8_t *dest,
01160 int dstW, int uvalpha, int y)
01161 {
01162 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
/* Alias of buf0, bound to operand %1 so the operand list keeps its usual shape. */
01163 const int16_t *buf1= buf0;
01164
01165 if (uvalpha < 2048) {
01166 __asm__ volatile(
01167 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01168 "mov %4, %%"REG_b" \n\t"
01169 "push %%"REG_BP" \n\t"
01170 YSCALEYUV2RGB1(%%REGBP, %5)
01171 "pxor %%mm7, %%mm7 \n\t"
01172 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01173 "pop %%"REG_BP" \n\t"
01174 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01175 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01176 "a" (&c->redDither)
01177 );
01178 } else {
01179 __asm__ volatile(
01180 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01181 "mov %4, %%"REG_b" \n\t"
01182 "push %%"REG_BP" \n\t"
01183 YSCALEYUV2RGB1b(%%REGBP, %5)
01184 "pxor %%mm7, %%mm7 \n\t"
01185 WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
01186 "pop %%"REG_BP" \n\t"
01187 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01188 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01189 "a" (&c->redDither)
01190 );
01191 }
01192 }
01193
/*
 * Produce one output line through WRITERGB15 from a single buf line (buf0).
 *
 * uvalpha <  2048 uses YSCALEYUV2RGB1  (chroma from ubuf[0] only);
 * uvalpha >= 2048 uses YSCALEYUV2RGB1b (ubuf[0] and ubuf[1] summed).
 * mm7 is cleared before the write.  When DITHER1XBPP is defined, the values
 * at BLUE_DITHER, GREEN_DITHER and RED_DITHER (relative to &c->redDither)
 * are added to mm2, mm4 and mm5 with unsigned byte saturation first.
 *
 * bguf, abuf0, dstW and y are not referenced by the C code of this function.
 * NOTE(review): the asm statements declare no outputs or clobbers; REG_b and
 * REG_BP are saved and restored by hand.
 */
01194 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
01195 const int16_t *ubuf[2], const int16_t *bguf[2],
01196 const int16_t *abuf0, uint8_t *dest,
01197 int dstW, int uvalpha, int y)
01198 {
01199 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
/* Alias of buf0, bound to operand %1 so the operand list keeps its usual shape. */
01200 const int16_t *buf1= buf0;
01201
01202 if (uvalpha < 2048) {
01203 __asm__ volatile(
01204 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01205 "mov %4, %%"REG_b" \n\t"
01206 "push %%"REG_BP" \n\t"
01207 YSCALEYUV2RGB1(%%REGBP, %5)
01208 "pxor %%mm7, %%mm7 \n\t"
01209
01210 #ifdef DITHER1XBPP
01211 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01212 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01213 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01214 #endif
01215 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01216 "pop %%"REG_BP" \n\t"
01217 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01218 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01219 "a" (&c->redDither)
01220 );
01221 } else {
01222 __asm__ volatile(
01223 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01224 "mov %4, %%"REG_b" \n\t"
01225 "push %%"REG_BP" \n\t"
01226 YSCALEYUV2RGB1b(%%REGBP, %5)
01227 "pxor %%mm7, %%mm7 \n\t"
01228
01229 #ifdef DITHER1XBPP
01230 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01231 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01232 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01233 #endif
01234 WRITERGB15(%%REGb, 8280(%5), %%REGBP)
01235 "pop %%"REG_BP" \n\t"
01236 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01237 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01238 "a" (&c->redDither)
01239 );
01240 }
01241 }
01242
/*
 * Produce one output line through WRITERGB16 from a single buf line (buf0).
 * Identical to the WRITERGB15 variant above except for the write macro.
 *
 * uvalpha <  2048 uses YSCALEYUV2RGB1  (chroma from ubuf[0] only);
 * uvalpha >= 2048 uses YSCALEYUV2RGB1b (ubuf[0] and ubuf[1] summed).
 * mm7 is cleared before the write.  When DITHER1XBPP is defined, the values
 * at BLUE_DITHER, GREEN_DITHER and RED_DITHER (relative to &c->redDither)
 * are added to mm2, mm4 and mm5 with unsigned byte saturation first.
 *
 * bguf, abuf0, dstW and y are not referenced by the C code of this function.
 * NOTE(review): the asm statements declare no outputs or clobbers; REG_b and
 * REG_BP are saved and restored by hand.
 */
01243 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
01244 const int16_t *ubuf[2], const int16_t *bguf[2],
01245 const int16_t *abuf0, uint8_t *dest,
01246 int dstW, int uvalpha, int y)
01247 {
01248 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
/* Alias of buf0, bound to operand %1 so the operand list keeps its usual shape. */
01249 const int16_t *buf1= buf0;
01250
01251 if (uvalpha < 2048) {
01252 __asm__ volatile(
01253 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01254 "mov %4, %%"REG_b" \n\t"
01255 "push %%"REG_BP" \n\t"
01256 YSCALEYUV2RGB1(%%REGBP, %5)
01257 "pxor %%mm7, %%mm7 \n\t"
01258
01259 #ifdef DITHER1XBPP
01260 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01261 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01262 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01263 #endif
01264 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01265 "pop %%"REG_BP" \n\t"
01266 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01267 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01268 "a" (&c->redDither)
01269 );
01270 } else {
01271 __asm__ volatile(
01272 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01273 "mov %4, %%"REG_b" \n\t"
01274 "push %%"REG_BP" \n\t"
01275 YSCALEYUV2RGB1b(%%REGBP, %5)
01276 "pxor %%mm7, %%mm7 \n\t"
01277
01278 #ifdef DITHER1XBPP
01279 "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
01280 "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
01281 "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
01282 #endif
01283 WRITERGB16(%%REGb, 8280(%5), %%REGBP)
01284 "pop %%"REG_BP" \n\t"
01285 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01286 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01287 "a" (&c->redDither)
01288 );
01289 }
01290 }
01291
/*
 * Asm fragment: head of a packed-output loop that takes its chroma from a
 * single line and performs no colour conversion.
 *
 *   index  register used as running offset; zeroed by this fragment.
 *   c      base address expression for the UV_OFF_BYTE memory operand.
 *
 * Each pass through label "1:" loads 4 words from (%2 + index) into mm3 and
 * 4 words from (%2 + index + the value stored at UV_OFF_BYTE(c)) into mm4,
 * then 8 words from (%0 + 2*index) into mm1/mm7.  All four registers are
 * shifted right arithmetically by 7; nothing is packed or stored here.
 *
 * The loop opened at "1:" is closed by whatever follows the fragment.
 * YSCALEYUV2PACKED1 is the pass-through wrapper that lets the arguments be
 * macro-expanded before they are stringified.
 */
01292 #define REAL_YSCALEYUV2PACKED1(index, c) \
01293 "xor "#index", "#index" \n\t"\
01294 ".p2align 4 \n\t"\
01295 "1: \n\t"\
01296 "movq (%2, "#index"), %%mm3 \n\t" \
01297 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01298 "movq (%2, "#index"), %%mm4 \n\t" \
01299 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01300 "psraw $7, %%mm3 \n\t" \
01301 "psraw $7, %%mm4 \n\t" \
01302 "movq (%0, "#index", 2), %%mm1 \n\t" \
01303 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01304 "psraw $7, %%mm1 \n\t" \
01305 "psraw $7, %%mm7 \n\t" \
01306
01307 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
01308
/*
 * Asm fragment: like REAL_YSCALEYUV2PACKED1, but the chroma input is the sum
 * of two lines.
 *
 *   index  register used as running offset; zeroed by this fragment.
 *   c      base address expression for the UV_OFF_BYTE memory operand.
 *
 * Each pass through label "1:" loads 4 words from (%2 + index) and
 * (%3 + index), and again from both bases after index has been advanced by
 * the value stored at UV_OFF_BYTE(c).  Each pair is added with paddw and the
 * sum is shifted right *logically* by 8 (one bit more than the single-line
 * variant's shift of 7), leaving the results in mm3 and mm4.  The 8 words
 * from (%0 + 2*index) go to mm1/mm7 and are shifted right arithmetically
 * by 7.
 *
 * The loop opened at "1:" is closed by whatever follows the fragment.
 * YSCALEYUV2PACKED1b is the pass-through wrapper that lets the arguments be
 * macro-expanded before they are stringified.
 */
01309 #define REAL_YSCALEYUV2PACKED1b(index, c) \
01310 "xor "#index", "#index" \n\t"\
01311 ".p2align 4 \n\t"\
01312 "1: \n\t"\
01313 "movq (%2, "#index"), %%mm2 \n\t" \
01314 "movq (%3, "#index"), %%mm3 \n\t" \
01315 "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01316 "movq (%2, "#index"), %%mm5 \n\t" \
01317 "movq (%3, "#index"), %%mm4 \n\t" \
01318 "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
01319 "paddw %%mm2, %%mm3 \n\t" \
01320 "paddw %%mm5, %%mm4 \n\t" \
01321 "psrlw $8, %%mm3 \n\t" \
01322 "psrlw $8, %%mm4 \n\t" \
01323 "movq (%0, "#index", 2), %%mm1 \n\t" \
01324 "movq 8(%0, "#index", 2), %%mm7 \n\t" \
01325 "psraw $7, %%mm1 \n\t" \
01326 "psraw $7, %%mm7 \n\t"
01327 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
01328
/*
 * Produce one output line through WRITEYUY2 from a single buf line (buf0),
 * without colour conversion.
 *
 * uvalpha <  2048 uses YSCALEYUV2PACKED1  (chroma from ubuf[0] only);
 * uvalpha >= 2048 uses YSCALEYUV2PACKED1b (ubuf[0] and ubuf[1] summed).
 *
 * bguf, abuf0, dstW and y are not referenced by the C code of this function.
 * NOTE(review): the asm statements declare no outputs or clobbers; REG_b and
 * REG_BP are saved and restored by hand.
 */
01329 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
01330 const int16_t *ubuf[2], const int16_t *bguf[2],
01331 const int16_t *abuf0, uint8_t *dest,
01332 int dstW, int uvalpha, int y)
01333 {
01334 const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
/* Alias of buf0, bound to operand %1 so the operand list keeps its usual shape. */
01335 const int16_t *buf1= buf0;
01336
01337 if (uvalpha < 2048) {
01338 __asm__ volatile(
01339 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01340 "mov %4, %%"REG_b" \n\t"
01341 "push %%"REG_BP" \n\t"
01342 YSCALEYUV2PACKED1(%%REGBP, %5)
01343 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01344 "pop %%"REG_BP" \n\t"
01345 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01346 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01347 "a" (&c->redDither)
01348 );
01349 } else {
01350 __asm__ volatile(
01351 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
01352 "mov %4, %%"REG_b" \n\t"
01353 "push %%"REG_BP" \n\t"
01354 YSCALEYUV2PACKED1b(%%REGBP, %5)
01355 WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
01356 "pop %%"REG_BP" \n\t"
01357 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
01358 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
01359 "a" (&c->redDither)
01360 );
01361 }
01362 }
01363
/*
 * Convert packed 3-byte pixels to one byte per pixel using MMX.
 *
 * dst        output bytes; written through (dst + width) with a negative index.
 * src        input, consumed 12 bytes (4 pixels) per loop iteration.
 * width      number of output bytes requested.
 * srcFormat  PIX_FMT_BGR24 selects the ff_bgr24toY{1,2}Coeff pair; any other
 *            value selects ff_rgb24toY{1,2}Coeff.
 *
 * The loop counter starts at -width, advances by 4 and the loop repeats while
 * it is negative, so the body always runs at least once and every iteration
 * stores 4 bytes.
 * NOTE(review): when width is not a multiple of 4 (or is 0) the last
 * iteration reads and writes past the requested range -- presumably the
 * callers' buffers are padded; confirm.
 * NOTE(review): mm5/mm6 are loaded in a separate asm statement and consumed
 * in the next one; that dependency is not expressed through operands or
 * clobbers.
 */
01364 static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src,
01365 int width, enum PixelFormat srcFormat)
01366 {
01367
01368 if(srcFormat == PIX_FMT_BGR24) {
01369 __asm__ volatile(
01370 "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t"
01371 "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t"
01372 :
01373 );
01374 } else {
01375 __asm__ volatile(
01376 "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t"
01377 "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t"
01378 :
01379 );
01380 }
01381
01382 __asm__ volatile(
/* mm4 = additive offset, REG_a = -width (loop counter), mm7 = 0 for byte->word unpacking. */
01383 "movq "MANGLE(ff_bgr24toYOffset)", %%mm4 \n\t"
01384 "mov %2, %%"REG_a" \n\t"
01385 "pxor %%mm7, %%mm7 \n\t"
01386 "1: \n\t"
01387 PREFETCH" 64(%0) \n\t"
/* Four overlapping 4-byte loads cover the 12 source bytes of this iteration. */
01388 "movd (%0), %%mm0 \n\t"
01389 "movd 2(%0), %%mm1 \n\t"
01390 "movd 6(%0), %%mm2 \n\t"
01391 "movd 8(%0), %%mm3 \n\t"
01392 "add $12, %0 \n\t"
01393 "punpcklbw %%mm7, %%mm0 \n\t"
01394 "punpcklbw %%mm7, %%mm1 \n\t"
01395 "punpcklbw %%mm7, %%mm2 \n\t"
01396 "punpcklbw %%mm7, %%mm3 \n\t"
/* Multiply-accumulate against the two coefficient vectors and sum the partial products. */
01397 "pmaddwd %%mm5, %%mm0 \n\t"
01398 "pmaddwd %%mm6, %%mm1 \n\t"
01399 "pmaddwd %%mm5, %%mm2 \n\t"
01400 "pmaddwd %%mm6, %%mm3 \n\t"
01401 "paddd %%mm1, %%mm0 \n\t"
01402 "paddd %%mm3, %%mm2 \n\t"
01403 "paddd %%mm4, %%mm0 \n\t"
01404 "paddd %%mm4, %%mm2 \n\t"
01405 "psrad $15, %%mm0 \n\t"
01406 "psrad $15, %%mm2 \n\t"
/* Narrow the four 32-bit results to bytes and store them at (dst + width)[REG_a]. */
01407 "packssdw %%mm2, %%mm0 \n\t"
01408 "packuswb %%mm0, %%mm0 \n\t"
01409 "movd %%mm0, (%1, %%"REG_a") \n\t"
01410 "add $4, %%"REG_a" \n\t"
01411 " js 1b \n\t"
/* %0 = src (read/write, advanced by the loop), %1 = dst + width, %2 = -width */
01412 : "+r" (src)
01413 : "r" (dst+width), "g" ((x86_reg)-width)
01414 : "%"REG_a
01415 );
01416 }
01417
/*
 * Entry point with the signature stored in c->lumToYV12: forwards to
 * bgr24ToY_mmx with PIX_FMT_BGR24.  The 'unused' parameter is ignored.
 */
01418 static void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src,
01419 int width, uint32_t *unused)
01420 {
01421 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
01422 }
01423
/*
 * Entry point with the signature stored in c->lumToYV12: forwards to
 * bgr24ToY_mmx with PIX_FMT_RGB24, which makes it pick the ff_rgb24toY*
 * coefficient pair.  The 'unused' parameter is ignored.
 */
01424 static void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src,
01425 int width, uint32_t *unused)
01426 {
01427 RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
01428 }
01429
/*
 * Convert packed 3-byte pixels to two byte planes (dstU, dstV) using MMX.
 *
 * dstU, dstV  outputs; written through (dstX + width) with a negative index.
 * src         input, consumed 12 bytes (4 pixels) per loop iteration.
 * width       number of output bytes requested per plane.
 * srcFormat   selects the coefficient row: ff_bgr24toUV[1] for PIX_FMT_RGB24,
 *             ff_bgr24toUV[0] for anything else.
 *
 * The coefficient row (operand %4) is read as four 8-byte vectors: offsets 0
 * and 8 feed the value stored to dstU, offsets 16 and 24 (the latter cached
 * in mm6) feed the value stored to dstV.
 *
 * The loop counter starts at -width, advances by 4 and the loop repeats while
 * it is negative, so the body always runs at least once and every iteration
 * stores 4 bytes to each plane.
 * NOTE(review): when width is not a multiple of 4 (or is 0) the last
 * iteration reads and writes past the requested range -- presumably the
 * callers' buffers are padded; confirm.
 * NOTE(review): only REG_a is declared as clobbered; the MMX registers and
 * the memory written are not.
 */
01430 static av_always_inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV,
01431 const uint8_t *src, int width,
01432 enum PixelFormat srcFormat)
01433 {
01434 __asm__ volatile(
01435 "movq 24(%4), %%mm6 \n\t"
01436 "mov %3, %%"REG_a" \n\t"
01437 "pxor %%mm7, %%mm7 \n\t"
01438 "1: \n\t"
01439 PREFETCH" 64(%0) \n\t"
/* First pair of pixels: mm0 accumulates the dstU value, mm2 the dstV value. */
01440 "movd (%0), %%mm0 \n\t"
01441 "movd 2(%0), %%mm1 \n\t"
01442 "punpcklbw %%mm7, %%mm0 \n\t"
01443 "punpcklbw %%mm7, %%mm1 \n\t"
01444 "movq %%mm0, %%mm2 \n\t"
01445 "movq %%mm1, %%mm3 \n\t"
01446 "pmaddwd (%4), %%mm0 \n\t"
01447 "pmaddwd 8(%4), %%mm1 \n\t"
01448 "pmaddwd 16(%4), %%mm2 \n\t"
01449 "pmaddwd %%mm6, %%mm3 \n\t"
01450 "paddd %%mm1, %%mm0 \n\t"
01451 "paddd %%mm3, %%mm2 \n\t"
01452
/* Second pair of pixels: mm1 accumulates the dstU value, mm4 the dstV value. */
01453 "movd 6(%0), %%mm1 \n\t"
01454 "movd 8(%0), %%mm3 \n\t"
01455 "add $12, %0 \n\t"
01456 "punpcklbw %%mm7, %%mm1 \n\t"
01457 "punpcklbw %%mm7, %%mm3 \n\t"
01458 "movq %%mm1, %%mm4 \n\t"
01459 "movq %%mm3, %%mm5 \n\t"
01460 "pmaddwd (%4), %%mm1 \n\t"
01461 "pmaddwd 8(%4), %%mm3 \n\t"
01462 "pmaddwd 16(%4), %%mm4 \n\t"
01463 "pmaddwd %%mm6, %%mm5 \n\t"
01464 "paddd %%mm3, %%mm1 \n\t"
01465 "paddd %%mm5, %%mm4 \n\t"
01466
/* Add the common offset, scale down by 15 bits, narrow to bytes and store 4 bytes per plane. */
01467 "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3 \n\t"
01468 "paddd %%mm3, %%mm0 \n\t"
01469 "paddd %%mm3, %%mm2 \n\t"
01470 "paddd %%mm3, %%mm1 \n\t"
01471 "paddd %%mm3, %%mm4 \n\t"
01472 "psrad $15, %%mm0 \n\t"
01473 "psrad $15, %%mm2 \n\t"
01474 "psrad $15, %%mm1 \n\t"
01475 "psrad $15, %%mm4 \n\t"
01476 "packssdw %%mm1, %%mm0 \n\t"
01477 "packssdw %%mm4, %%mm2 \n\t"
01478 "packuswb %%mm0, %%mm0 \n\t"
01479 "packuswb %%mm2, %%mm2 \n\t"
01480 "movd %%mm0, (%1, %%"REG_a") \n\t"
01481 "movd %%mm2, (%2, %%"REG_a") \n\t"
01482 "add $4, %%"REG_a" \n\t"
01483 " js 1b \n\t"
/* %0 = src (advanced by the loop), %1 = dstU + width, %2 = dstV + width, %3 = -width, %4 = coefficient row */
01484 : "+r" (src)
01485 : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
01486 : "%"REG_a
01487 );
01488 }
01489
/*
 * Entry point with the signature stored in c->chrToYV12: forwards to
 * bgr24ToUV_mmx with PIX_FMT_BGR24.
 *
 * Only src1 is read; callers are required to pass the same pointer as src1
 * and src2.  That precondition is asserted before any conversion work is
 * done, in line with the RGB24 sibling wrapper.  The 'unused' parameter is
 * ignored.
 */
01490 static void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV,
01491 const uint8_t *src1, const uint8_t *src2,
01492 int width, uint32_t *unused)
01493 {
01494 assert(src1 == src2);
01495 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
01496 }
01497
/*
 * Entry point with the signature stored in c->chrToYV12: asserts that src1
 * and src2 are the same pointer, then forwards src1 to bgr24ToUV_mmx with
 * PIX_FMT_RGB24.  src2 is not read otherwise; 'unused' is ignored.
 */
01498 static void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV,
01499 const uint8_t *src1, const uint8_t *src2,
01500 int width, uint32_t *unused)
01501 {
01502 assert(src1==src2);
01503 RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
01504 }
01505
01506 #if COMPILE_TEMPLATE_MMX2
/*
 * Horizontal scaling of one line by calling the code block stored in
 * c->lumMmx2FilterCode eight times, followed by a C fix-up of the right edge.
 *
 * c         context; supplies hLumFilterPos, hLumFilter and lumMmx2FilterCode.
 * dst       output samples (int16_t).
 * dstWidth  number of output samples.
 * src       input bytes.
 * srcW      number of input bytes.
 * xInc      source step per output sample, 16.16 fixed point (the edge loop
 *           uses (i*xInc)>>16 as the source position).
 *
 * Register set-up before the calls: REG_c = src, REG_D = dst, REG_d = filter,
 * REG_b = filterPos, REG_a = 0, mm7 = 0.  What the called code does with
 * them is not visible in this function.
 *
 * Operand numbering depends on the build: with PIC, %5 is ebxsave and (on
 * x86-64) %6 is retsave; without PIC, %5 is retsave on x86-64.
 * NOTE(review): ebxsave and retsave are written by the asm but are declared
 * as "m" inputs rather than outputs.
 */
01507 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
01508 int dstWidth, const uint8_t *src,
01509 int srcW, int xInc)
01510 {
01511 int32_t *filterPos = c->hLumFilterPos;
01512 int16_t *filter = c->hLumFilter;
01513 void *mmx2FilterCode= c->lumMmx2FilterCode;
01514 int i;
01515 #if defined(PIC)
01516 uint64_t ebxsave;
01517 #endif
01518 #if ARCH_X86_64
01519 uint64_t retsave;
01520 #endif
01521
01522 __asm__ volatile(
/* With PIC, REG_b cannot be listed as a clobber (see the clobber list), so it is saved by hand. */
01523 #if defined(PIC)
01524 "mov %%"REG_b", %5 \n\t"
01525 #if ARCH_X86_64
/* Each "call" below pushes its return address into the slot at -8(%rsp); keep a copy of what was there. */
01526 "mov -8(%%rsp), %%"REG_a" \n\t"
01527 "mov %%"REG_a", %6 \n\t"
01528 #endif
01529 #else
01530 #if ARCH_X86_64
01531 "mov -8(%%rsp), %%"REG_a" \n\t"
01532 "mov %%"REG_a", %5 \n\t"
01533 #endif
01534 #endif
01535 "pxor %%mm7, %%mm7 \n\t"
01536 "mov %0, %%"REG_c" \n\t"
01537 "mov %1, %%"REG_D" \n\t"
01538 "mov %2, %%"REG_d" \n\t"
01539 "mov %3, %%"REG_b" \n\t"
01540 "xor %%"REG_a", %%"REG_a" \n\t"
01541 PREFETCH" (%%"REG_c") \n\t"
01542 PREFETCH" 32(%%"REG_c") \n\t"
01543 PREFETCH" 64(%%"REG_c") \n\t"
01544
/*
 * CALL_MMX2_FILTER_CODE: load the dword at (REG_b) into esi, call the code at
 * operand %4, then add the dword at (REG_b + REG_a) to REG_c, add REG_a to
 * REG_D and clear REG_a.  The x86-64 form goes through esi/REG_S for the
 * first addition.  The macro stays defined after this function.
 */
01545 #if ARCH_X86_64
01546 #define CALL_MMX2_FILTER_CODE \
01547 "movl (%%"REG_b"), %%esi \n\t"\
01548 "call *%4 \n\t"\
01549 "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
01550 "add %%"REG_S", %%"REG_c" \n\t"\
01551 "add %%"REG_a", %%"REG_D" \n\t"\
01552 "xor %%"REG_a", %%"REG_a" \n\t"\
01553
01554 #else
01555 #define CALL_MMX2_FILTER_CODE \
01556 "movl (%%"REG_b"), %%esi \n\t"\
01557 "call *%4 \n\t"\
01558 "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
01559 "add %%"REG_a", %%"REG_D" \n\t"\
01560 "xor %%"REG_a", %%"REG_a" \n\t"\
01561
01562 #endif
01563
01564 CALL_MMX2_FILTER_CODE
01565 CALL_MMX2_FILTER_CODE
01566 CALL_MMX2_FILTER_CODE
01567 CALL_MMX2_FILTER_CODE
01568 CALL_MMX2_FILTER_CODE
01569 CALL_MMX2_FILTER_CODE
01570 CALL_MMX2_FILTER_CODE
01571 CALL_MMX2_FILTER_CODE
01572
/* Restore REG_b and the saved stack slot, mirroring the prologue. */
01573 #if defined(PIC)
01574 "mov %5, %%"REG_b" \n\t"
01575 #if ARCH_X86_64
01576 "mov %6, %%"REG_a" \n\t"
01577 "mov %%"REG_a", -8(%%rsp) \n\t"
01578 #endif
01579 #else
01580 #if ARCH_X86_64
01581 "mov %5, %%"REG_a" \n\t"
01582 "mov %%"REG_a", -8(%%rsp) \n\t"
01583 #endif
01584 #endif
/* %0 = src, %1 = dst, %2 = filter, %3 = filterPos, %4 = mmx2FilterCode (all memory operands) */
01585 :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
01586 "m" (mmx2FilterCode)
01587 #if defined(PIC)
01588 ,"m" (ebxsave)
01589 #endif
01590 #if ARCH_X86_64
01591 ,"m"(retsave)
01592 #endif
01593 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
01594 #if !defined(PIC)
01595 ,"%"REG_b
01596 #endif
01597 );
01598
/* Right edge: every output whose source position reaches the last input byte is set to that byte times 128. */
01599 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
01600 dst[i] = src[srcW-1]*128;
01601 }
01602
/*
 * Horizontal scaling of two lines (src1 -> dst1, src2 -> dst2) by calling the
 * code block stored in c->chrMmx2FilterCode four times per line, followed by
 * a C fix-up of the right edge of both outputs.
 *
 * c           context; supplies hChrFilterPos, hChrFilter, chrMmx2FilterCode.
 * dst1, dst2  output samples (int16_t).
 * dstWidth    number of output samples per line.
 * src1, src2  input bytes.
 * srcW        number of input bytes per line.
 * xInc        source step per output sample, 16.16 fixed point (the edge loop
 *             uses (i*xInc)>>16 as the source position).
 *
 * Relies on the CALL_MMX2_FILTER_CODE macro defined earlier in this file.
 * Between the two passes only REG_a, REG_c and REG_D are reloaded; REG_d
 * (filter) and REG_b (filterPos) are not.
 *
 * Operand numbering depends on the build: with PIC, %7 is ebxsave and (on
 * x86-64) %8 is retsave; without PIC, %7 is retsave on x86-64.
 * NOTE(review): ebxsave and retsave are written by the asm but are declared
 * as "m" inputs rather than outputs.
 */
01603 static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
01604 int dstWidth, const uint8_t *src1,
01605 const uint8_t *src2, int srcW, int xInc)
01606 {
01607 int32_t *filterPos = c->hChrFilterPos;
01608 int16_t *filter = c->hChrFilter;
01609 void *mmx2FilterCode= c->chrMmx2FilterCode;
01610 int i;
01611 #if defined(PIC)
01612 DECLARE_ALIGNED(8, uint64_t, ebxsave);
01613 #endif
01614 #if ARCH_X86_64
01615 DECLARE_ALIGNED(8, uint64_t, retsave);
01616 #endif
01617
01618 __asm__ volatile(
/* With PIC, REG_b cannot be listed as a clobber (see the clobber list), so it is saved by hand. */
01619 #if defined(PIC)
01620 "mov %%"REG_b", %7 \n\t"
01621 #if ARCH_X86_64
/* Each "call" below pushes its return address into the slot at -8(%rsp); keep a copy of what was there. */
01622 "mov -8(%%rsp), %%"REG_a" \n\t"
01623 "mov %%"REG_a", %8 \n\t"
01624 #endif
01625 #else
01626 #if ARCH_X86_64
01627 "mov -8(%%rsp), %%"REG_a" \n\t"
01628 "mov %%"REG_a", %7 \n\t"
01629 #endif
01630 #endif
/* First pass: src1 -> dst1. */
01631 "pxor %%mm7, %%mm7 \n\t"
01632 "mov %0, %%"REG_c" \n\t"
01633 "mov %1, %%"REG_D" \n\t"
01634 "mov %2, %%"REG_d" \n\t"
01635 "mov %3, %%"REG_b" \n\t"
01636 "xor %%"REG_a", %%"REG_a" \n\t"
01637 PREFETCH" (%%"REG_c") \n\t"
01638 PREFETCH" 32(%%"REG_c") \n\t"
01639 PREFETCH" 64(%%"REG_c") \n\t"
01640
01641 CALL_MMX2_FILTER_CODE
01642 CALL_MMX2_FILTER_CODE
01643 CALL_MMX2_FILTER_CODE
01644 CALL_MMX2_FILTER_CODE
/* Second pass: src2 -> dst2. */
01645 "xor %%"REG_a", %%"REG_a" \n\t"
01646 "mov %5, %%"REG_c" \n\t"
01647 "mov %6, %%"REG_D" \n\t"
01648 PREFETCH" (%%"REG_c") \n\t"
01649 PREFETCH" 32(%%"REG_c") \n\t"
01650 PREFETCH" 64(%%"REG_c") \n\t"
01651
01652 CALL_MMX2_FILTER_CODE
01653 CALL_MMX2_FILTER_CODE
01654 CALL_MMX2_FILTER_CODE
01655 CALL_MMX2_FILTER_CODE
01656
/* Restore REG_b and the saved stack slot, mirroring the prologue. */
01657 #if defined(PIC)
01658 "mov %7, %%"REG_b" \n\t"
01659 #if ARCH_X86_64
01660 "mov %8, %%"REG_a" \n\t"
01661 "mov %%"REG_a", -8(%%rsp) \n\t"
01662 #endif
01663 #else
01664 #if ARCH_X86_64
01665 "mov %7, %%"REG_a" \n\t"
01666 "mov %%"REG_a", -8(%%rsp) \n\t"
01667 #endif
01668 #endif
/* %0 = src1, %1 = dst1, %2 = filter, %3 = filterPos, %4 = mmx2FilterCode, %5 = src2, %6 = dst2 (all memory operands) */
01669 :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
01670 "m" (mmx2FilterCode), "m" (src2), "m"(dst2)
01671 #if defined(PIC)
01672 ,"m" (ebxsave)
01673 #endif
01674 #if ARCH_X86_64
01675 ,"m"(retsave)
01676 #endif
01677 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
01678 #if !defined(PIC)
01679 ,"%"REG_b
01680 #endif
01681 );
01682
/* Right edge: every output whose source position reaches the last input byte is set to that byte times 128. */
01683 for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
01684 dst1[i] = src1[srcW-1]*128;
01685 dst2[i] = src2[srcW-1]*128;
01686 }
01687 }
01688 #endif
01689
/*
 * Install this template's functions into the function pointers of a context.
 * Pointers for which no case matches are left as they were on entry.
 *
 * Output side -- only when the destination format is neither 16-bit nor
 * 9/10-bit per sample and is not NV12/NV21:
 *   - c->yuv2packedX is set unless SWS_BITEXACT is requested; the "_ar"
 *     variants are chosen when SWS_ACCURATE_RND is set;
 *   - c->yuv2packed1 and c->yuv2packed2 are set regardless of SWS_BITEXACT;
 *   - all of the above are skipped when SWS_FULL_CHR_H_INT is set.
 *
 * Horizontal scaler -- only when c->srcBpc == 8 and c->dstBpc <= 10:
 *   c->hyscale_fast / c->hcscale_fast point at the MMX2 routines when this
 *   template is compiled with COMPILE_TEMPLATE_MMX2, SWS_FAST_BILINEAR is set
 *   and c->canMMX2BeUsed is non-zero; otherwise both are set to NULL.
 *
 * Input side:
 *   c->chrToYV12 is set for BGR24/RGB24 sources when c->chrSrcHSubSample is
 *   zero; c->lumToYV12 is set for BGR24/RGB24 sources unconditionally.
 */
01690 static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
01691 {
01692 enum PixelFormat srcFormat = c->srcFormat,
01693 dstFormat = c->dstFormat;
01694
01695 if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
01696 dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21) {
01697 if (!(c->flags & SWS_BITEXACT)) {
01698 if (c->flags & SWS_ACCURATE_RND) {
01699 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01700 switch (c->dstFormat) {
01701 case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
01702 case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
01703 case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
01704 case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
01705 case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
01706 default: break;
01707 }
01708 }
01709 } else {
01710 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01711 switch (c->dstFormat) {
01712 case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
01713 case PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
01714 case PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
01715 case PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
01716 case PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
01717 default: break;
01718 }
01719 }
01720 }
01721 }
/* The one-line and two-line writers are installed even when SWS_BITEXACT is set. */
01722 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
01723 switch (c->dstFormat) {
01724 case PIX_FMT_RGB32:
01725 c->yuv2packed1 = RENAME(yuv2rgb32_1);
01726 c->yuv2packed2 = RENAME(yuv2rgb32_2);
01727 break;
01728 case PIX_FMT_BGR24:
01729 c->yuv2packed1 = RENAME(yuv2bgr24_1);
01730 c->yuv2packed2 = RENAME(yuv2bgr24_2);
01731 break;
01732 case PIX_FMT_RGB555:
01733 c->yuv2packed1 = RENAME(yuv2rgb555_1);
01734 c->yuv2packed2 = RENAME(yuv2rgb555_2);
01735 break;
01736 case PIX_FMT_RGB565:
01737 c->yuv2packed1 = RENAME(yuv2rgb565_1);
01738 c->yuv2packed2 = RENAME(yuv2rgb565_2);
01739 break;
01740 case PIX_FMT_YUYV422:
01741 c->yuv2packed1 = RENAME(yuv2yuyv422_1);
01742 c->yuv2packed2 = RENAME(yuv2yuyv422_2);
01743 break;
01744 default:
01745 break;
01746 }
01747 }
01748 }
01749
01750 if (c->srcBpc == 8 && c->dstBpc <= 10) {
01751
/* Without COMPILE_TEMPLATE_MMX2 the if/else collapses to the two NULL assignments. */
01752 #if COMPILE_TEMPLATE_MMX2
01753 if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
01754 {
01755 c->hyscale_fast = RENAME(hyscale_fast);
01756 c->hcscale_fast = RENAME(hcscale_fast);
01757 } else {
01758 #endif
01759 c->hyscale_fast = NULL;
01760 c->hcscale_fast = NULL;
01761 #if COMPILE_TEMPLATE_MMX2
01762 }
01763 #endif
01764 }
01765
01766 if (!c->chrSrcHSubSample) {
01767 switch(srcFormat) {
01768 case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
01769 case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break;
01770 default: break;
01771 }
01772 }
01773
01774 switch (srcFormat) {
01775 case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break;
01776 case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break;
01777 default: break;
01778 }
01779 }