libswscale/swscale.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of Libav.
00005  *
00006  * Libav is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * Libav is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with Libav; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 #include <inttypes.h>
00022 #include <string.h>
00023 #include <math.h>
00024 #include <stdio.h>
00025 #include "config.h"
00026 #include <assert.h>
00027 #include "swscale.h"
00028 #include "swscale_internal.h"
00029 #include "rgb2rgb.h"
00030 #include "libavutil/intreadwrite.h"
00031 #include "libavutil/cpu.h"
00032 #include "libavutil/avutil.h"
00033 #include "libavutil/mathematics.h"
00034 #include "libavutil/bswap.h"
00035 #include "libavutil/pixdesc.h"
00036 
/* Compile-time switch; nothing in the visible part of this file tests it. */
00037 #define DITHER1XBPP
00038 
/*
 * RGB -> YUV weights as fixed-point integers with RGB2YUV_SHIFT fractional
 * bits. The first letter of each name is presumably the input channel and
 * the second the output channel (e.g. BY = blue contribution to Y).
 * The Y weights (0.299/0.587/0.114, the ITU-R BT.601 luma values) are scaled
 * by 219/255 and the U/V weights by 224/255. The "+0.5" rounds the magnitude
 * to nearest before the (int) truncation; negative weights are negated after
 * rounding.
 */
00039 #define RGB2YUV_SHIFT 15
00040 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
00041 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
00042 #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
00043 #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
00044 #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
00045 #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
00046 #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
00047 #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
00048 #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
00049 
00050 /*
00051 NOTES
00052 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
00053 
00054 TODO
00055 more intelligent misalignment avoidance for the horizontal scaler
00056 write special vertical cubic upscale version
00057 optimize C code (YV12 / minmax)
00058 add support for packed pixel YUV input & output
00059 add support for Y8 output
00060 optimize BGR24 & BGR32
00061 add BGR4 output support
00062 write special BGR->BGR scaler
00063 */
00064 
/*
 * Two-row dither pattern holding the values 0..3; each row repeats a
 * two-entry period across its 8 columns. File-local (static).
 * DECLARE_ALIGNED is defined elsewhere; its first argument is presumably the
 * byte alignment.
 */
00065 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
00066 {  1,   3,   1,   3,   1,   3,   1,   3, },
00067 {  2,   0,   2,   0,   2,   0,   2,   0, },
00068 };
00069 
/*
 * Two-row dither pattern holding the even values 0, 2, 4 and 6; each row
 * repeats a two-entry period across its 8 columns. File-local (static).
 */
00070 DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
00071 {  6,   2,   6,   2,   6,   2,   6,   2, },
00072 {  0,   4,   0,   4,   0,   4,   0,   4, },
00073 };
00074 
/*
 * Four-row dither pattern holding each value 0..15 once per 4x4 tile; every
 * row stores its four entries twice to fill 8 columns. Not static, so it has
 * external linkage.
 */
00075 DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
00076 {  8,   4,  11,   7,   8,   4,  11,   7, },
00077 {  2,  14,   1,  13,   2,  14,   1,  13, },
00078 { 10,   6,   9,   5,  10,   6,   9,   5, },
00079 {  0,  12,   3,  15,   0,  12,   3,  15, },
00080 };
00081 
/* 8x8 dither pattern with entries in the range 0..31. External linkage. */
00082 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
00083 { 17,   9,  23,  15,  16,   8,  22,  14, },
00084 {  5,  29,   3,  27,   4,  28,   2,  26, },
00085 { 21,  13,  19,  11,  20,  12,  18,  10, },
00086 {  0,  24,   6,  30,   1,  25,   7,  31, },
00087 { 16,   8,  22,  14,  17,   9,  23,  15, },
00088 {  4,  28,   2,  26,   5,  29,   3,  27, },
00089 { 20,  12,  18,  10,  21,  13,  19,  11, },
00090 {  1,  25,   7,  31,   0,  24,   6,  30, },
00091 };
00092 
/* 8x8 dither pattern with entries in the range 0..72. External linkage. */
00093 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
00094 {  0,  55,  14,  68,   3,  58,  17,  72, },
00095 { 37,  18,  50,  32,  40,  22,  54,  35, },
00096 {  9,  64,   5,  59,  13,  67,   8,  63, },
00097 { 46,  27,  41,  23,  49,  31,  44,  26, },
00098 {  2,  57,  16,  71,   1,  56,  15,  70, },
00099 { 39,  21,  52,  34,  38,  19,  51,  33, },
00100 { 11,  66,   7,  62,  10,  65,   6,  60, },
00101 { 48,  30,  43,  25,  47,  29,  42,  24, },
00102 };
00103 
/*
 * 8x8 dither pattern used by the yuv2mono_*_c_template functions, which pick
 * a row with (y & 7). Because the first condition is "#if 1", only the first
 * table (entries 0..217) is compiled; the three gamma-corrected alternatives
 * below it are kept in the source but are never selected.
 */
00104 #if 1
00105 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00106 {117,  62, 158, 103, 113,  58, 155, 100, },
00107 { 34, 199,  21, 186,  31, 196,  17, 182, },
00108 {144,  89, 131,  76, 141,  86, 127,  72, },
00109 {  0, 165,  41, 206,  10, 175,  52, 217, },
00110 {110,  55, 151,  96, 120,  65, 162, 107, },
00111 { 28, 193,  14, 179,  38, 203,  24, 189, },
00112 {138,  83, 124,  69, 148,  93, 134,  79, },
00113 {  7, 172,  48, 213,   3, 168,  45, 210, },
00114 };
00115 #elif 1
00116 // tries to correct a gamma of 1.5
00117 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00118 {  0, 143,  18, 200,   2, 156,  25, 215, },
00119 { 78,  28, 125,  64,  89,  36, 138,  74, },
00120 { 10, 180,   3, 161,  16, 195,   8, 175, },
00121 {109,  51,  93,  38, 121,  60, 105,  47, },
00122 {  1, 152,  23, 210,   0, 147,  20, 205, },
00123 { 85,  33, 134,  71,  81,  30, 130,  67, },
00124 { 14, 190,   6, 171,  12, 185,   5, 166, },
00125 {117,  57, 101,  44, 113,  54,  97,  41, },
00126 };
00127 #elif 1
00128 // tries to correct a gamma of 2.0
00129 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00130 {  0, 124,   8, 193,   0, 140,  12, 213, },
00131 { 55,  14, 104,  42,  66,  19, 119,  52, },
00132 {  3, 168,   1, 145,   6, 187,   3, 162, },
00133 { 86,  31,  70,  21,  99,  39,  82,  28, },
00134 {  0, 134,  11, 206,   0, 129,   9, 200, },
00135 { 62,  17, 114,  48,  58,  16, 109,  45, },
00136 {  5, 181,   2, 157,   4, 175,   1, 151, },
00137 { 95,  36,  78,  26,  90,  34,  74,  24, },
00138 };
00139 #else
00140 // tries to correct a gamma of 2.5
00141 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
00142 {  0, 107,   3, 187,   0, 125,   6, 212, },
00143 { 39,   7,  86,  28,  49,  11, 102,  36, },
00144 {  1, 158,   0, 131,   3, 180,   1, 151, },
00145 { 68,  19,  52,  12,  81,  25,  64,  17, },
00146 {  0, 119,   5, 203,   0, 113,   4, 195, },
00147 { 45,   9,  96,  33,  42,   8,  91,  30, },
00148 {  2, 172,   1, 144,   2, 165,   0, 137, },
00149 { 77,  23,  60,  15,  72,  21,  56,  14, },
00150 };
00151 #endif
/* 8x8 dither pattern; all entries are even and lie in 0..126. External linkage. */
00152 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
00153 {  36, 68, 60, 92, 34, 66, 58, 90,},
00154 { 100,  4,124, 28, 98,  2,122, 26,},
00155 {  52, 84, 44, 76, 50, 82, 42, 74,},
00156 { 116, 20,108, 12,114, 18,106, 10,},
00157 {  32, 64, 56, 88, 38, 70, 62, 94,},
00158 {  96,  0,120, 24,102,  6,126, 30,},
00159 {  48, 80, 40, 72, 54, 86, 46, 78,},
00160 { 112, 16,104,  8,118, 22,110, 14,},
00161 };
/*
 * Eight bytes all equal to 64. NOTE(review): presumably a flat replacement
 * for a dither row (64 is half of the 1 << 7 step used by yuv2plane1_8_c);
 * its users are not visible in this part of the file.
 */
00162 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
00163 {  64, 64, 64, 64, 64, 64, 64, 64 };
00164 
/*
 * Store one 16-bit sample at pos: (val >> shift) is clipped with
 * av_clip_uint16() or av_clip_int16() (chosen by the "signedness" token),
 * bias is added, and the result is written big- or little-endian.
 *
 * Reads the locals "shift" and "big_endian" of the enclosing function.
 * Wrapped in do { } while (0) so that "output_pixel(...);" is exactly one
 * statement and stays safe inside an unbraced if/else; arguments are
 * parenthesized so that arbitrary expressions may be passed.
 */
#define output_pixel(pos, val, bias, signedness) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } else { \
            AV_WL16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } \
    } while (0)
00171 
00172 static av_always_inline void
00173 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
00174                          int big_endian, int output_bits)
00175 {
00176     int i;
00177     int shift = 19 - output_bits;
00178 
00179     for (i = 0; i < dstW; i++) {
00180         int val = src[i] + (1 << (shift - 1));
00181         output_pixel(&dest[i], val, 0, uint);
00182     }
00183 }
00184 
00185 static av_always_inline void
00186 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
00187                          const int32_t **src, uint16_t *dest, int dstW,
00188                          int big_endian, int output_bits)
00189 {
00190     int i;
00191     int shift = 15 + 16 - output_bits;
00192 
00193     for (i = 0; i < dstW; i++) {
00194         int val = 1 << (30-output_bits);
00195         int j;
00196 
00197         /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
00198          * filters (or anything with negative coeffs, the range can be slightly
00199          * wider in both directions. To account for this overflow, we subtract
00200          * a constant so it always fits in the signed range (assuming a
00201          * reasonable filterSize), and re-add that at the end. */
00202         val -= 0x40000000;
00203         for (j = 0; j < filterSize; j++)
00204             val += src[j][i] * filter[j];
00205 
00206         output_pixel(&dest[i], val, 0x8000, int);
00207     }
00208 }
00209 
00210 #undef output_pixel
00211 
/*
 * Store one sample at pos as a 16-bit word: (val >> shift) is clipped to
 * output_bits unsigned bits with av_clip_uintp2() and written big- or
 * little-endian.
 *
 * Reads the locals "shift", "big_endian" and "output_bits" of the enclosing
 * function. Wrapped in do { } while (0) so that "output_pixel(...);" is one
 * statement and stays safe inside an unbraced if/else; val is parenthesized
 * so that arbitrary expressions may be passed.
 */
#define output_pixel(pos, val) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, av_clip_uintp2((val) >> shift, output_bits)); \
        } else { \
            AV_WL16(pos, av_clip_uintp2((val) >> shift, output_bits)); \
        } \
    } while (0)
00218 
00219 static av_always_inline void
00220 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
00221                          int big_endian, int output_bits)
00222 {
00223     int i;
00224     int shift = 15 - output_bits;
00225 
00226     for (i = 0; i < dstW; i++) {
00227         int val = src[i] + (1 << (shift - 1));
00228         output_pixel(&dest[i], val);
00229     }
00230 }
00231 
00232 static av_always_inline void
00233 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
00234                          const int16_t **src, uint16_t *dest, int dstW,
00235                          int big_endian, int output_bits)
00236 {
00237     int i;
00238     int shift = 11 + 16 - output_bits;
00239 
00240     for (i = 0; i < dstW; i++) {
00241         int val = 1 << (26-output_bits);
00242         int j;
00243 
00244         for (j = 0; j < filterSize; j++)
00245             val += src[j][i] * filter[j];
00246 
00247         output_pixel(&dest[i], val);
00248     }
00249 }
00250 
00251 #undef output_pixel
00252 
/*
 * Generate the pair of entry points yuv2plane1_<bits><BE_LE>_c() and
 * yuv2planeX_<bits><BE_LE>_c(). Both keep the int16_t/uint8_t pointer
 * signature and forward to yuv2plane1_<template_size>_c_template() and
 * yuv2planeX_<template_size>_c_template() after casting the source to
 * typeX_t and the destination to uint16_t. is_be and bits are passed as the
 * template's big_endian and output_bits arguments. The dither and offset
 * parameters are accepted but not used.
 */
00253 #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
00254 static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
00255                               uint8_t *dest, int dstW, \
00256                               const uint8_t *dither, int offset)\
00257 { \
00258     yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
00259                          (uint16_t *) dest, dstW, is_be, bits); \
00260 }\
00261 static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
00262                               const int16_t **src, uint8_t *dest, int dstW, \
00263                               const uint8_t *dither, int offset)\
00264 { \
00265     yuv2planeX_## template_size ## _c_template(filter, \
00266                          filterSize, (const typeX_t **) src, \
00267                          (uint16_t *) dest, dstW, is_be, bits); \
00268 }
/*
 * 9- and 10-bit outputs share the "10" templates with int16_t sources;
 * 16-bit output uses the "16" templates with int32_t sources.
 */
00269 yuv2NBPS( 9, BE, 1, 10, int16_t)
00270 yuv2NBPS( 9, LE, 0, 10, int16_t)
00271 yuv2NBPS(10, BE, 1, 10, int16_t)
00272 yuv2NBPS(10, LE, 0, 10, int16_t)
00273 yuv2NBPS(16, BE, 1, 16, int32_t)
00274 yuv2NBPS(16, LE, 0, 16, int32_t)
00275 
/**
 * Vertically filter filterSize lines of 16-bit samples into one line of
 * 8-bit output, adding an ordered-dither offset before the final shift.
 *
 * @param filter     filter coefficients, filterSize entries
 * @param filterSize number of input lines / coefficients
 * @param src        filterSize pointers to input lines of dstW samples
 * @param dest       output bytes, dstW entries
 * @param dstW       number of samples to write
 * @param dither     8-entry dither row, indexed with (x + offset) & 7
 * @param offset     horizontal phase added to the dither index
 */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x;

    for (x = 0; x < dstW; x++) {
        /* The dither value is pre-scaled by 12 bits; the sum drops 19. */
        int sum = dither[(x + offset) & 7] << 12;
        int tap = 0;

        while (tap < filterSize) {
            sum += src[tap][x] * filter[tap];
            tap++;
        }

        dest[x] = av_clip_uint8(sum >> 19);
    }
}
00290 
/**
 * Write one line of 8-bit samples without vertical filtering: each 16-bit
 * input sample gets a dither offset added, is shifted right by 7 and is
 * clipped to 0..255.
 *
 * @param src    input samples, dstW entries
 * @param dest   output bytes, dstW entries
 * @param dstW   number of samples to write
 * @param dither 8-entry dither row, indexed with (x + offset) & 7
 * @param offset horizontal phase added to the dither index
 */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x = 0;

    while (x < dstW) {
        const int dithered = src[x] + dither[(x + offset) & 7];

        dest[x] = av_clip_uint8(dithered >> 7);
        x++;
    }
}
00300 
00301 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
00302                         const int16_t **chrUSrc, const int16_t **chrVSrc,
00303                         uint8_t *dest, int chrDstW)
00304 {
00305     enum PixelFormat dstFormat = c->dstFormat;
00306     const uint8_t *chrDither = c->chrDither8;
00307     int i;
00308 
00309     if (dstFormat == PIX_FMT_NV12)
00310         for (i=0; i<chrDstW; i++) {
00311             int u = chrDither[i & 7] << 12;
00312             int v = chrDither[(i + 3) & 7] << 12;
00313             int j;
00314             for (j=0; j<chrFilterSize; j++) {
00315                 u += chrUSrc[j][i] * chrFilter[j];
00316                 v += chrVSrc[j][i] * chrFilter[j];
00317             }
00318 
00319             dest[2*i]= av_clip_uint8(u>>19);
00320             dest[2*i+1]= av_clip_uint8(v>>19);
00321         }
00322     else
00323         for (i=0; i<chrDstW; i++) {
00324             int u = chrDither[i & 7] << 12;
00325             int v = chrDither[(i + 3) & 7] << 12;
00326             int j;
00327             for (j=0; j<chrFilterSize; j++) {
00328                 u += chrUSrc[j][i] * chrFilter[j];
00329                 v += chrVSrc[j][i] * chrFilter[j];
00330             }
00331 
00332             dest[2*i]= av_clip_uint8(v>>19);
00333             dest[2*i+1]= av_clip_uint8(u>>19);
00334         }
00335 }
00336 
/*
 * Store the 16-bit value val at pos, big-endian when the enclosing
 * function's "target" is PIX_FMT_GRAY16BE and little-endian otherwise.
 *
 * Wrapped in do { } while (0) so that "output_pixel(...);" is exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define output_pixel(pos, val) \
        do { \
            if (target == PIX_FMT_GRAY16BE) { \
                AV_WB16(pos, (val)); \
            } else { \
                AV_WL16(pos, (val)); \
            } \
        } while (0)
00343 
00344 static av_always_inline void
00345 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
00346                         const int32_t **lumSrc, int lumFilterSize,
00347                         const int16_t *chrFilter, const int32_t **chrUSrc,
00348                         const int32_t **chrVSrc, int chrFilterSize,
00349                         const int32_t **alpSrc, uint16_t *dest, int dstW,
00350                         int y, enum PixelFormat target)
00351 {
00352     int i;
00353 
00354     for (i = 0; i < (dstW >> 1); i++) {
00355         int j;
00356         int Y1 = (1 << 14) - 0x40000000;
00357         int Y2 = (1 << 14) - 0x40000000;
00358 
00359         for (j = 0; j < lumFilterSize; j++) {
00360             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00361             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00362         }
00363         Y1 >>= 15;
00364         Y2 >>= 15;
00365         Y1 = av_clip_int16(Y1);
00366         Y2 = av_clip_int16(Y2);
00367         output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
00368         output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
00369     }
00370 }
00371 
00372 static av_always_inline void
00373 yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
00374                         const int32_t *ubuf[2], const int32_t *vbuf[2],
00375                         const int32_t *abuf[2], uint16_t *dest, int dstW,
00376                         int yalpha, int uvalpha, int y,
00377                         enum PixelFormat target)
00378 {
00379     int  yalpha1 = 4095 - yalpha;
00380     int i;
00381     const int32_t *buf0 = buf[0], *buf1 = buf[1];
00382 
00383     for (i = 0; i < (dstW >> 1); i++) {
00384         int Y1 = (buf0[i * 2    ] * yalpha1 + buf1[i * 2    ] * yalpha) >> 15;
00385         int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
00386 
00387         output_pixel(&dest[i * 2 + 0], Y1);
00388         output_pixel(&dest[i * 2 + 1], Y2);
00389     }
00390 }
00391 
00392 static av_always_inline void
00393 yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
00394                         const int32_t *ubuf[2], const int32_t *vbuf[2],
00395                         const int32_t *abuf0, uint16_t *dest, int dstW,
00396                         int uvalpha, int y, enum PixelFormat target)
00397 {
00398     int i;
00399 
00400     for (i = 0; i < (dstW >> 1); i++) {
00401         int Y1 = buf0[i * 2    ] << 1;
00402         int Y2 = buf0[i * 2 + 1] << 1;
00403 
00404         output_pixel(&dest[i * 2 + 0], Y1);
00405         output_pixel(&dest[i * 2 + 1], Y2);
00406     }
00407 }
00408 
00409 #undef output_pixel
00410 
/*
 * Generate the three entry points <name><ext>_X_c(), <name><ext>_2_c() and
 * <name><ext>_1_c() with int16_t/uint8_t pointer signatures. Each one
 * reinterprets its sample pointers as int32_t and its destination as
 * uint16_t, then forwards every argument to the matching
 * <name><base>_{X,2,1}_c_template() with fmt appended as the final "target"
 * argument.
 */
00411 #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
00412 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
00413                         const int16_t **_lumSrc, int lumFilterSize, \
00414                         const int16_t *chrFilter, const int16_t **_chrUSrc, \
00415                         const int16_t **_chrVSrc, int chrFilterSize, \
00416                         const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
00417                         int y) \
00418 { \
00419     const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
00420                   **chrUSrc = (const int32_t **) _chrUSrc, \
00421                   **chrVSrc = (const int32_t **) _chrVSrc, \
00422                   **alpSrc  = (const int32_t **) _alpSrc; \
00423     uint16_t *dest = (uint16_t *) _dest; \
00424     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
00425                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00426                           alpSrc, dest, dstW, y, fmt); \
00427 } \
00428  \
00429 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
00430                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
00431                         const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
00432                         int yalpha, int uvalpha, int y) \
00433 { \
00434     const int32_t **buf  = (const int32_t **) _buf, \
00435                   **ubuf = (const int32_t **) _ubuf, \
00436                   **vbuf = (const int32_t **) _vbuf, \
00437                   **abuf = (const int32_t **) _abuf; \
00438     uint16_t *dest = (uint16_t *) _dest; \
00439     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
00440                           dest, dstW, yalpha, uvalpha, y, fmt); \
00441 } \
00442  \
00443 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
00444                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
00445                         const int16_t *_abuf0, uint8_t *_dest, int dstW, \
00446                         int uvalpha, int y) \
00447 { \
00448     const int32_t *buf0  = (const int32_t *)  _buf0, \
00449                  **ubuf  = (const int32_t **) _ubuf, \
00450                  **vbuf  = (const int32_t **) _vbuf, \
00451                   *abuf0 = (const int32_t *)  _abuf0; \
00452     uint16_t *dest = (uint16_t *) _dest; \
00453     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
00454                                   dstW, uvalpha, y, fmt); \
00455 }
00456 
/*
 * yuv2gray16LE_{X,2,1}_c and yuv2gray16BE_{X,2,1}_c; base is empty, so both
 * sets call the yuv2gray16_*_c_template functions.
 */
00457 YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
00458 YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
00459 
/*
 * Store the accumulated bits acc into pos: unchanged when the enclosing
 * function's "target" is PIX_FMT_MONOBLACK, bitwise inverted otherwise.
 * pos is evaluated exactly once, so "*dest++" may be passed.
 *
 * Wrapped in do { } while (0) so that "output_pixel(...);" is exactly one
 * statement and stays safe inside an unbraced if/else; arguments are
 * parenthesized so that arbitrary expressions may be passed.
 */
#define output_pixel(pos, acc) \
    do { \
        if (target == PIX_FMT_MONOBLACK) { \
            (pos) = (acc); \
        } else { \
            (pos) = ~(acc); \
        } \
    } while (0)
00466 
00467 static av_always_inline void
00468 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
00469                       const int16_t **lumSrc, int lumFilterSize,
00470                       const int16_t *chrFilter, const int16_t **chrUSrc,
00471                       const int16_t **chrVSrc, int chrFilterSize,
00472                       const int16_t **alpSrc, uint8_t *dest, int dstW,
00473                       int y, enum PixelFormat target)
00474 {
00475     const uint8_t * const d128=dither_8x8_220[y&7];
00476     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00477     int i;
00478     unsigned acc = 0;
00479 
00480     for (i = 0; i < dstW - 1; i += 2) {
00481         int j;
00482         int Y1 = 1 << 18;
00483         int Y2 = 1 << 18;
00484 
00485         for (j = 0; j < lumFilterSize; j++) {
00486             Y1 += lumSrc[j][i]   * lumFilter[j];
00487             Y2 += lumSrc[j][i+1] * lumFilter[j];
00488         }
00489         Y1 >>= 19;
00490         Y2 >>= 19;
00491         if ((Y1 | Y2) & 0x100) {
00492             Y1 = av_clip_uint8(Y1);
00493             Y2 = av_clip_uint8(Y2);
00494         }
00495         acc += acc + g[Y1 + d128[(i + 0) & 7]];
00496         acc += acc + g[Y2 + d128[(i + 1) & 7]];
00497         if ((i & 7) == 6) {
00498             output_pixel(*dest++, acc);
00499         }
00500     }
00501 }
00502 
00503 static av_always_inline void
00504 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
00505                       const int16_t *ubuf[2], const int16_t *vbuf[2],
00506                       const int16_t *abuf[2], uint8_t *dest, int dstW,
00507                       int yalpha, int uvalpha, int y,
00508                       enum PixelFormat target)
00509 {
00510     const int16_t *buf0  = buf[0],  *buf1  = buf[1];
00511     const uint8_t * const d128 = dither_8x8_220[y & 7];
00512     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00513     int  yalpha1 = 4095 - yalpha;
00514     int i;
00515 
00516     for (i = 0; i < dstW - 7; i += 8) {
00517         int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
00518         acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
00519         acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
00520         acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
00521         acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
00522         acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
00523         acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
00524         acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
00525         output_pixel(*dest++, acc);
00526     }
00527 }
00528 
00529 static av_always_inline void
00530 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
00531                       const int16_t *ubuf[2], const int16_t *vbuf[2],
00532                       const int16_t *abuf0, uint8_t *dest, int dstW,
00533                       int uvalpha, int y, enum PixelFormat target)
00534 {
00535     const uint8_t * const d128 = dither_8x8_220[y & 7];
00536     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00537     int i;
00538 
00539     for (i = 0; i < dstW - 7; i += 8) {
00540         int acc =    g[(buf0[i    ] >> 7) + d128[0]];
00541         acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
00542         acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
00543         acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
00544         acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
00545         acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
00546         acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
00547         acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
00548         output_pixel(*dest++, acc);
00549     }
00550 }
00551 
00552 #undef output_pixel
00553 
/*
 * Generate the three entry points <name><ext>_X_c(), <name><ext>_2_c() and
 * <name><ext>_1_c(). Each one forwards its arguments unchanged to the
 * matching <name><base>_{X,2,1}_c_template() and appends fmt as the final
 * "target" argument.
 */
00554 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
00555 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
00556                                 const int16_t **lumSrc, int lumFilterSize, \
00557                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
00558                                 const int16_t **chrVSrc, int chrFilterSize, \
00559                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
00560                                 int y) \
00561 { \
00562     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
00563                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00564                                   alpSrc, dest, dstW, y, fmt); \
00565 } \
00566  \
00567 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
00568                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
00569                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
00570                                 int yalpha, int uvalpha, int y) \
00571 { \
00572     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
00573                                   dest, dstW, yalpha, uvalpha, y, fmt); \
00574 } \
00575  \
00576 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
00577                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
00578                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
00579                                 int uvalpha, int y) \
00580 { \
00581     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
00582                                   abuf0, dest, dstW, uvalpha, \
00583                                   y, fmt); \
00584 }
00585 
/*
 * yuv2monowhite_{X,2,1}_c and yuv2monoblack_{X,2,1}_c; base is empty, so
 * both sets call the yuv2mono_*_c_template functions.
 */
00586 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
00587 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
00588 
/*
 * Store one pair of pixels (4 bytes) at dest[pos..pos+3]: in Y1,U,Y2,V
 * order when the enclosing function's "target" is PIX_FMT_YUYV422, in
 * U,Y1,V,Y2 order otherwise. Reads the locals "dest" and "target".
 *
 * Wrapped in do { } while (0) so that "output_pixels(...);" is exactly one
 * statement and stays safe inside an unbraced if/else; arguments are
 * parenthesized so that arbitrary expressions may be passed.
 */
#define output_pixels(pos, Y1, U, Y2, V) \
    do { \
        if (target == PIX_FMT_YUYV422) { \
            dest[(pos) + 0] = (Y1); \
            dest[(pos) + 1] = (U);  \
            dest[(pos) + 2] = (Y2); \
            dest[(pos) + 3] = (V);  \
        } else { \
            dest[(pos) + 0] = (U);  \
            dest[(pos) + 1] = (Y1); \
            dest[(pos) + 2] = (V);  \
            dest[(pos) + 3] = (Y2); \
        } \
    } while (0)
00601 
00602 static av_always_inline void
00603 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
00604                      const int16_t **lumSrc, int lumFilterSize,
00605                      const int16_t *chrFilter, const int16_t **chrUSrc,
00606                      const int16_t **chrVSrc, int chrFilterSize,
00607                      const int16_t **alpSrc, uint8_t *dest, int dstW,
00608                      int y, enum PixelFormat target)
00609 {
00610     int i;
00611 
00612     for (i = 0; i < (dstW >> 1); i++) {
00613         int j;
00614         int Y1 = 1 << 18;
00615         int Y2 = 1 << 18;
00616         int U  = 1 << 18;
00617         int V  = 1 << 18;
00618 
00619         for (j = 0; j < lumFilterSize; j++) {
00620             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00621             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00622         }
00623         for (j = 0; j < chrFilterSize; j++) {
00624             U += chrUSrc[j][i] * chrFilter[j];
00625             V += chrVSrc[j][i] * chrFilter[j];
00626         }
00627         Y1 >>= 19;
00628         Y2 >>= 19;
00629         U  >>= 19;
00630         V  >>= 19;
00631         if ((Y1 | Y2 | U | V) & 0x100) {
00632             Y1 = av_clip_uint8(Y1);
00633             Y2 = av_clip_uint8(Y2);
00634             U  = av_clip_uint8(U);
00635             V  = av_clip_uint8(V);
00636         }
00637         output_pixels(4*i, Y1, U, Y2, V);
00638     }
00639 }
00640 
00641 static av_always_inline void
00642 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
00643                      const int16_t *ubuf[2], const int16_t *vbuf[2],
00644                      const int16_t *abuf[2], uint8_t *dest, int dstW,
00645                      int yalpha, int uvalpha, int y,
00646                      enum PixelFormat target)
00647 {
00648     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
00649                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00650                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00651     int  yalpha1 = 4095 - yalpha;
00652     int uvalpha1 = 4095 - uvalpha;
00653     int i;
00654 
00655     for (i = 0; i < (dstW >> 1); i++) {
00656         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
00657         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
00658         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
00659         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
00660 
00661         output_pixels(i * 4, Y1, U, Y2, V);
00662     }
00663 }
00664 
00665 static av_always_inline void
00666 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
00667                      const int16_t *ubuf[2], const int16_t *vbuf[2],
00668                      const int16_t *abuf0, uint8_t *dest, int dstW,
00669                      int uvalpha, int y, enum PixelFormat target)
00670 {
00671     const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00672                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00673     int i;
00674 
00675     if (uvalpha < 2048) {
00676         for (i = 0; i < (dstW >> 1); i++) {
00677             int Y1 = buf0[i * 2]     >> 7;
00678             int Y2 = buf0[i * 2 + 1] >> 7;
00679             int U  = ubuf1[i]        >> 7;
00680             int V  = vbuf1[i]        >> 7;
00681 
00682             output_pixels(i * 4, Y1, U, Y2, V);
00683         }
00684     } else {
00685         for (i = 0; i < (dstW >> 1); i++) {
00686             int Y1 =  buf0[i * 2]          >> 7;
00687             int Y2 =  buf0[i * 2 + 1]      >> 7;
00688             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
00689             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
00690 
00691             output_pixels(i * 4, Y1, U, Y2, V);
00692         }
00693     }
00694 }
00695 
00696 #undef output_pixels
00697 
/*
 * yuv2yuyv422_{X,2,1}_c and yuv2uyvy422_{X,2,1}_c; with base "422" both sets
 * call the yuv2422_*_c_template functions.
 */
00698 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
00699 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
00700 
/*
 * Channel selectors for the 48-bit writers. For PIX_FMT_RGB48LE/BE targets
 * R_B is R and B_R is B; for any other target they are swapped. Both expand
 * to the locals "R", "B" and "target" of the enclosing function.
 */
00701 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
00702 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
/*
 * Store the 16-bit value val at pos, big-endian when isBE(target) is true
 * for the enclosing function's "target" and little-endian otherwise.
 *
 * Wrapped in do { } while (0) so that "output_pixel(...);" is exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define output_pixel(pos, val) \
    do { \
        if (isBE(target)) { \
            AV_WB16(pos, (val)); \
        } else { \
            AV_WL16(pos, (val)); \
        } \
    } while (0)
00709 
00710 static av_always_inline void
00711 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
00712                        const int32_t **lumSrc, int lumFilterSize,
00713                        const int16_t *chrFilter, const int32_t **chrUSrc,
00714                        const int32_t **chrVSrc, int chrFilterSize,
00715                        const int32_t **alpSrc, uint16_t *dest, int dstW,
00716                        int y, enum PixelFormat target)
00717 {
00718     int i;
00719 
00720     for (i = 0; i < (dstW >> 1); i++) {
00721         int j;
00722         int Y1 = -0x40000000;
00723         int Y2 = -0x40000000;
00724         int U  = -128 << 23; // 19
00725         int V  = -128 << 23;
00726         int R, G, B;
00727 
00728         for (j = 0; j < lumFilterSize; j++) {
00729             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00730             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00731         }
00732         for (j = 0; j < chrFilterSize; j++) {
00733             U += chrUSrc[j][i] * chrFilter[j];
00734             V += chrVSrc[j][i] * chrFilter[j];
00735         }
00736 
00737         // 8bit: 12+15=27; 16-bit: 12+19=31
00738         Y1 >>= 14; // 10
00739         Y1 += 0x10000;
00740         Y2 >>= 14;
00741         Y2 += 0x10000;
00742         U  >>= 14;
00743         V  >>= 14;
00744 
00745         // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
00746         Y1 -= c->yuv2rgb_y_offset;
00747         Y2 -= c->yuv2rgb_y_offset;
00748         Y1 *= c->yuv2rgb_y_coeff;
00749         Y2 *= c->yuv2rgb_y_coeff;
00750         Y1 += 1 << 13; // 21
00751         Y2 += 1 << 13;
00752         // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
00753 
00754         R = V * c->yuv2rgb_v2r_coeff;
00755         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00756         B =                            U * c->yuv2rgb_u2b_coeff;
00757 
00758         // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
00759         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00760         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00761         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00762         output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00763         output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00764         output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00765         dest += 6;
00766     }
00767 }
00768 
00769 static av_always_inline void
00770 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
00771                        const int32_t *ubuf[2], const int32_t *vbuf[2],
00772                        const int32_t *abuf[2], uint16_t *dest, int dstW,
00773                        int yalpha, int uvalpha, int y,
00774                        enum PixelFormat target)
00775 {
00776     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
00777                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00778                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00779     int  yalpha1 = 4095 - yalpha;
00780     int uvalpha1 = 4095 - uvalpha;
00781     int i;
00782 
00783     for (i = 0; i < (dstW >> 1); i++) {
00784         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
00785         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
00786         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
00787         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
00788         int R, G, B;
00789 
00790         Y1 -= c->yuv2rgb_y_offset;
00791         Y2 -= c->yuv2rgb_y_offset;
00792         Y1 *= c->yuv2rgb_y_coeff;
00793         Y2 *= c->yuv2rgb_y_coeff;
00794         Y1 += 1 << 13;
00795         Y2 += 1 << 13;
00796 
00797         R = V * c->yuv2rgb_v2r_coeff;
00798         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00799         B =                            U * c->yuv2rgb_u2b_coeff;
00800 
00801         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00802         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00803         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00804         output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00805         output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00806         output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00807         dest += 6;
00808     }
00809 }
00810 
00811 static av_always_inline void
00812 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
00813                        const int32_t *ubuf[2], const int32_t *vbuf[2],
00814                        const int32_t *abuf0, uint16_t *dest, int dstW,
00815                        int uvalpha, int y, enum PixelFormat target)
00816 {
00817     const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
00818                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
00819     int i;
00820 
00821     if (uvalpha < 2048) {
00822         for (i = 0; i < (dstW >> 1); i++) {
00823             int Y1 = (buf0[i * 2]    ) >> 2;
00824             int Y2 = (buf0[i * 2 + 1]) >> 2;
00825             int U  = (ubuf0[i] + (-128 << 11)) >> 2;
00826             int V  = (vbuf0[i] + (-128 << 11)) >> 2;
00827             int R, G, B;
00828 
00829             Y1 -= c->yuv2rgb_y_offset;
00830             Y2 -= c->yuv2rgb_y_offset;
00831             Y1 *= c->yuv2rgb_y_coeff;
00832             Y2 *= c->yuv2rgb_y_coeff;
00833             Y1 += 1 << 13;
00834             Y2 += 1 << 13;
00835 
00836             R = V * c->yuv2rgb_v2r_coeff;
00837             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00838             B =                            U * c->yuv2rgb_u2b_coeff;
00839 
00840             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00841             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00842             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00843             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00844             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00845             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00846             dest += 6;
00847         }
00848     } else {
00849         for (i = 0; i < (dstW >> 1); i++) {
00850             int Y1 = (buf0[i * 2]    ) >> 2;
00851             int Y2 = (buf0[i * 2 + 1]) >> 2;
00852             int U  = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
00853             int V  = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
00854             int R, G, B;
00855 
00856             Y1 -= c->yuv2rgb_y_offset;
00857             Y2 -= c->yuv2rgb_y_offset;
00858             Y1 *= c->yuv2rgb_y_coeff;
00859             Y2 *= c->yuv2rgb_y_coeff;
00860             Y1 += 1 << 13;
00861             Y2 += 1 << 13;
00862 
00863             R = V * c->yuv2rgb_v2r_coeff;
00864             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
00865             B =                            U * c->yuv2rgb_u2b_coeff;
00866 
00867             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
00868             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
00869             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
00870             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
00871             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
00872             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
00873             dest += 6;
00874         }
00875     }
00876 }
00877 
/* End of the 48-bit RGB output templates: drop their helper macros. */
00878 #undef output_pixel
/* NOTE(review): the templates above use the upper-case names R_B / B_R, while
 * these #undefs name r_b / b_r.  The corresponding #defines are outside this
 * excerpt -- confirm that the intended macros are being undefined. */
00879 #undef r_b
00880 #undef b_r
00881 
/* Instantiate the yuv2rgb48 templates once per 48-bit target format.  The
 * expansion of YUV2PACKED16WRAPPER is defined outside this excerpt. */
00882 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
00883 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
00884 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
00885 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
00886 
00887 /*
00888  * Write out 2 RGB pixels in the target pixel format. This function takes a
00889  * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
00890  * things like endianness conversion and shifting. The caller takes care of
00891  * setting the correct offset in these tables from the chroma (U/V) values.
00892  * This function then uses the luminance (Y1/Y2) values to write out the
00893  * correct RGB values into the destination buffer.
00894  */
00895 static av_always_inline void
00896 yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
00897               unsigned A1, unsigned A2,
00898               const void *_r, const void *_g, const void *_b, int y,
00899               enum PixelFormat target, int hasAlpha)
00900 {
00901     if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
00902         target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
00903         uint32_t *dest = (uint32_t *) _dest;
00904         const uint32_t *r = (const uint32_t *) _r;
00905         const uint32_t *g = (const uint32_t *) _g;
00906         const uint32_t *b = (const uint32_t *) _b;
00907 
00908 #if CONFIG_SMALL
00909         int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
00910 
00911         dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
00912         dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
00913 #else
00914         if (hasAlpha) {
00915             int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
00916 
00917             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
00918             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
00919         } else {
00920             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
00921             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
00922         }
00923 #endif
00924     } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
00925         uint8_t *dest = (uint8_t *) _dest;
00926         const uint8_t *r = (const uint8_t *) _r;
00927         const uint8_t *g = (const uint8_t *) _g;
00928         const uint8_t *b = (const uint8_t *) _b;
00929 
00930 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
00931 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
00932         dest[i * 6 + 0] = r_b[Y1];
00933         dest[i * 6 + 1] =   g[Y1];
00934         dest[i * 6 + 2] = b_r[Y1];
00935         dest[i * 6 + 3] = r_b[Y2];
00936         dest[i * 6 + 4] =   g[Y2];
00937         dest[i * 6 + 5] = b_r[Y2];
00938 #undef r_b
00939 #undef b_r
00940     } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
00941                target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
00942                target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
00943         uint16_t *dest = (uint16_t *) _dest;
00944         const uint16_t *r = (const uint16_t *) _r;
00945         const uint16_t *g = (const uint16_t *) _g;
00946         const uint16_t *b = (const uint16_t *) _b;
00947         int dr1, dg1, db1, dr2, dg2, db2;
00948 
00949         if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
00950             dr1 = dither_2x2_8[ y & 1     ][0];
00951             dg1 = dither_2x2_4[ y & 1     ][0];
00952             db1 = dither_2x2_8[(y & 1) ^ 1][0];
00953             dr2 = dither_2x2_8[ y & 1     ][1];
00954             dg2 = dither_2x2_4[ y & 1     ][1];
00955             db2 = dither_2x2_8[(y & 1) ^ 1][1];
00956         } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
00957             dr1 = dither_2x2_8[ y & 1     ][0];
00958             dg1 = dither_2x2_8[ y & 1     ][1];
00959             db1 = dither_2x2_8[(y & 1) ^ 1][0];
00960             dr2 = dither_2x2_8[ y & 1     ][1];
00961             dg2 = dither_2x2_8[ y & 1     ][0];
00962             db2 = dither_2x2_8[(y & 1) ^ 1][1];
00963         } else {
00964             dr1 = dither_4x4_16[ y & 3     ][0];
00965             dg1 = dither_4x4_16[ y & 3     ][1];
00966             db1 = dither_4x4_16[(y & 3) ^ 3][0];
00967             dr2 = dither_4x4_16[ y & 3     ][1];
00968             dg2 = dither_4x4_16[ y & 3     ][0];
00969             db2 = dither_4x4_16[(y & 3) ^ 3][1];
00970         }
00971 
00972         dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
00973         dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
00974     } else /* 8/4-bit */ {
00975         uint8_t *dest = (uint8_t *) _dest;
00976         const uint8_t *r = (const uint8_t *) _r;
00977         const uint8_t *g = (const uint8_t *) _g;
00978         const uint8_t *b = (const uint8_t *) _b;
00979         int dr1, dg1, db1, dr2, dg2, db2;
00980 
00981         if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
00982             const uint8_t * const d64 = dither_8x8_73[y & 7];
00983             const uint8_t * const d32 = dither_8x8_32[y & 7];
00984             dr1 = dg1 = d32[(i * 2 + 0) & 7];
00985             db1 =       d64[(i * 2 + 0) & 7];
00986             dr2 = dg2 = d32[(i * 2 + 1) & 7];
00987             db2 =       d64[(i * 2 + 1) & 7];
00988         } else {
00989             const uint8_t * const d64  = dither_8x8_73 [y & 7];
00990             const uint8_t * const d128 = dither_8x8_220[y & 7];
00991             dr1 = db1 = d128[(i * 2 + 0) & 7];
00992             dg1 =        d64[(i * 2 + 0) & 7];
00993             dr2 = db2 = d128[(i * 2 + 1) & 7];
00994             dg2 =        d64[(i * 2 + 1) & 7];
00995         }
00996 
00997         if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
00998             dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
00999                     ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
01000         } else {
01001             dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
01002             dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
01003         }
01004     }
01005 }
01006 
01007 static av_always_inline void
01008 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
01009                      const int16_t **lumSrc, int lumFilterSize,
01010                      const int16_t *chrFilter, const int16_t **chrUSrc,
01011                      const int16_t **chrVSrc, int chrFilterSize,
01012                      const int16_t **alpSrc, uint8_t *dest, int dstW,
01013                      int y, enum PixelFormat target, int hasAlpha)
01014 {
01015     int i;
01016 
01017     for (i = 0; i < (dstW >> 1); i++) {
01018         int j;
01019         int Y1 = 1 << 18;
01020         int Y2 = 1 << 18;
01021         int U  = 1 << 18;
01022         int V  = 1 << 18;
01023         int av_unused A1, A2;
01024         const void *r, *g, *b;
01025 
01026         for (j = 0; j < lumFilterSize; j++) {
01027             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
01028             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
01029         }
01030         for (j = 0; j < chrFilterSize; j++) {
01031             U += chrUSrc[j][i] * chrFilter[j];
01032             V += chrVSrc[j][i] * chrFilter[j];
01033         }
01034         Y1 >>= 19;
01035         Y2 >>= 19;
01036         U  >>= 19;
01037         V  >>= 19;
01038         if ((Y1 | Y2 | U | V) & 0x100) {
01039             Y1 = av_clip_uint8(Y1);
01040             Y2 = av_clip_uint8(Y2);
01041             U  = av_clip_uint8(U);
01042             V  = av_clip_uint8(V);
01043         }
01044         if (hasAlpha) {
01045             A1 = 1 << 18;
01046             A2 = 1 << 18;
01047             for (j = 0; j < lumFilterSize; j++) {
01048                 A1 += alpSrc[j][i * 2    ] * lumFilter[j];
01049                 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
01050             }
01051             A1 >>= 19;
01052             A2 >>= 19;
01053             if ((A1 | A2) & 0x100) {
01054                 A1 = av_clip_uint8(A1);
01055                 A2 = av_clip_uint8(A2);
01056             }
01057         }
01058 
01059         /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
01060         r =  c->table_rV[V];
01061         g = (c->table_gU[U] + c->table_gV[V]);
01062         b =  c->table_bU[U];
01063 
01064         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01065                       r, g, b, y, target, hasAlpha);
01066     }
01067 }
01068 
01069 static av_always_inline void
01070 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
01071                      const int16_t *ubuf[2], const int16_t *vbuf[2],
01072                      const int16_t *abuf[2], uint8_t *dest, int dstW,
01073                      int yalpha, int uvalpha, int y,
01074                      enum PixelFormat target, int hasAlpha)
01075 {
01076     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
01077                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
01078                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
01079                   *abuf0 = hasAlpha ? abuf[0] : NULL,
01080                   *abuf1 = hasAlpha ? abuf[1] : NULL;
01081     int  yalpha1 = 4095 - yalpha;
01082     int uvalpha1 = 4095 - uvalpha;
01083     int i;
01084 
01085     for (i = 0; i < (dstW >> 1); i++) {
01086         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
01087         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
01088         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
01089         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
01090         int A1, A2;
01091         const void *r =  c->table_rV[V],
01092                    *g = (c->table_gU[U] + c->table_gV[V]),
01093                    *b =  c->table_bU[U];
01094 
01095         if (hasAlpha) {
01096             A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
01097             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
01098         }
01099 
01100         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01101                       r, g, b, y, target, hasAlpha);
01102     }
01103 }
01104 
01105 static av_always_inline void
01106 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
01107                      const int16_t *ubuf[2], const int16_t *vbuf[2],
01108                      const int16_t *abuf0, uint8_t *dest, int dstW,
01109                      int uvalpha, int y, enum PixelFormat target,
01110                      int hasAlpha)
01111 {
01112     const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
01113                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
01114     int i;
01115 
01116     if (uvalpha < 2048) {
01117         for (i = 0; i < (dstW >> 1); i++) {
01118             int Y1 = buf0[i * 2]     >> 7;
01119             int Y2 = buf0[i * 2 + 1] >> 7;
01120             int U  = ubuf1[i]        >> 7;
01121             int V  = vbuf1[i]        >> 7;
01122             int A1, A2;
01123             const void *r =  c->table_rV[V],
01124                        *g = (c->table_gU[U] + c->table_gV[V]),
01125                        *b =  c->table_bU[U];
01126 
01127             if (hasAlpha) {
01128                 A1 = abuf0[i * 2    ] >> 7;
01129                 A2 = abuf0[i * 2 + 1] >> 7;
01130             }
01131 
01132             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01133                           r, g, b, y, target, hasAlpha);
01134         }
01135     } else {
01136         for (i = 0; i < (dstW >> 1); i++) {
01137             int Y1 =  buf0[i * 2]          >> 7;
01138             int Y2 =  buf0[i * 2 + 1]      >> 7;
01139             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
01140             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
01141             int A1, A2;
01142             const void *r =  c->table_rV[V],
01143                        *g = (c->table_gU[U] + c->table_gV[V]),
01144                        *b =  c->table_bU[U];
01145 
01146             if (hasAlpha) {
01147                 A1 = abuf0[i * 2    ] >> 7;
01148                 A2 = abuf0[i * 2 + 1] >> 7;
01149             }
01150 
01151             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
01152                           r, g, b, y, target, hasAlpha);
01153         }
01154     }
01155 }
01156 
/*
 * YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha)
 * Defines the function <name><ext>_X_c, which forwards all of its arguments
 * to <name><base>_X_c_template with fmt and hasAlpha appended as the last two
 * arguments.
 */
01157 #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
01158 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
01159                                 const int16_t **lumSrc, int lumFilterSize, \
01160                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
01161                                 const int16_t **chrVSrc, int chrFilterSize, \
01162                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
01163                                 int y) \
01164 { \
01165     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
01166                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
01167                                   alpSrc, dest, dstW, y, fmt, hasAlpha); \
01168 }
/*
 * YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha)
 * Expands YUV2RGBWRAPPERX and additionally defines <name><ext>_2_c and
 * <name><ext>_1_c, which forward to <name><base>_2_c_template and
 * <name><base>_1_c_template in the same way.
 */
01169 #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
01170 YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
01171 static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
01172                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
01173                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
01174                                 int yalpha, int uvalpha, int y) \
01175 { \
01176     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
01177                                   dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
01178 } \
01179  \
01180 static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
01181                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
01182                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
01183                                 int uvalpha, int y) \
01184 { \
01185     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
01186                                   dstW, uvalpha, y, fmt, hasAlpha); \
01187 }
01188 
/*
 * Instantiate the _X_c / _2_c / _1_c entry points for every table-driven RGB
 * output format.
 *
 * With CONFIG_SMALL one 32-bit function is generated per layout and hasAlpha
 * is the run-time expression "CONFIG_SWSCALE_ALPHA && c->alpPixBuf".
 * Otherwise separate functions are generated with hasAlpha fixed to 1 (a32*,
 * only if CONFIG_SWSCALE_ALPHA) and to 0 (x32*).
 *
 * The rgb24 / bgr24 lines split the name as (yuv2, rgb) so that the generated
 * functions are called yuv2rgb24_* / yuv2bgr24_* while still forwarding to the
 * yuv2rgb_*_c_template functions.
 */
01189 #if CONFIG_SMALL
01190 YUV2RGBWRAPPER(yuv2rgb,,  32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01191 YUV2RGBWRAPPER(yuv2rgb,,  32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01192 #else
01193 #if CONFIG_SWSCALE_ALPHA
01194 YUV2RGBWRAPPER(yuv2rgb,, a32_1,  PIX_FMT_RGB32_1,   1)
01195 YUV2RGBWRAPPER(yuv2rgb,, a32,    PIX_FMT_RGB32,     1)
01196 #endif
01197 YUV2RGBWRAPPER(yuv2rgb,, x32_1,  PIX_FMT_RGB32_1,   0)
01198 YUV2RGBWRAPPER(yuv2rgb,, x32,    PIX_FMT_RGB32,     0)
01199 #endif
01200 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0)
01201 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0)
01202 YUV2RGBWRAPPER(yuv2rgb,,  16,    PIX_FMT_RGB565,    0)
01203 YUV2RGBWRAPPER(yuv2rgb,,  15,    PIX_FMT_RGB555,    0)
01204 YUV2RGBWRAPPER(yuv2rgb,,  12,    PIX_FMT_RGB444,    0)
01205 YUV2RGBWRAPPER(yuv2rgb,,   8,    PIX_FMT_RGB8,      0)
01206 YUV2RGBWRAPPER(yuv2rgb,,   4,    PIX_FMT_RGB4,      0)
01207 YUV2RGBWRAPPER(yuv2rgb,,   4b,   PIX_FMT_RGB4_BYTE, 0)
01208 
01209 static av_always_inline void
01210 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
01211                           const int16_t **lumSrc, int lumFilterSize,
01212                           const int16_t *chrFilter, const int16_t **chrUSrc,
01213                           const int16_t **chrVSrc, int chrFilterSize,
01214                           const int16_t **alpSrc, uint8_t *dest,
01215                           int dstW, int y, enum PixelFormat target, int hasAlpha)
01216 {
01217     int i;
01218     int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
01219 
01220     for (i = 0; i < dstW; i++) {
01221         int j;
01222         int Y = 0;
01223         int U = -128 << 19;
01224         int V = -128 << 19;
01225         int av_unused A;
01226         int R, G, B;
01227 
01228         for (j = 0; j < lumFilterSize; j++) {
01229             Y += lumSrc[j][i] * lumFilter[j];
01230         }
01231         for (j = 0; j < chrFilterSize; j++) {
01232             U += chrUSrc[j][i] * chrFilter[j];
01233             V += chrVSrc[j][i] * chrFilter[j];
01234         }
01235         Y >>= 10;
01236         U >>= 10;
01237         V >>= 10;
01238         if (hasAlpha) {
01239             A = 1 << 21;
01240             for (j = 0; j < lumFilterSize; j++) {
01241                 A += alpSrc[j][i] * lumFilter[j];
01242             }
01243             A >>= 19;
01244             if (A & 0x100)
01245                 A = av_clip_uint8(A);
01246         }
01247         Y -= c->yuv2rgb_y_offset;
01248         Y *= c->yuv2rgb_y_coeff;
01249         Y += 1 << 21;
01250         R = Y + V*c->yuv2rgb_v2r_coeff;
01251         G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
01252         B = Y +                          U*c->yuv2rgb_u2b_coeff;
01253         if ((R | G | B) & 0xC0000000) {
01254             R = av_clip_uintp2(R, 30);
01255             G = av_clip_uintp2(G, 30);
01256             B = av_clip_uintp2(B, 30);
01257         }
01258 
01259         switch(target) {
01260         case PIX_FMT_ARGB:
01261             dest[0] = hasAlpha ? A : 255;
01262             dest[1] = R >> 22;
01263             dest[2] = G >> 22;
01264             dest[3] = B >> 22;
01265             break;
01266         case PIX_FMT_RGB24:
01267             dest[0] = R >> 22;
01268             dest[1] = G >> 22;
01269             dest[2] = B >> 22;
01270             break;
01271         case PIX_FMT_RGBA:
01272             dest[0] = R >> 22;
01273             dest[1] = G >> 22;
01274             dest[2] = B >> 22;
01275             dest[3] = hasAlpha ? A : 255;
01276             break;
01277         case PIX_FMT_ABGR:
01278             dest[0] = hasAlpha ? A : 255;
01279             dest[1] = B >> 22;
01280             dest[2] = G >> 22;
01281             dest[3] = R >> 22;
01282             dest += 4;
01283             break;
01284         case PIX_FMT_BGR24:
01285             dest[0] = B >> 22;
01286             dest[1] = G >> 22;
01287             dest[2] = R >> 22;
01288             break;
01289         case PIX_FMT_BGRA:
01290             dest[0] = B >> 22;
01291             dest[1] = G >> 22;
01292             dest[2] = R >> 22;
01293             dest[3] = hasAlpha ? A : 255;
01294             break;
01295         }
01296         dest += step;
01297     }
01298 }
01299 
/*
 * Instantiate the *_full_X_c entry points (yuv2rgb_full_X_c_template) for the
 * 32-bit and 24-bit targets.
 *
 * With CONFIG_SMALL one function is generated per 32-bit layout and hasAlpha
 * is the run-time expression "CONFIG_SWSCALE_ALPHA && c->alpPixBuf".
 * Otherwise the alpha-writing variants (hasAlpha = 1, only if
 * CONFIG_SWSCALE_ALPHA) and the opaque "x" variants (hasAlpha = 0) are
 * generated separately.  The 24-bit targets never carry alpha.
 */
01300 #if CONFIG_SMALL
01301 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01302 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01303 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01304 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
01305 #else
01306 #if CONFIG_SWSCALE_ALPHA
01307 YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  1)
01308 YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  1)
01309 YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  1)
01310 YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  1)
01311 #endif
01312 YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA,  0)
01313 YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR,  0)
01314 YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA,  0)
01315 YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB,  0)
01316 #endif
01317 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0)
01318 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0)
01319 
/**
 * Set a rectangular region of an 8-bit plane to a constant value.
 *
 * Starting at row y of plane, the first width bytes of height consecutive
 * rows (stride bytes apart) are set to val.  Nothing is written when height
 * is not positive.
 */
static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                       int width, int height,
                                       int y, uint8_t val)
{
    uint8_t *row = plane + stride*y;
    int remaining;

    for (remaining = height; remaining > 0; remaining--) {
        memset(row, val, width);
        row += stride;
    }
}
01331 
/* Helper macros for the rgb48To*_c_template functions below; each expects a
 * variable named origin in scope.  input_pixel() reads one 16-bit value at
 * pos, big-endian when isBE(origin) is true and little-endian otherwise. */
01332 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
01333 
/* r and b resolve to whichever of the local variables r_b / b_r holds the red
 * resp. blue component: for the BGR48 origins the first stored component
 * (r_b) is blue and the third (b_r) is red, otherwise the reverse. */
01334 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
01335 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
01336 
01337 static av_always_inline void
01338 rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
01339                     enum PixelFormat origin)
01340 {
01341     int i;
01342     for (i = 0; i < width; i++) {
01343         unsigned int r_b = input_pixel(&src[i*3+0]);
01344         unsigned int   g = input_pixel(&src[i*3+1]);
01345         unsigned int b_r = input_pixel(&src[i*3+2]);
01346 
01347         dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01348     }
01349 }
01350 
01351 static av_always_inline void
01352 rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
01353                     const uint16_t *src1, const uint16_t *src2,
01354                     int width, enum PixelFormat origin)
01355 {
01356     int i;
01357     assert(src1==src2);
01358     for (i = 0; i < width; i++) {
01359         int r_b = input_pixel(&src1[i*3+0]);
01360         int   g = input_pixel(&src1[i*3+1]);
01361         int b_r = input_pixel(&src1[i*3+2]);
01362 
01363         dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01364         dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01365     }
01366 }
01367 
01368 static av_always_inline void
01369 rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
01370                           const uint16_t *src1, const uint16_t *src2,
01371                           int width, enum PixelFormat origin)
01372 {
01373     int i;
01374     assert(src1==src2);
01375     for (i = 0; i < width; i++) {
01376         int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
01377         int   g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
01378         int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
01379 
01380         dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01381         dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
01382     }
01383 }
01384 
/* The rgb48To*_c_template helper macros are no longer needed. */
01385 #undef r
01386 #undef b
01387 #undef input_pixel
01388 
/*
 * rgb48funcs(pattern, BE_LE, origin)
 * Defines three byte-pointer entry points for one 48-bit input format:
 *   <pattern>48<BE_LE>ToY_c, <pattern>48<BE_LE>ToUV_c and
 *   <pattern>48<BE_LE>ToUV_half_c.
 * Each one casts its uint8_t pointers to uint16_t pointers and forwards to
 * the matching rgb48To*_c_template with origin as the pixel format.  The
 * trailing "unused" parameter is not read.
 */
01389 #define rgb48funcs(pattern, BE_LE, origin) \
01390 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
01391                                     int width, uint32_t *unused) \
01392 { \
01393     const uint16_t *src = (const uint16_t *) _src; \
01394     uint16_t *dst = (uint16_t *) _dst; \
01395     rgb48ToY_c_template(dst, src, width, origin); \
01396 } \
01397  \
01398 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
01399                                     const uint8_t *_src1, const uint8_t *_src2, \
01400                                     int width, uint32_t *unused) \
01401 { \
01402     const uint16_t *src1 = (const uint16_t *) _src1, \
01403                    *src2 = (const uint16_t *) _src2; \
01404     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
01405     rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
01406 } \
01407  \
01408 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
01409                                     const uint8_t *_src1, const uint8_t *_src2, \
01410                                     int width, uint32_t *unused) \
01411 { \
01412     const uint16_t *src1 = (const uint16_t *) _src1, \
01413                    *src2 = (const uint16_t *) _src2; \
01414     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
01415     rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
01416 }
01417 
/* One set of entry points per component order and byte order. */
01418 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
01419 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
01420 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
01421 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
01422 
/* Pixel fetch for the rgb16_32To*_c_template functions below; expects src and
 * origin in scope.  Pixel i is read as one native-endian 32-bit word
 * (AV_RN32A) for the four 32-bit RGBA-type origins, otherwise as one 16-bit
 * word in the byte order given by isBE(origin). */
01423 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
01424                          origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
01425                         (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
01426 
01427 static av_always_inline void
01428 rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
01429                        int width, enum PixelFormat origin,
01430                        int shr,   int shg,   int shb, int shp,
01431                        int maskr, int maskg, int maskb,
01432                        int rsh,   int gsh,   int bsh, int S)
01433 {
01434     const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
01435     const unsigned rnd = 33u << (S - 1);
01436     int i;
01437 
01438     for (i = 0; i < width; i++) {
01439         int px = input_pixel(i) >> shp;
01440         int b = (px & maskb) >> shb;
01441         int g = (px & maskg) >> shg;
01442         int r = (px & maskr) >> shr;
01443 
01444         dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
01445     }
01446 }
01447 
01448 static av_always_inline void
01449 rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
01450                         const uint8_t *src, int width,
01451                         enum PixelFormat origin,
01452                         int shr,   int shg,   int shb, int shp,
01453                         int maskr, int maskg, int maskb,
01454                         int rsh,   int gsh,   int bsh, int S)
01455 {
01456     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
01457               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
01458     const unsigned rnd = 257u << (S - 1);
01459     int i;
01460 
01461     for (i = 0; i < width; i++) {
01462         int px = input_pixel(i) >> shp;
01463         int b = (px & maskb) >> shb;
01464         int g = (px & maskg) >> shg;
01465         int r = (px & maskr) >> shr;
01466 
01467         dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
01468         dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
01469     }
01470 }
01471 
01472 static av_always_inline void
01473 rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
01474                              const uint8_t *src, int width,
01475                              enum PixelFormat origin,
01476                              int shr,   int shg,   int shb, int shp,
01477                              int maskr, int maskg, int maskb,
01478                              int rsh,   int gsh,   int bsh, int S)
01479 {
01480     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
01481               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
01482               maskgx = ~(maskr | maskb);
01483     const unsigned rnd = 257u << S;
01484     int i;
01485 
01486     maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
01487     for (i = 0; i < width; i++) {
01488         int px0 = input_pixel(2 * i + 0) >> shp;
01489         int px1 = input_pixel(2 * i + 1) >> shp;
01490         int b, r, g = (px0 & maskgx) + (px1 & maskgx);
01491         int rb = px0 + px1 - g;
01492 
01493         b = (rb & maskb) >> shb;
01494         if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
01495             origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
01496             g >>= shg;
01497         } else {
01498             g = (g  & maskg) >> shg;
01499         }
01500         r = (rb & maskr) >> shr;
01501 
01502         dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
01503         dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
01504     }
01505 }
01506 
01507 #undef input_pixel
01508 
/*
 * Instantiate the three input converters for one packed RGB format:
 *   <name>ToY_c       -> rgb16_32ToY_c_template
 *   <name>ToUV_c      -> rgb16_32ToUV_c_template
 *   <name>ToUV_half_c -> rgb16_32ToUV_half_c_template
 * `fmt` is passed to the templates as the `origin` pixel format; the remaining
 * macro arguments (shifts, masks, coefficient pre-shifts and S) are forwarded
 * unchanged. The generated functions ignore their `unused` and `dummy`
 * parameters.
 */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
                          int width, uint32_t *unused) \
{ \
    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
 \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
{ \
    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
 \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
{ \
    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
}
01533 
/*
 * One instantiation per supported packed RGB input format.
 * Argument columns, in order:
 *   fmt, name, shr, shg, shb, shp, maskr, maskg, maskb, rsh, gsh, bsh, S
 * The 16-bit formats leave the components in place (all shX are 0) and
 * compensate through the coefficient pre-shifts rsh/gsh/bsh and through S.
 * The LE and BE rows of a format use identical numbers; only `fmt` differs.
 */
rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT+4)
01550 
/**
 * Copy the first byte of every 4-byte pixel into dst.
 *
 * @param dst    output, one byte per pixel
 * @param src    packed 4-bytes-per-pixel input
 * @param width  number of pixels
 * @param unused ignored
 */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[4 * x + 0];
        x++;
    }
}
01558 
/**
 * Copy the fourth byte of every 4-byte pixel into dst.
 *
 * @param dst    output, one byte per pixel
 * @param src    packed 4-bytes-per-pixel input
 * @param width  number of pixels
 * @param unused ignored
 */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[4 * x + 3];
        x++;
    }
}
01566 
/**
 * Palette lookup for luma: each source byte indexes pal[], and the lowest
 * 8 bits of the palette entry are written to dst.
 *
 * @param dst   output, one byte per pixel
 * @param src   palette indices, one byte per pixel
 * @param width number of pixels
 * @param pal   palette table indexed by the source byte
 */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int x;

    for (x = 0; x < width; x++)
        dst[x] = pal[src[x]] & 0xFF;
}
01576 
/**
 * Palette lookup for chroma: each source byte indexes pal[]; bits 8..15 of
 * the entry become U and bits 16..23 become V.
 *
 * @param dstU  output U samples
 * @param dstV  output V samples
 * @param src1  palette indices, one byte per pixel
 * @param src2  must be the same pointer as src1 (asserted)
 * @param width number of pixels
 * @param pal   palette table indexed by the source byte
 */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int x = 0;

    assert(src1 == src2);
    while (x < width) {
        int entry = pal[src1[x]];

        dstU[x] = entry >> 8;
        dstV[x] = entry >> 16;
        x++;
    }
}
01590 
/**
 * Expand 1 bit-per-pixel input to 8-bit luma with inverted polarity: a clear
 * bit yields 255, a set bit yields 0. Bits are consumed most significant
 * first within each source byte.
 *
 * Unlike a loop over width / 8 whole bytes, this also converts the trailing
 * width % 8 pixels, so lines whose width is not a multiple of 8 are written
 * completely. At most (width + 7) / 8 source bytes are read.
 *
 * @param dst    output, one byte per pixel, width bytes are written
 * @param src    packed 1 bpp input
 * @param width  number of pixels
 * @param unused ignored
 */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i;

    for (i = 0; i < width; i++) {
        int d = ~src[i >> 3];

        dst[i] = ((d >> (7 - (i & 7))) & 1) * 255;
    }
}
01601 
/**
 * Expand 1 bit-per-pixel input to 8-bit luma: a set bit yields 255, a clear
 * bit yields 0. Bits are consumed most significant first within each source
 * byte.
 *
 * Unlike a loop over width / 8 whole bytes, this also converts the trailing
 * width % 8 pixels, so lines whose width is not a multiple of 8 are written
 * completely. At most (width + 7) / 8 source bytes are read.
 *
 * @param dst    output, one byte per pixel, width bytes are written
 * @param src    packed 1 bpp input
 * @param width  number of pixels
 * @param unused ignored
 */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int i;

    for (i = 0; i < width; i++) {
        int d = src[i >> 3];

        dst[i] = ((d >> (7 - (i & 7))) & 1) * 255;
    }
}
01612 
//FIXME yuy2* can read up to 7 samples too many
01614 
/**
 * Extract luma from a packed line where every even byte is a Y sample:
 * dst[i] = src[2 * i].
 *
 * @param dst    output, one byte per pixel
 * @param src    packed input line
 * @param width  number of pixels
 * @param unused ignored
 */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[2 * x];
        x++;
    }
}
01622 
/**
 * Extract chroma from a packed line of 4-byte groups: byte 1 of each group
 * goes to dstU and byte 3 to dstV.
 *
 * @param dstU   output U samples
 * @param dstV   output V samples
 * @param src1   packed input line
 * @param src2   must be the same pointer as src1 (asserted after the loop)
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dstU[x] = src1[4 * x + 1];
        dstV[x] = src1[4 * x + 3];
        x++;
    }
    assert(src1 == src2);
}
01633 
/**
 * Copy a line of 16-bit samples, passing each one through av_bswap16().
 *
 * @param _dst   output buffer, accessed as uint16_t
 * @param _src   input buffer, accessed as uint16_t
 * @param width  number of 16-bit samples
 * @param unused ignored
 */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    const uint16_t *in = (const uint16_t *) _src;
    uint16_t *out      = (uint16_t *) _dst;
    int x = 0;

    while (x < width) {
        out[x] = av_bswap16(in[x]);
        x++;
    }
}
01643 
/**
 * Copy two lines of 16-bit samples (one per chroma plane), passing each
 * sample through av_bswap16().
 *
 * @param _dstU  output for the first plane, accessed as uint16_t
 * @param _dstV  output for the second plane, accessed as uint16_t
 * @param _src1  input for the first plane, accessed as uint16_t
 * @param _src2  input for the second plane, accessed as uint16_t
 * @param width  number of 16-bit samples per plane
 * @param unused ignored
 */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    const uint16_t *in_u = (const uint16_t *) _src1;
    const uint16_t *in_v = (const uint16_t *) _src2;
    uint16_t *out_u      = (uint16_t *) _dstU;
    uint16_t *out_v      = (uint16_t *) _dstV;
    int x = 0;

    while (x < width) {
        out_u[x] = av_bswap16(in_u[x]);
        out_v[x] = av_bswap16(in_v[x]);
        x++;
    }
}
01656 
/* This is almost identical to yuy2ToY_c()/yuy2ToUV_c() and exists only
 * because calling yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned
 * accesses. */
/**
 * Extract luma from a packed line where every odd byte is a Y sample:
 * dst[i] = src[2 * i + 1].
 *
 * @param dst    output, one byte per pixel
 * @param src    packed input line
 * @param width  number of pixels
 * @param unused ignored
 */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dst[x] = src[2 * x + 1];
        x++;
    }
}
01666 
/**
 * Extract chroma from a packed line of 4-byte groups: byte 0 of each group
 * goes to dstU and byte 2 to dstV.
 *
 * @param dstU   output U samples
 * @param dstV   output V samples
 * @param src1   packed input line
 * @param src2   must be the same pointer as src1 (asserted after the loop)
 * @param width  number of chroma samples to produce
 * @param unused ignored
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int x = 0;

    while (x < width) {
        dstU[x] = src1[4 * x + 0];
        dstV[x] = src1[4 * x + 2];
        x++;
    }
    assert(src1 == src2);
}
01677 
01678 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
01679                                         const uint8_t *src, int width)
01680 {
01681     int i;
01682     for (i = 0; i < width; i++) {
01683         dst1[i] = src[2*i+0];
01684         dst2[i] = src[2*i+1];
01685     }
01686 }
01687 
/**
 * Split an interleaved chroma line via nvXXtoUV_c(): the first byte of each
 * pair goes to dstU, the second to dstV. src2 and unused are ignored.
 */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
01694 
/**
 * Split an interleaved chroma line via nvXXtoUV_c() with the destinations
 * swapped: the first byte of each pair goes to dstV, the second to dstU.
 * src2 and unused are ignored.
 */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
01701 
/* Read a 16-bit value at address `pos`, using AV_RB16 when isBE(origin) is
 * true and AV_RL16 otherwise. Requires `origin` to be in scope at the
 * expansion site. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
01703 
01704 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
01705                        int width, uint32_t *unused)
01706 {
01707     int i;
01708     for (i=0; i<width; i++) {
01709         int b= src[i*3+0];
01710         int g= src[i*3+1];
01711         int r= src[i*3+2];
01712 
01713         dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
01714     }
01715 }
01716 
01717 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
01718                         const uint8_t *src2, int width, uint32_t *unused)
01719 {
01720     int i;
01721     for (i=0; i<width; i++) {
01722         int b= src1[3*i + 0];
01723         int g= src1[3*i + 1];
01724         int r= src1[3*i + 2];
01725 
01726         dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01727         dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01728     }
01729     assert(src1 == src2);
01730 }
01731 
01732 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
01733                              const uint8_t *src2, int width, uint32_t *unused)
01734 {
01735     int i;
01736     for (i=0; i<width; i++) {
01737         int b= src1[6*i + 0] + src1[6*i + 3];
01738         int g= src1[6*i + 1] + src1[6*i + 4];
01739         int r= src1[6*i + 2] + src1[6*i + 5];
01740 
01741         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01742         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01743     }
01744     assert(src1 == src2);
01745 }
01746 
01747 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
01748                        uint32_t *unused)
01749 {
01750     int i;
01751     for (i=0; i<width; i++) {
01752         int r= src[i*3+0];
01753         int g= src[i*3+1];
01754         int b= src[i*3+2];
01755 
01756         dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
01757     }
01758 }
01759 
01760 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
01761                         const uint8_t *src2, int width, uint32_t *unused)
01762 {
01763     int i;
01764     assert(src1==src2);
01765     for (i=0; i<width; i++) {
01766         int r= src1[3*i + 0];
01767         int g= src1[3*i + 1];
01768         int b= src1[3*i + 2];
01769 
01770         dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01771         dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
01772     }
01773 }
01774 
01775 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
01776                              const uint8_t *src2, int width, uint32_t *unused)
01777 {
01778     int i;
01779     assert(src1==src2);
01780     for (i=0; i<width; i++) {
01781         int r= src1[6*i + 0] + src1[6*i + 3];
01782         int g= src1[6*i + 1] + src1[6*i + 4];
01783         int b= src1[6*i + 2] + src1[6*i + 5];
01784 
01785         dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01786         dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
01787     }
01788 }
01789 
01790 static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
01791 {
01792     int i;
01793     for (i = 0; i < width; i++) {
01794         int g = src[0][i];
01795         int b = src[1][i];
01796         int r = src[2][i];
01797 
01798         dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
01799     }
01800 }
01801 
01802 static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
01803 {
01804     int i;
01805     const uint16_t **src = (const uint16_t **) _src;
01806     uint16_t *dst = (uint16_t *) _dst;
01807     for (i = 0; i < width; i++) {
01808         int g = AV_RL16(src[0] + i);
01809         int b = AV_RL16(src[1] + i);
01810         int r = AV_RL16(src[2] + i);
01811 
01812         dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
01813     }
01814 }
01815 
01816 static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
01817 {
01818     int i;
01819     const uint16_t **src = (const uint16_t **) _src;
01820     uint16_t *dst = (uint16_t *) _dst;
01821     for (i = 0; i < width; i++) {
01822         int g = AV_RB16(src[0] + i);
01823         int b = AV_RB16(src[1] + i);
01824         int r = AV_RB16(src[2] + i);
01825 
01826         dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
01827     }
01828 }
01829 
01830 static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
01831 {
01832     int i;
01833     for (i = 0; i < width; i++) {
01834         int g = src[0][i];
01835         int b = src[1][i];
01836         int r = src[2][i];
01837 
01838         dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
01839         dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
01840     }
01841 }
01842 
01843 static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
01844 {
01845     int i;
01846     const uint16_t **src = (const uint16_t **) _src;
01847     uint16_t *dstU = (uint16_t *) _dstU;
01848     uint16_t *dstV = (uint16_t *) _dstV;
01849     for (i = 0; i < width; i++) {
01850         int g = AV_RL16(src[0] + i);
01851         int b = AV_RL16(src[1] + i);
01852         int r = AV_RL16(src[2] + i);
01853 
01854         dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
01855         dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
01856     }
01857 }
01858 
01859 static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
01860 {
01861     int i;
01862     const uint16_t **src = (const uint16_t **) _src;
01863     uint16_t *dstU = (uint16_t *) _dstU;
01864     uint16_t *dstV = (uint16_t *) _dstV;
01865     for (i = 0; i < width; i++) {
01866         int g = AV_RB16(src[0] + i);
01867         int b = AV_RB16(src[1] + i);
01868         int r = AV_RB16(src[2] + i);
01869 
01870         dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
01871         dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
01872     }
01873 }
01874 
01875 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
01876                            const int16_t *filter,
01877                            const int32_t *filterPos, int filterSize)
01878 {
01879     int i;
01880     int32_t *dst = (int32_t *) _dst;
01881     const uint16_t *src = (const uint16_t *) _src;
01882     int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
01883     int sh = bits - 4;
01884 
01885     for (i = 0; i < dstW; i++) {
01886         int j;
01887         int srcPos = filterPos[i];
01888         int val = 0;
01889 
01890         for (j = 0; j < filterSize; j++) {
01891             val += src[srcPos + j] * filter[filterSize * i + j];
01892         }
01893         // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
01894         dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
01895     }
01896 }
01897 
01898 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
01899                            const int16_t *filter,
01900                            const int32_t *filterPos, int filterSize)
01901 {
01902     int i;
01903     const uint16_t *src = (const uint16_t *) _src;
01904     int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
01905 
01906     for (i = 0; i < dstW; i++) {
01907         int j;
01908         int srcPos = filterPos[i];
01909         int val = 0;
01910 
01911         for (j = 0; j < filterSize; j++) {
01912             val += src[srcPos + j] * filter[filterSize * i + j];
01913         }
01914         // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
01915         dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
01916     }
01917 }
01918 
01919 // bilinear / bicubic scaling
01920 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
01921                           const int16_t *filter, const int32_t *filterPos,
01922                           int filterSize)
01923 {
01924     int i;
01925     for (i=0; i<dstW; i++) {
01926         int j;
01927         int srcPos= filterPos[i];
01928         int val=0;
01929         for (j=0; j<filterSize; j++) {
01930             val += ((int)src[srcPos + j])*filter[filterSize*i + j];
01931         }
01932         //filter += hFilterSize;
01933         dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
01934         //dst[i] = val>>7;
01935     }
01936 }
01937 
01938 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
01939                           const int16_t *filter, const int32_t *filterPos,
01940                           int filterSize)
01941 {
01942     int i;
01943     int32_t *dst = (int32_t *) _dst;
01944     for (i=0; i<dstW; i++) {
01945         int j;
01946         int srcPos= filterPos[i];
01947         int val=0;
01948         for (j=0; j<filterSize; j++) {
01949             val += ((int)src[srcPos + j])*filter[filterSize*i + j];
01950         }
01951         //filter += hFilterSize;
01952         dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
01953         //dst[i] = val>>7;
01954     }
01955 }
01956 
//FIXME all pal and rgb srcFormats could do this conversion as well
01958 //FIXME all scalers more complex than bilinear could do half of this transform
/**
 * In-place chroma range expansion on 16-bit intermediates: each sample is
 * clamped to 30775 from above, then mapped to (v * 4663 - 9289992) >> 12.
 *
 * @param dstU  U samples, modified in place
 * @param dstV  V samples, modified in place
 * @param width number of samples per plane
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int x;

    for (x = 0; x < width; x++) {
        int u = FFMIN(dstU[x], 30775);
        int v = FFMIN(dstV[x], 30775);

        dstU[x] = (u * 4663 - 9289992) >> 12; //-264
        dstV[x] = (v * 4663 - 9289992) >> 12; //-264
    }
}
/**
 * In-place chroma range compression on 16-bit intermediates: each sample is
 * mapped to (v * 1799 + 4081085) >> 11.
 *
 * @param dstU  U samples, modified in place
 * @param dstV  V samples, modified in place
 * @param width number of samples per plane
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int x = 0;

    while (x < width) {
        int u = dstU[x];
        int v = dstV[x];

        dstU[x] = (u * 1799 + 4081085) >> 11; //1469
        dstV[x] = (v * 1799 + 4081085) >> 11; //1469
        x++;
    }
}
/**
 * In-place luma range expansion on 16-bit intermediates: each sample is
 * clamped to 30189 from above, then mapped to (v * 19077 - 39057361) >> 14.
 *
 * @param dst   samples, modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int x = 0;

    while (x < width) {
        int y = FFMIN(dst[x], 30189);

        dst[x] = (y * 19077 - 39057361) >> 14;
        x++;
    }
}
/**
 * In-place luma range compression on 16-bit intermediates: each sample is
 * mapped to (v * 14071 + 33561947) >> 14.
 *
 * @param dst   samples, modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int x = 0;

    while (x < width) {
        int y = dst[x];

        dst[x] = (y * 14071 + 33561947) >> 14;
        x++;
    }
}
01987 
/**
 * In-place chroma range expansion on 32-bit intermediates: each sample is
 * clamped to 30775 << 4 from above, then mapped to
 * (v * 4663 - (9289992 << 4)) >> 12.
 *
 * The product is formed in 64 bits: (30775 << 4) * 4663 is larger than
 * INT_MAX, so a plain int multiplication would be signed overflow for
 * samples near the clamp. The final value fits in 32 bits.
 *
 * @param _dstU U buffer, accessed as int32_t and modified in place
 * @param _dstV V buffer, accessed as int32_t and modified in place
 * @param width number of samples per plane
 */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int i;
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    const int32_t limit  = 30775 << 4;
    const int32_t offset = 9289992 << 4;

    for (i = 0; i < width; i++) {
        int64_t u = dstU[i] > limit ? limit : dstU[i];
        int64_t v = dstV[i] > limit ? limit : dstV[i];

        dstU[i] = (int32_t)((u * 4663 - offset) >> 12); //-264
        dstV[i] = (int32_t)((v * 4663 - offset) >> 12); //-264
    }
}
/**
 * In-place chroma range compression on 32-bit intermediates: each sample is
 * mapped to (v * 1799 + (4081085 << 4)) >> 11.
 *
 * @param _dstU U buffer, accessed as int32_t and modified in place
 * @param _dstV V buffer, accessed as int32_t and modified in place
 * @param width number of samples per plane
 */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *plane_u = (int32_t *) _dstU;
    int32_t *plane_v = (int32_t *) _dstV;
    int x = 0;

    while (x < width) {
        plane_u[x] = (plane_u[x]*1799 + (4081085<<4))>>11; //1469
        plane_v[x] = (plane_v[x]*1799 + (4081085<<4))>>11; //1469
        x++;
    }
}
/**
 * In-place luma range expansion on 32-bit intermediates: each sample is
 * clamped to 30189 << 4 from above, then mapped to
 * (v * 4769 - (39057361 << 2)) >> 12.
 *
 * The product is formed in 64 bits: (30189 << 4) * 4769 is larger than
 * INT_MAX, so a plain int multiplication would be signed overflow for
 * samples near the clamp. The final value fits in 32 bits.
 *
 * @param _dst  buffer, accessed as int32_t and modified in place
 * @param width number of samples
 */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    const int32_t limit  = 30189 << 4;
    const int32_t offset = 39057361 << 2;

    for (i = 0; i < width; i++) {
        int64_t y = dst[i] > limit ? limit : dst[i];

        dst[i] = (int32_t)((y * 4769 - offset) >> 12);
    }
}
/**
 * In-place luma range compression on 32-bit intermediates: each sample is
 * mapped to (v * 14071 + (33561947 << 4)) >> 14.
 *
 * The product is formed in 64 bits: v * 14071 exceeds INT_MAX once v is
 * above roughly 152000, so a plain int multiplication overflows for large
 * samples and yields a wrong value. Results for smaller samples are
 * unchanged.
 *
 * @param _dst  buffer, accessed as int32_t and modified in place
 * @param width number of samples
 */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    const int32_t offset = 33561947 << 4;

    for (i = 0; i < width; i++)
        dst[i] = (int32_t)(((int64_t)dst[i] * 14071 + offset) >> 14);
}
02022 
02023 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
02024                            const uint8_t *src, int srcW, int xInc)
02025 {
02026     int i;
02027     unsigned int xpos=0;
02028     for (i=0;i<dstWidth;i++) {
02029         register unsigned int xx=xpos>>16;
02030         register unsigned int xalpha=(xpos&0xFFFF)>>9;
02031         dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
02032         xpos+=xInc;
02033     }
02034 }
02035 
02036 // *** horizontal scale Y line to temp buffer
02037 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
02038                                      const uint8_t *src_in[4], int srcW, int xInc,
02039                                      const int16_t *hLumFilter,
02040                                      const int32_t *hLumFilterPos, int hLumFilterSize,
02041                                      uint8_t *formatConvBuffer,
02042                                      uint32_t *pal, int isAlpha)
02043 {
02044     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
02045     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
02046     const uint8_t *src = src_in[isAlpha ? 3 : 0];
02047 
02048     if (toYV12) {
02049         toYV12(formatConvBuffer, src, srcW, pal);
02050         src= formatConvBuffer;
02051     } else if (c->readLumPlanar && !isAlpha) {
02052         c->readLumPlanar(formatConvBuffer, src_in, srcW);
02053         src = formatConvBuffer;
02054     }
02055 
02056     if (!c->hyscale_fast) {
02057         c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
02058     } else { // fast bilinear upscale / crap downscale
02059         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
02060     }
02061 
02062     if (convertRange)
02063         convertRange(dst, dstWidth);
02064 }
02065 
02066 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
02067                            int dstWidth, const uint8_t *src1,
02068                            const uint8_t *src2, int srcW, int xInc)
02069 {
02070     int i;
02071     unsigned int xpos=0;
02072     for (i=0;i<dstWidth;i++) {
02073         register unsigned int xx=xpos>>16;
02074         register unsigned int xalpha=(xpos&0xFFFF)>>9;
02075         dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
02076         dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
02077         xpos+=xInc;
02078     }
02079 }
02080 
02081 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
02082                                      const uint8_t *src_in[4],
02083                                      int srcW, int xInc, const int16_t *hChrFilter,
02084                                      const int32_t *hChrFilterPos, int hChrFilterSize,
02085                                      uint8_t *formatConvBuffer, uint32_t *pal)
02086 {
02087     const uint8_t *src1 = src_in[1], *src2 = src_in[2];
02088     if (c->chrToYV12) {
02089         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
02090         c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
02091         src1= formatConvBuffer;
02092         src2= buf2;
02093     } else if (c->readChrPlanar) {
02094         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
02095         c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
02096         src1= formatConvBuffer;
02097         src2= buf2;
02098     }
02099 
02100     if (!c->hcscale_fast) {
02101         c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
02102         c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
02103     } else { // fast bilinear upscale / crap downscale
02104         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
02105     }
02106 
02107     if (c->chrConvertRange)
02108         c->chrConvertRange(dst1, dst2, dstWidth);
02109 }
02110 
02111 static av_always_inline void
02112 find_c_packed_planar_out_funcs(SwsContext *c,
02113                                yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
02114                                yuv2interleavedX_fn *yuv2nv12cX,
02115                                yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
02116                                yuv2packedX_fn *yuv2packedX)
02117 {
02118     enum PixelFormat dstFormat = c->dstFormat;
02119 
02120     if (is16BPS(dstFormat)) {
02121         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
02122         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
02123     } else if (is9_OR_10BPS(dstFormat)) {
02124         if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
02125             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
02126             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
02127         } else {
02128             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
02129             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
02130         }
02131     } else {
02132         *yuv2plane1 = yuv2plane1_8_c;
02133         *yuv2planeX = yuv2planeX_8_c;
02134         if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
02135             *yuv2nv12cX = yuv2nv12cX_c;
02136     }
02137 
02138     if(c->flags & SWS_FULL_CHR_H_INT) {
02139         switch (dstFormat) {
02140             case PIX_FMT_RGBA:
02141 #if CONFIG_SMALL
02142                 *yuv2packedX = yuv2rgba32_full_X_c;
02143 #else
02144 #if CONFIG_SWSCALE_ALPHA
02145                 if (c->alpPixBuf) {
02146                     *yuv2packedX = yuv2rgba32_full_X_c;
02147                 } else
02148 #endif /* CONFIG_SWSCALE_ALPHA */
02149                 {
02150                     *yuv2packedX = yuv2rgbx32_full_X_c;
02151                 }
02152 #endif /* !CONFIG_SMALL */
02153                 break;
02154             case PIX_FMT_ARGB:
02155 #if CONFIG_SMALL
02156                 *yuv2packedX = yuv2argb32_full_X_c;
02157 #else
02158 #if CONFIG_SWSCALE_ALPHA
02159                 if (c->alpPixBuf) {
02160                     *yuv2packedX = yuv2argb32_full_X_c;
02161                 } else
02162 #endif /* CONFIG_SWSCALE_ALPHA */
02163                 {
02164                     *yuv2packedX = yuv2xrgb32_full_X_c;
02165                 }
02166 #endif /* !CONFIG_SMALL */
02167                 break;
02168             case PIX_FMT_BGRA:
02169 #if CONFIG_SMALL
02170                 *yuv2packedX = yuv2bgra32_full_X_c;
02171 #else
02172 #if CONFIG_SWSCALE_ALPHA
02173                 if (c->alpPixBuf) {
02174                     *yuv2packedX = yuv2bgra32_full_X_c;
02175                 } else
02176 #endif /* CONFIG_SWSCALE_ALPHA */
02177                 {
02178                     *yuv2packedX = yuv2bgrx32_full_X_c;
02179                 }
02180 #endif /* !CONFIG_SMALL */
02181                 break;
02182             case PIX_FMT_ABGR:
02183 #if CONFIG_SMALL
02184                 *yuv2packedX = yuv2abgr32_full_X_c;
02185 #else
02186 #if CONFIG_SWSCALE_ALPHA
02187                 if (c->alpPixBuf) {
02188                     *yuv2packedX = yuv2abgr32_full_X_c;
02189                 } else
02190 #endif /* CONFIG_SWSCALE_ALPHA */
02191                 {
02192                     *yuv2packedX = yuv2xbgr32_full_X_c;
02193                 }
02194 #endif /* !CONFIG_SMALL */
02195                 break;
02196             case PIX_FMT_RGB24:
02197             *yuv2packedX = yuv2rgb24_full_X_c;
02198             break;
02199         case PIX_FMT_BGR24:
02200             *yuv2packedX = yuv2bgr24_full_X_c;
02201             break;
02202         }
02203     } else {
02204         switch (dstFormat) {
02205         case PIX_FMT_RGB48LE:
02206             *yuv2packed1 = yuv2rgb48le_1_c;
02207             *yuv2packed2 = yuv2rgb48le_2_c;
02208             *yuv2packedX = yuv2rgb48le_X_c;
02209             break;
02210         case PIX_FMT_RGB48BE:
02211             *yuv2packed1 = yuv2rgb48be_1_c;
02212             *yuv2packed2 = yuv2rgb48be_2_c;
02213             *yuv2packedX = yuv2rgb48be_X_c;
02214             break;
02215         case PIX_FMT_BGR48LE:
02216             *yuv2packed1 = yuv2bgr48le_1_c;
02217             *yuv2packed2 = yuv2bgr48le_2_c;
02218             *yuv2packedX = yuv2bgr48le_X_c;
02219             break;
02220         case PIX_FMT_BGR48BE:
02221             *yuv2packed1 = yuv2bgr48be_1_c;
02222             *yuv2packed2 = yuv2bgr48be_2_c;
02223             *yuv2packedX = yuv2bgr48be_X_c;
02224             break;
02225         case PIX_FMT_RGB32:
02226         case PIX_FMT_BGR32:
02227 #if CONFIG_SMALL
02228             *yuv2packed1 = yuv2rgb32_1_c;
02229             *yuv2packed2 = yuv2rgb32_2_c;
02230             *yuv2packedX = yuv2rgb32_X_c;
02231 #else
02232 #if CONFIG_SWSCALE_ALPHA
02233                 if (c->alpPixBuf) {
02234                     *yuv2packed1 = yuv2rgba32_1_c;
02235                     *yuv2packed2 = yuv2rgba32_2_c;
02236                     *yuv2packedX = yuv2rgba32_X_c;
02237                 } else
02238 #endif /* CONFIG_SWSCALE_ALPHA */
02239                 {
02240                     *yuv2packed1 = yuv2rgbx32_1_c;
02241                     *yuv2packed2 = yuv2rgbx32_2_c;
02242                     *yuv2packedX = yuv2rgbx32_X_c;
02243                 }
02244 #endif /* !CONFIG_SMALL */
02245             break;
02246         case PIX_FMT_RGB32_1:
02247         case PIX_FMT_BGR32_1:
02248 #if CONFIG_SMALL
02249                 *yuv2packed1 = yuv2rgb32_1_1_c;
02250                 *yuv2packed2 = yuv2rgb32_1_2_c;
02251                 *yuv2packedX = yuv2rgb32_1_X_c;
02252 #else
02253 #if CONFIG_SWSCALE_ALPHA
02254                 if (c->alpPixBuf) {
02255                     *yuv2packed1 = yuv2rgba32_1_1_c;
02256                     *yuv2packed2 = yuv2rgba32_1_2_c;
02257                     *yuv2packedX = yuv2rgba32_1_X_c;
02258                 } else
02259 #endif /* CONFIG_SWSCALE_ALPHA */
02260                 {
02261                     *yuv2packed1 = yuv2rgbx32_1_1_c;
02262                     *yuv2packed2 = yuv2rgbx32_1_2_c;
02263                     *yuv2packedX = yuv2rgbx32_1_X_c;
02264                 }
02265 #endif /* !CONFIG_SMALL */
02266                 break;
02267         case PIX_FMT_RGB24:
02268             *yuv2packed1 = yuv2rgb24_1_c;
02269             *yuv2packed2 = yuv2rgb24_2_c;
02270             *yuv2packedX = yuv2rgb24_X_c;
02271             break;
02272         case PIX_FMT_BGR24:
02273             *yuv2packed1 = yuv2bgr24_1_c;
02274             *yuv2packed2 = yuv2bgr24_2_c;
02275             *yuv2packedX = yuv2bgr24_X_c;
02276             break;
02277         case PIX_FMT_RGB565LE:
02278         case PIX_FMT_RGB565BE:
02279         case PIX_FMT_BGR565LE:
02280         case PIX_FMT_BGR565BE:
02281             *yuv2packed1 = yuv2rgb16_1_c;
02282             *yuv2packed2 = yuv2rgb16_2_c;
02283             *yuv2packedX = yuv2rgb16_X_c;
02284             break;
02285         case PIX_FMT_RGB555LE:
02286         case PIX_FMT_RGB555BE:
02287         case PIX_FMT_BGR555LE:
02288         case PIX_FMT_BGR555BE:
02289             *yuv2packed1 = yuv2rgb15_1_c;
02290             *yuv2packed2 = yuv2rgb15_2_c;
02291             *yuv2packedX = yuv2rgb15_X_c;
02292             break;
02293         case PIX_FMT_RGB444LE:
02294         case PIX_FMT_RGB444BE:
02295         case PIX_FMT_BGR444LE:
02296         case PIX_FMT_BGR444BE:
02297             *yuv2packed1 = yuv2rgb12_1_c;
02298             *yuv2packed2 = yuv2rgb12_2_c;
02299             *yuv2packedX = yuv2rgb12_X_c;
02300             break;
02301         case PIX_FMT_RGB8:
02302         case PIX_FMT_BGR8:
02303             *yuv2packed1 = yuv2rgb8_1_c;
02304             *yuv2packed2 = yuv2rgb8_2_c;
02305             *yuv2packedX = yuv2rgb8_X_c;
02306             break;
02307         case PIX_FMT_RGB4:
02308         case PIX_FMT_BGR4:
02309             *yuv2packed1 = yuv2rgb4_1_c;
02310             *yuv2packed2 = yuv2rgb4_2_c;
02311             *yuv2packedX = yuv2rgb4_X_c;
02312             break;
02313         case PIX_FMT_RGB4_BYTE:
02314         case PIX_FMT_BGR4_BYTE:
02315             *yuv2packed1 = yuv2rgb4b_1_c;
02316             *yuv2packed2 = yuv2rgb4b_2_c;
02317             *yuv2packedX = yuv2rgb4b_X_c;
02318             break;
02319         }
02320     }
02321     switch (dstFormat) {
02322     case PIX_FMT_GRAY16BE:
02323         *yuv2packed1 = yuv2gray16BE_1_c;
02324         *yuv2packed2 = yuv2gray16BE_2_c;
02325         *yuv2packedX = yuv2gray16BE_X_c;
02326         break;
02327     case PIX_FMT_GRAY16LE:
02328         *yuv2packed1 = yuv2gray16LE_1_c;
02329         *yuv2packed2 = yuv2gray16LE_2_c;
02330         *yuv2packedX = yuv2gray16LE_X_c;
02331         break;
02332     case PIX_FMT_MONOWHITE:
02333         *yuv2packed1 = yuv2monowhite_1_c;
02334         *yuv2packed2 = yuv2monowhite_2_c;
02335         *yuv2packedX = yuv2monowhite_X_c;
02336         break;
02337     case PIX_FMT_MONOBLACK:
02338         *yuv2packed1 = yuv2monoblack_1_c;
02339         *yuv2packed2 = yuv2monoblack_2_c;
02340         *yuv2packedX = yuv2monoblack_X_c;
02341         break;
02342     case PIX_FMT_YUYV422:
02343         *yuv2packed1 = yuv2yuyv422_1_c;
02344         *yuv2packed2 = yuv2yuyv422_2_c;
02345         *yuv2packedX = yuv2yuyv422_X_c;
02346         break;
02347     case PIX_FMT_UYVY422:
02348         *yuv2packed1 = yuv2uyvy422_1_c;
02349         *yuv2packed2 = yuv2uyvy422_2_c;
02350         *yuv2packedX = yuv2uyvy422_X_c;
02351         break;
02352     }
02353 }
02354 
/* Set to 1 to make DEBUG_BUFFERS() emit its trace lines through av_log(). */
#define DEBUG_SWSCALE_BUFFERS 0

/*
 * Log a debug line when DEBUG_SWSCALE_BUFFERS is non-zero.
 *
 * The expansion refers to a variable named `c`, which must be in scope at the
 * call site and is passed to av_log() as the logging context.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as one
 * statement: a bare `if (...) av_log(...)` expansion would capture an `else`
 * that follows the call site (dangling else) and silently change control flow.
 */
#define DEBUG_BUFFERS(...)                            \
    do {                                              \
        if (DEBUG_SWSCALE_BUFFERS)                    \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);     \
    } while (0)
02357 
/**
 * Scale one slice of source lines into the destination picture.
 *
 * Each source line of the slice is scaled horizontally into per-plane line
 * buffers (lumPixBuf, chrUPixBuf/chrVPixBuf and, when present, alpPixBuf).
 * For every destination line whose input lines are all available, the
 * vertical/output functions (yuv2plane1/yuv2planeX/yuv2nv12cX for planar
 * and gray output, yuv2packed1/2/X otherwise) write the line to dst. When
 * the slice does not hold enough lines for the next destination line, the
 * rest of the slice is buffered and the loop stops; progress (dstY, the two
 * buffer indices and lastInLumBuf/lastInChrBuf) is stored in the context so
 * that a following call can continue.
 *
 * @param c          scaler context; read for all parameters and updated with
 *                   the progress state listed above
 * @param src        source plane pointers; NOTE: overwritten in place when
 *                   the source format is packed (all entries set to src[0])
 * @param srcStride  source strides; NOTE: modified in place (packed formats
 *                   copy stride 0 to all entries, and entries 1 and 2 are
 *                   shifted left by c->vChrDrop)
 * @param srcSliceY  index of the first source line contained in the slice;
 *                   0 resets the progress state
 * @param srcSliceH  number of source lines in the slice
 * @param dst        destination plane pointers
 * @param dstStride  destination strides
 * @return number of destination lines written by this call
 */
02358 static int swScale(SwsContext *c, const uint8_t* src[],
02359                    int srcStride[], int srcSliceY,
02360                    int srcSliceH, uint8_t* dst[], int dstStride[])
02361 {
02362     /* load a few things into local vars to make the code more readable? and faster */
02363     const int srcW= c->srcW;
02364     const int dstW= c->dstW;
02365     const int dstH= c->dstH;
02366     const int chrDstW= c->chrDstW;
02367     const int chrSrcW= c->chrSrcW;
02368     const int lumXInc= c->lumXInc;
02369     const int chrXInc= c->chrXInc;
02370     const enum PixelFormat dstFormat= c->dstFormat;
02371     const int flags= c->flags;
02372     int32_t *vLumFilterPos= c->vLumFilterPos;
02373     int32_t *vChrFilterPos= c->vChrFilterPos;
02374     int32_t *hLumFilterPos= c->hLumFilterPos;
02375     int32_t *hChrFilterPos= c->hChrFilterPos;
02376     int16_t *vLumFilter= c->vLumFilter;
02377     int16_t *vChrFilter= c->vChrFilter;
02378     int16_t *hLumFilter= c->hLumFilter;
02379     int16_t *hChrFilter= c->hChrFilter;
02380     int32_t *lumMmxFilter= c->lumMmxFilter;
02381     int32_t *chrMmxFilter= c->chrMmxFilter;
02382     int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
02383     const int vLumFilterSize= c->vLumFilterSize;
02384     const int vChrFilterSize= c->vChrFilterSize;
02385     const int hLumFilterSize= c->hLumFilterSize;
02386     const int hChrFilterSize= c->hChrFilterSize;
02387     int16_t **lumPixBuf= c->lumPixBuf;
02388     int16_t **chrUPixBuf= c->chrUPixBuf;
02389     int16_t **chrVPixBuf= c->chrVPixBuf;
02390     int16_t **alpPixBuf= c->alpPixBuf;
02391     const int vLumBufSize= c->vLumBufSize;
02392     const int vChrBufSize= c->vChrBufSize;
02393     uint8_t *formatConvBuffer= c->formatConvBuffer;
          /* Slice position and height expressed in chroma lines. The
             negate/shift/negate form rounds the height up instead of down
             (assuming >> on a negative int is an arithmetic shift). */
02394     const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
02395     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
02396     int lastDstY;
02397     uint32_t *pal=c->pal_yuv;
          /* Local copies of the output functions: they may be replaced for
             the last destination lines (see the dstY >= dstH-2 check below)
             without touching the pointers stored in the context. */
02398     yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
02399     yuv2planarX_fn yuv2planeX = c->yuv2planeX;
02400     yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
02401     yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
02402     yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
02403     yuv2packedX_fn yuv2packedX = c->yuv2packedX;
          /* Per-line dither tables are only selected for 9/10-bit and 16-bit sources. */
02404     int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
02405 
02406     /* vars which will change and which we need to store back in the context */
02407     int dstY= c->dstY;
02408     int lumBufIndex= c->lumBufIndex;
02409     int chrBufIndex= c->chrBufIndex;
02410     int lastInLumBuf= c->lastInLumBuf;
02411     int lastInChrBuf= c->lastInChrBuf;
02412 
          /* Packed input: every "plane" is read from plane 0. This writes to
             the arrays passed in by the caller. */
02413     if (isPacked(c->srcFormat)) {
02414         src[0]=
02415         src[1]=
02416         src[2]=
02417         src[3]= src[0];
02418         srcStride[0]=
02419         srcStride[1]=
02420         srcStride[2]=
02421         srcStride[3]= srcStride[0];
02422     }
          /* Multiply the chroma strides by 2^vChrDrop (also in the caller's
             array); presumably this makes the reader skip chroma lines --
             TODO confirm against where vChrDrop is set. */
02423     srcStride[1]<<= c->vChrDrop;
02424     srcStride[2]<<= c->vChrDrop;
02425 
02426     DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
02427                   src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
02428                   dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
02429     DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
02430                    srcSliceY,    srcSliceH,    dstY,    dstH);
02431     DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
02432                    vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
02433 
          /* Warn (once per process: the flag is a function-level static shared
             by all contexts) when a destination stride is not a multiple of 8. */
02434     if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
02435         static int warnedAlready=0; //FIXME move this into the context perhaps
02436         if (flags & SWS_PRINT_INFO && !warnedAlready) {
02437             av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
02438                    "         ->cannot do aligned memory accesses anymore\n");
02439             warnedAlready=1;
02440         }
02441     }
02442 
02443     /* Note the user might start scaling the picture in the middle so this
02444        will not get executed. This is not really intended but works
02445        currently, so people might do it. */
02446     if (srcSliceY ==0) {
02447         lumBufIndex=-1;
02448         chrBufIndex=-1;
02449         dstY=0;
02450         lastInLumBuf= -1;
02451         lastInChrBuf= -1;
02452     }
02453 
          /* No per-line dithering: both dither pointers use the fixed ff_sws_pb_64 table. */
02454     if (!should_dither) {
02455         c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
02456     }
          /* Remember where this call started so the number of lines produced can be returned. */
02457     lastDstY= dstY;
02458 
02459     for (;dstY < dstH; dstY++) {
02460         const int chrDstY= dstY>>c->chrDstVSubSample;
02461         uint8_t *dest[4] = {
02462             dst[0] + dstStride[0] * dstY,
02463             dst[1] + dstStride[1] * chrDstY,
02464             dst[2] + dstStride[2] * chrDstY,
02465             (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
02466         };
02467 
              /* firstLumSrcY2 is the first luma input line of the last output
                 line that shares chroma line chrDstY with dstY (dstY with its
                 low chrDstVSubSample bits set, clamped to dstH-1). */
02468         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
02469         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
02470         const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
02471 
02472         // Last line needed as input
02473         int lastLumSrcY  = FFMIN(c->srcH,    firstLumSrcY  + vLumFilterSize) - 1;
02474         int lastLumSrcY2 = FFMIN(c->srcH,    firstLumSrcY2 + vLumFilterSize) - 1;
02475         int lastChrSrcY  = FFMIN(c->chrSrcH, firstChrSrcY  + vChrFilterSize) - 1;
02476         int enough_lines;
02477 
02478         //handle holes (FAST_BILINEAR & weird filters)
02479         if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
02480         if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
02481         assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
02482         assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
02483 
02484         DEBUG_BUFFERS("dstY: %d\n", dstY);
02485         DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
02486                          firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
02487         DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
02488                          firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
02489 
02490         // Do we have enough lines in this slice to output the dstY line
02491         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
02492 
              /* Not enough input yet: clamp the targets to the end of the slice
                 so that the loops below buffer everything the slice provides;
                 the loop is then left before any output is written. */
02493         if (!enough_lines) {
02494             lastLumSrcY = srcSliceY + srcSliceH - 1;
02495             lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
02496             DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
02497                                             lastLumSrcY, lastChrSrcY);
02498         }
02499 
02500         //Do horizontal scaling
02501         while(lastInLumBuf < lastLumSrcY) {
02502             const uint8_t *src1[4] = {
02503                 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
02504                 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
02505                 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
02506                 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
02507             };
02508             lumBufIndex++;
02509             assert(lumBufIndex < 2*vLumBufSize);
02510             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
02511             assert(lastInLumBuf + 1 - srcSliceY >= 0);
02512             hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
02513                     hLumFilter, hLumFilterPos, hLumFilterSize,
02514                     formatConvBuffer,
02515                     pal, 0);
                  /* Alpha is scaled with the luma filter; the only difference
                     in the call is the last argument (1 instead of 0). */
02516             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
02517                 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
02518                         lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
02519                         formatConvBuffer,
02520                         pal, 1);
02521             lastInLumBuf++;
02522             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
02523                                lumBufIndex,    lastInLumBuf);
02524         }
02525         while(lastInChrBuf < lastChrSrcY) {
02526             const uint8_t *src1[4] = {
02527                 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
02528                 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
02529                 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
02530                 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
02531             };
02532             chrBufIndex++;
02533             assert(chrBufIndex < 2*vChrBufSize);
02534             assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
02535             assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
02536             //FIXME replace parameters through context struct (some at least)
02537 
                  /* The bookkeeping advances even when chroma scaling is skipped. */
02538             if (c->needs_hcscale)
02539                 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
02540                           chrDstW, src1, chrSrcW, chrXInc,
02541                           hChrFilter, hChrFilterPos, hChrFilterSize,
02542                           formatConvBuffer, pal);
02543             lastInChrBuf++;
02544             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
02545                                chrBufIndex,    lastInChrBuf);
02546         }
02547         //wrap buf index around to stay inside the ring buffer
02548         if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
02549         if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
02550         if (!enough_lines)
02551             break; //we can't output a dstY line so let's try with the next slice
02552 
02553 #if HAVE_MMX
02554         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
02555 #endif
              /* Pick this line's row of the 8x8 dither matrix (chroma row by
                 chrDstY, luma row by dstY). */
02556         if (should_dither) {
02557             c->chrDither8 = dither_8x8_128[chrDstY & 7];
02558             c->lumDither8 = dither_8x8_128[dstY & 7];
02559         }
              /* For the last two destination lines, replace the local output
                 function pointers with the ones chosen by
                 find_c_packed_planar_out_funcs(). */
02560         if (dstY >= dstH-2) {
02561             // hmm looks like we can't use MMX here without overwriting this array's tail
02562             find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX,  &yuv2nv12cX,
02563                                            &yuv2packed1, &yuv2packed2, &yuv2packedX);
02564         }
02565 
02566         {
                  /* Pointers to the first buffered line of each vertical filter
                     window. The "+ v*BufSize" offset presumably relies on the
                     pointer arrays listing every line twice so a window never
                     has to wrap -- TODO confirm against the allocation code. */
02567             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
02568             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02569             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02570             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
02571 
                  /* Luma window reaches above line 0 or below the last source
                     line: build a temporary pointer list in the slots starting
                     at index 2*vLumBufSize, repeating the first in-picture line
                     for the entries above the picture and the last available
                     line for the entries below it. The alpha list is built the
                     same way. */
02572             if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
02573                 const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
02574                 int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
02575                 for (i = 0; i < neg;            i++)
02576                     tmpY[i] = lumSrcPtr[neg];
02577                 for (     ; i < end;            i++)
02578                     tmpY[i] = lumSrcPtr[i];
02579                 for (     ; i < vLumFilterSize; i++)
02580                     tmpY[i] = tmpY[i-1];
02581                 lumSrcPtr = tmpY;
02582 
02583                 if (alpSrcPtr) {
02584                     const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
02585                     for (i = 0; i < neg;            i++)
02586                         tmpA[i] = alpSrcPtr[neg];
02587                     for (     ; i < end;            i++)
02588                         tmpA[i] = alpSrcPtr[i];
02589                     for (     ; i < vLumFilterSize; i++)
02590                         tmpA[i] = tmpA[i - 1];
02591                     alpSrcPtr = tmpA;
02592                 }
02593             }
                  /* Same edge handling for the two chroma planes. */
02594             if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
02595                 const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
02596                               **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
02597                 int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
02598                 for (i = 0; i < neg;            i++) {
02599                     tmpU[i] = chrUSrcPtr[neg];
02600                     tmpV[i] = chrVSrcPtr[neg];
02601                 }
02602                 for (     ; i < end;            i++) {
02603                     tmpU[i] = chrUSrcPtr[i];
02604                     tmpV[i] = chrVSrcPtr[i];
02605                 }
02606                 for (     ; i < vChrFilterSize; i++) {
02607                     tmpU[i] = tmpU[i - 1];
02608                     tmpV[i] = tmpV[i - 1];
02609                 }
02610                 chrUSrcPtr = tmpU;
02611                 chrVSrcPtr = tmpV;
02612             }
02613 
02614             if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                      /* Chroma is written only on destination lines whose low
                         chrDstVSubSample bits are all zero. */
02615                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
02616 
02617                 if (vLumFilterSize == 1) {
02618                     yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
02619                 } else {
02620                     yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
02621                                lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
02622                 }
02623 
02624                 if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
02625                     if (yuv2nv12cX) {
02626                         yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
02627                     } else if (vChrFilterSize == 1) {
02628                         yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
02629                         yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
02630                     } else {
02631                         yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
02632                                    chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
02633                         yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
02634                                    chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
02635                     }
02636                 }
02637 
02638                 if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
02639                     if (vLumFilterSize == 1) {
02640                         yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
02641                     } else {
02642                         yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
02643                                    alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
02644                     }
02645                 }
02646             } else {
                      /* Packed output. NOTE(review): vChrFilter is indexed with
                         dstY in the first and last branch and in chrAlpha below,
                         but with chrDstY for chrMmxFilter; these agree only when
                         c->chrDstVSubSample is 0 -- presumably always the case
                         for the formats reaching this branch; confirm. */
02647                 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
02648                     int chrAlpha = vChrFilter[2 * dstY + 1];
02649                     yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
02650                                 alpPixBuf ? *alpSrcPtr : NULL,
02651                                 dest[0], dstW, chrAlpha, dstY);
02652                 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
02653                     int lumAlpha = vLumFilter[2 * dstY + 1];
02654                     int chrAlpha = vChrFilter[2 * dstY + 1];
02655                     lumMmxFilter[2] =
02656                     lumMmxFilter[3] = vLumFilter[2 * dstY   ] * 0x10001;
02657                     chrMmxFilter[2] =
02658                     chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
02659                     yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
02660                                 alpPixBuf ? alpSrcPtr : NULL,
02661                                 dest[0], dstW, lumAlpha, chrAlpha, dstY);
02662                 } else { //general RGB
02663                     yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
02664                                 lumSrcPtr, vLumFilterSize,
02665                                 vChrFilter + dstY * vChrFilterSize,
02666                                 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
02667                                 alpSrcPtr, dest[0], dstW, dstY);
02668                 }
02669             }
02670         }
02671     }
02672 
          /* YUVA420P destination without an alpha line buffer: fill the alpha
             plane with 255 via fillPlane(); the arguments are presumably
             (plane, stride, width, height, first line, value), i.e. the lines
             written by this call -- confirm against fillPlane(). */
02673     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
02674         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
02675 
          /* Issue a store fence when the CPU reports MMX2, then emms_c(). */
02676 #if HAVE_MMX2
02677     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
02678         __asm__ volatile("sfence":::"memory");
02679 #endif
02680     emms_c();
02681 
02682     /* store changed local vars back in the context */
02683     c->dstY= dstY;
02684     c->lumBufIndex= lumBufIndex;
02685     c->chrBufIndex= chrBufIndex;
02686     c->lastInLumBuf= lastInLumBuf;
02687     c->lastInChrBuf= lastInChrBuf;
02688 
02689     return dstY - lastDstY;
02690 }
02691 
02692 static av_cold void sws_init_swScale_c(SwsContext *c)
02693 {
02694     enum PixelFormat srcFormat = c->srcFormat;
02695 
02696     find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
02697                                    &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
02698                                    &c->yuv2packedX);
02699 
02700     c->chrToYV12 = NULL;
02701     switch(srcFormat) {
02702         case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
02703         case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
02704         case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
02705         case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
02706         case PIX_FMT_RGB8     :
02707         case PIX_FMT_BGR8     :
02708         case PIX_FMT_PAL8     :
02709         case PIX_FMT_BGR4_BYTE:
02710         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
02711         case PIX_FMT_GBRP9LE:
02712         case PIX_FMT_GBRP10LE:
02713         case PIX_FMT_GBRP16LE:  c->readChrPlanar = planar_rgb16le_to_uv; break;
02714         case PIX_FMT_GBRP9BE:
02715         case PIX_FMT_GBRP10BE:
02716         case PIX_FMT_GBRP16BE:  c->readChrPlanar = planar_rgb16be_to_uv; break;
02717         case PIX_FMT_GBRP:      c->readChrPlanar = planar_rgb_to_uv; break;
02718 #if HAVE_BIGENDIAN
02719         case PIX_FMT_YUV444P9LE:
02720         case PIX_FMT_YUV422P9LE:
02721         case PIX_FMT_YUV420P9LE:
02722         case PIX_FMT_YUV422P10LE:
02723         case PIX_FMT_YUV444P10LE:
02724         case PIX_FMT_YUV420P10LE:
02725         case PIX_FMT_YUV420P16LE:
02726         case PIX_FMT_YUV422P16LE:
02727         case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
02728 #else
02729         case PIX_FMT_YUV444P9BE:
02730         case PIX_FMT_YUV422P9BE:
02731         case PIX_FMT_YUV420P9BE:
02732         case PIX_FMT_YUV444P10BE:
02733         case PIX_FMT_YUV422P10BE:
02734         case PIX_FMT_YUV420P10BE:
02735         case PIX_FMT_YUV420P16BE:
02736         case PIX_FMT_YUV422P16BE:
02737         case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
02738 #endif
02739     }
02740     if (c->chrSrcHSubSample) {
02741         switch(srcFormat) {
02742         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
02743         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
02744         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
02745         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
02746         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c;   break;
02747         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c;  break;
02748         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c;   break;
02749         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
02750         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
02751         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
02752         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
02753         case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
02754         case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
02755         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c;   break;
02756         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c;  break;
02757         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c;   break;
02758         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
02759         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
02760         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
02761         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
02762         case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
02763         case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
02764         }
02765     } else {
02766         switch(srcFormat) {
02767         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
02768         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
02769         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
02770         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
02771         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c;   break;
02772         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c;  break;
02773         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c;   break;
02774         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
02775         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
02776         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
02777         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
02778         case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
02779         case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
02780         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c;   break;
02781         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c;  break;
02782         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c;   break;
02783         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
02784         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
02785         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
02786         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
02787         case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
02788         case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
02789         }
02790     }
02791 
02792     c->lumToYV12 = NULL;
02793     c->alpToYV12 = NULL;
02794     switch (srcFormat) {
02795     case PIX_FMT_GBRP9LE:
02796     case PIX_FMT_GBRP10LE:
02797     case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
02798     case PIX_FMT_GBRP9BE:
02799     case PIX_FMT_GBRP10BE:
02800     case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
02801     case PIX_FMT_GBRP:     c->readLumPlanar = planar_rgb_to_y; break;
02802 #if HAVE_BIGENDIAN
02803     case PIX_FMT_YUV444P9LE:
02804     case PIX_FMT_YUV422P9LE:
02805     case PIX_FMT_YUV420P9LE:
02806     case PIX_FMT_YUV444P10LE:
02807     case PIX_FMT_YUV422P10LE:
02808     case PIX_FMT_YUV420P10LE:
02809     case PIX_FMT_YUV420P16LE:
02810     case PIX_FMT_YUV422P16LE:
02811     case PIX_FMT_YUV444P16LE:
02812     case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
02813 #else
02814     case PIX_FMT_YUV444P9BE:
02815     case PIX_FMT_YUV422P9BE:
02816     case PIX_FMT_YUV420P9BE:
02817     case PIX_FMT_YUV444P10BE:
02818     case PIX_FMT_YUV422P10BE:
02819     case PIX_FMT_YUV420P10BE:
02820     case PIX_FMT_YUV420P16BE:
02821     case PIX_FMT_YUV422P16BE:
02822     case PIX_FMT_YUV444P16BE:
02823     case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
02824 #endif
02825     case PIX_FMT_YUYV422  :
02826     case PIX_FMT_Y400A    : c->lumToYV12 = yuy2ToY_c; break;
02827     case PIX_FMT_UYVY422  : c->lumToYV12 = uyvyToY_c;    break;
02828     case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c;   break;
02829     case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
02830     case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
02831     case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
02832     case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
02833     case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
02834     case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
02835     case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c;   break;
02836     case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
02837     case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
02838     case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
02839     case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
02840     case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
02841     case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
02842     case PIX_FMT_RGB8     :
02843     case PIX_FMT_BGR8     :
02844     case PIX_FMT_PAL8     :
02845     case PIX_FMT_BGR4_BYTE:
02846     case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
02847     case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
02848     case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
02849     case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
02850     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
02851     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
02852     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
02853     case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
02854     case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
02855     case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
02856     case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
02857     }
02858     if (c->alpPixBuf) {
02859         switch (srcFormat) {
02860         case PIX_FMT_BGRA:
02861         case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
02862         case PIX_FMT_ABGR:
02863         case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
02864         case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
02865         }
02866     }
02867 
02868     if (c->srcBpc == 8) {
02869         if (c->dstBpc <= 10) {
02870             c->hyScale = c->hcScale = hScale8To15_c;
02871             if (c->flags & SWS_FAST_BILINEAR) {
02872                 c->hyscale_fast = hyscale_fast_c;
02873                 c->hcscale_fast = hcscale_fast_c;
02874             }
02875         } else {
02876             c->hyScale = c->hcScale = hScale8To19_c;
02877         }
02878     } else {
02879         c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
02880     }
02881 
02882     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
02883         if (c->dstBpc <= 10) {
02884             if (c->srcRange) {
02885                 c->lumConvertRange = lumRangeFromJpeg_c;
02886                 c->chrConvertRange = chrRangeFromJpeg_c;
02887             } else {
02888                 c->lumConvertRange = lumRangeToJpeg_c;
02889                 c->chrConvertRange = chrRangeToJpeg_c;
02890             }
02891         } else {
02892             if (c->srcRange) {
02893                 c->lumConvertRange = lumRangeFromJpeg16_c;
02894                 c->chrConvertRange = chrRangeFromJpeg16_c;
02895             } else {
02896                 c->lumConvertRange = lumRangeToJpeg16_c;
02897                 c->chrConvertRange = chrRangeToJpeg16_c;
02898             }
02899         }
02900     }
02901 
02902     if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
02903           srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
02904         c->needs_hcscale = 1;
02905 }
02906 
02907 SwsFunc ff_getSwsFunc(SwsContext *c)
02908 {
02909     sws_init_swScale_c(c);
02910 
02911     if (HAVE_MMX)
02912         ff_sws_init_swScale_mmx(c);
02913     if (HAVE_ALTIVEC)
02914         ff_sws_init_swScale_altivec(c);
02915 
02916     return swScale;
02917 }