• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/sparc/simple_idct_vis.c

Go to the documentation of this file.
00001 /*
00002  * SPARC VIS optimized inverse DCT
00003  * Copyright (c) 2007 Denes Balatoni < dbalatoni XatX interware XdotX hu >
00004  *
00005  * I did consult the following fine web page about dct
00006  * http://www.geocities.com/ssavekar/dct.htm
00007  *
00008  * This file is part of FFmpeg.
00009  *
00010  * FFmpeg is free software; you can redistribute it and/or
00011  * modify it under the terms of the GNU Lesser General Public
00012  * License as published by the Free Software Foundation; either
00013  * version 2.1 of the License, or (at your option) any later version.
00014  *
00015  * FFmpeg is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018  * Lesser General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU Lesser General Public
00021  * License along with FFmpeg; if not, write to the Free Software
00022  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00023  */
00024 
00025 #include "libavcodec/dsputil.h"
00026 #include "dsputil_vis.h"
00027 
/*
 * Fixed-point multiplier table for the IDCT.  INIT_IDCT fetches it as seven
 * 8-byte rows (ldd from [%1+0] .. [%1+48]); every row holds one value
 * replicated into four 16-bit lanes.
 *
 * Numerically, row k (k = 1..7) matches cos(k*pi/16) scaled by 65536.  Rows
 * 1..5 store (cos - 1) * 65536 instead, which keeps the value inside the
 * int16_t range; IDCT4ROWS makes up for this by adding the multiplicand back
 * after each multiply that uses one of those rows.  Rows 6 and 7 hold the
 * scaled cosine directly and get no such correction.
 */
00028 static const DECLARE_ALIGNED(8, int16_t, coeffs)[28] = {
00029     - 1259,- 1259,- 1259,- 1259, /* k=1: (-1259 + 65536)/65536 ~ 0.98079, loaded into %f32 */
00030     - 4989,- 4989,- 4989,- 4989, /* k=2: ~ 0.92387, loaded into %f34 */
00031     -11045,-11045,-11045,-11045, /* k=3: ~ 0.83147, loaded into %f36 */
00032     -19195,-19195,-19195,-19195, /* k=4: ~ 0.70711, loaded into %f38 */
00033     -29126,-29126,-29126,-29126, /* k=5: ~ 0.55557, loaded into %f40 */
00034      25080, 25080, 25080, 25080, /* k=6: 25080/65536 ~ 0.38269, loaded into %f42 */
00035      12785, 12785, 12785, 12785  /* k=7: 12785/65536 ~ 0.19508, loaded into %f44 */
00036 };
/*
 * Output scaling constant, 65536>>6 (= 0x0400) in each of four 16-bit lanes.
 * ff_simple_idct_vis() passes it as asm operand %0; INIT_IDCT loads it into
 * %f46 and SCALEROWS uses %f46 as the first operand of fmul8sux16.
 * NOTE(review): fmul8sux16 only looks at the upper byte of each lane of its
 * first operand (4 here), so this presumably amounts to an arithmetic shift
 * right by 6 of the IDCT result - confirm against the VIS manual.
 */
00037 static const DECLARE_ALIGNED(8, uint16_t, scale)[4] = {
00038     65536>>6, 65536>>6, 65536>>6, 65536>>6
00039 };
/*
 * Rounding bias, 1<<5 in each of four 16-bit lanes.
 * ff_simple_idct_put_vis() and ff_simple_idct_add_vis() pass it as asm
 * operand %0 (INIT_IDCT loads it into %f46); in their second IDCT pass
 * ADDROUNDER adds %f46 to the first accumulator (%f28) of IDCT4ROWS.
 * NOTE(review): presumably this is half of the factor 1<<6 that fpack16
 * removes when packing to bytes - confirm.
 */
00040 static const DECLARE_ALIGNED(8, uint16_t, rounder)[4] = {
00041     1<<5, 1<<5, 1<<5, 1<<5
00042 };
/*
 * Pixel expansion constant, 1<<14 in each of four 16-bit lanes.
 * ff_simple_idct_add_vis() passes it as asm operand %5; ADDPIXELSCLAMPED
 * loads it into %f18 and uses it as the 16-bit multiplier of fmul8x16 on the
 * destination pixels before the IDCT output is added to them.
 * NOTE(review): presumably this widens each 8-bit pixel to pixel<<6 so that
 * it has the same scale as the IDCT output - confirm against the VIS manual.
 */
00043 static const DECLARE_ALIGNED(8, uint16_t, expand)[4] = {
00044     1<<14, 1<<14, 1<<14, 1<<14
00045 };
00046 
/*
 * INIT_IDCT - asm prologue shared by all three entry points.
 *
 * Loads the seven 8-byte rows found at [%1+0] .. [%1+48] into %f32 .. %f44
 * (every caller binds operand %1 to the coeffs table), loads the 8 bytes at
 * [%0] into %f46 (operand %0 is the scale table in ff_simple_idct_vis() and
 * the rounder table in the put/add variants) and clears %f62.
 * %f62 is the zero that IDCT4ROWS compares against with fcmpd.
 */
00047 #define INIT_IDCT \
00048         "ldd [%1], %%f32         \n\t"\
00049         "ldd [%1+8], %%f34       \n\t"\
00050         "ldd [%1+16], %%f36      \n\t"\
00051         "ldd [%1+24], %%f38      \n\t"\
00052         "ldd [%1+32], %%f40      \n\t"\
00053         "ldd [%1+40], %%f42      \n\t"\
00054         "ldd [%1+48], %%f44      \n\t"\
00055         "ldd [%0], %%f46         \n\t"\
00056         "fzero %%f62             \n\t"\
00057 
/*
 * LOADSCALE(in) - load eight doublewords with a 16-byte stride and scale them.
 *
 * `in` is a string literal that is pasted into the address expression
 * (the callers use "%2+0" and "%2+8").  The doublewords at in+0, in+16, ...,
 * in+112 are loaded into %f0, %f2, ..., %f14.  Each register is then added to
 * itself with fpadd16 four times in a row, i.e. every 16-bit lane ends up
 * multiplied by 16.
 * NOTE(review): the 16-byte stride presumably steps over one row of eight
 * 16-bit coefficients, so that each call picks up one 4-column half of all
 * eight rows - this assumes DCTELEM is a 16-bit type; confirm.
 */
00058 #define LOADSCALE(in) \
00059         "ldd [" in "], %%f0          \n\t"\
00060         "ldd [" in "+16], %%f2       \n\t"\
00061         "ldd [" in "+32], %%f4       \n\t"\
00062         "ldd [" in "+48], %%f6       \n\t"\
00063         "ldd [" in "+64], %%f8       \n\t"\
00064         "ldd [" in "+80], %%f10      \n\t"\
00065         "ldd [" in "+96], %%f12      \n\t"\
00066         "ldd [" in "+112], %%f14     \n\t"\
00067         "fpadd16 %%f0, %%f0, %%f0    \n\t"\
00068         "fpadd16 %%f2, %%f2, %%f2    \n\t"\
00069         "fpadd16 %%f4, %%f4, %%f4    \n\t"\
00070         "fpadd16 %%f6, %%f6, %%f6    \n\t"\
00071         "fpadd16 %%f8, %%f8, %%f8    \n\t"\
00072         "fpadd16 %%f10, %%f10, %%f10 \n\t"\
00073         "fpadd16 %%f12, %%f12, %%f12 \n\t"\
00074         "fpadd16 %%f14, %%f14, %%f14 \n\t"\
00075 \
00076         "fpadd16 %%f0, %%f0, %%f0    \n\t"\
00077         "fpadd16 %%f2, %%f2, %%f2    \n\t"\
00078         "fpadd16 %%f4, %%f4, %%f4    \n\t"\
00079         "fpadd16 %%f6, %%f6, %%f6    \n\t"\
00080         "fpadd16 %%f8, %%f8, %%f8    \n\t"\
00081         "fpadd16 %%f10, %%f10, %%f10 \n\t"\
00082         "fpadd16 %%f12, %%f12, %%f12 \n\t"\
00083         "fpadd16 %%f14, %%f14, %%f14 \n\t"\
00084 \
00085         "fpadd16 %%f0, %%f0, %%f0    \n\t"\
00086         "fpadd16 %%f2, %%f2, %%f2    \n\t"\
00087         "fpadd16 %%f4, %%f4, %%f4    \n\t"\
00088         "fpadd16 %%f6, %%f6, %%f6    \n\t"\
00089         "fpadd16 %%f8, %%f8, %%f8    \n\t"\
00090         "fpadd16 %%f10, %%f10, %%f10 \n\t"\
00091         "fpadd16 %%f12, %%f12, %%f12 \n\t"\
00092         "fpadd16 %%f14, %%f14, %%f14 \n\t"\
00093 \
00094         "fpadd16 %%f0, %%f0, %%f0    \n\t"\
00095         "fpadd16 %%f2, %%f2, %%f2    \n\t"\
00096         "fpadd16 %%f4, %%f4, %%f4    \n\t"\
00097         "fpadd16 %%f6, %%f6, %%f6    \n\t"\
00098         "fpadd16 %%f8, %%f8, %%f8    \n\t"\
00099         "fpadd16 %%f10, %%f10, %%f10 \n\t"\
00100         "fpadd16 %%f12, %%f12, %%f12 \n\t"\
00101         "fpadd16 %%f14, %%f14, %%f14 \n\t"\
00102 
/*
 * LOAD(in) - load 64 contiguous bytes (eight doublewords at in+0 .. in+56)
 * into %f16, %f18, ..., %f30, which are the input registers of TRANSPOSE.
 * `in` is a string literal pasted into the address expression
 * (the callers use "%3+64" and "%2+64").  No scaling is applied.
 */
00103 #define LOAD(in) \
00104         "ldd [" in "], %%f16         \n\t"\
00105         "ldd [" in "+8], %%f18       \n\t"\
00106         "ldd [" in "+16], %%f20      \n\t"\
00107         "ldd [" in "+24], %%f22      \n\t"\
00108         "ldd [" in "+32], %%f24      \n\t"\
00109         "ldd [" in "+40], %%f26      \n\t"\
00110         "ldd [" in "+48], %%f28      \n\t"\
00111         "ldd [" in "+56], %%f30      \n\t"\
00112 
/*
 * TRANSPOSE - shuffle the sixteen 32-bit registers %f16..%f31 into the IDCT
 * input registers %f0..%f15.
 *
 * Three rounds of fpmerge are applied: %f16..%f31 -> %f0..%f15, back to
 * %f16..%f31, and finally to %f0..%f15.  The contents of %f16..%f31 are
 * overwritten along the way.
 * NOTE(review): fpmerge interleaves the bytes of its two 32-bit operands;
 * going by the macro name the three rounds together presumably transpose the
 * 16-bit elements, so that the four rows of eight values held in (%f16,%f18),
 * (%f20,%f22), (%f24,%f26), (%f28,%f30) become eight registers %f0..%f14 that
 * each hold one value from every row - confirm against the VIS manual.
 */
00113 #define TRANSPOSE \
00114         "fpmerge %%f16, %%f24, %%f0  \n\t"\
00115         "fpmerge %%f20, %%f28, %%f2  \n\t"\
00116         "fpmerge %%f17, %%f25, %%f4  \n\t"\
00117         "fpmerge %%f21, %%f29, %%f6  \n\t"\
00118         "fpmerge %%f18, %%f26, %%f8  \n\t"\
00119         "fpmerge %%f22, %%f30, %%f10 \n\t"\
00120         "fpmerge %%f19, %%f27, %%f12 \n\t"\
00121         "fpmerge %%f23, %%f31, %%f14 \n\t"\
00122 \
00123         "fpmerge %%f0, %%f2, %%f16   \n\t"\
00124         "fpmerge %%f1, %%f3, %%f18   \n\t"\
00125         "fpmerge %%f4, %%f6, %%f20   \n\t"\
00126         "fpmerge %%f5, %%f7, %%f22   \n\t"\
00127         "fpmerge %%f8, %%f10, %%f24  \n\t"\
00128         "fpmerge %%f9, %%f11, %%f26  \n\t"\
00129         "fpmerge %%f12, %%f14, %%f28 \n\t"\
00130         "fpmerge %%f13, %%f15, %%f30 \n\t"\
00131 \
00132         "fpmerge %%f16, %%f17, %%f0  \n\t"\
00133         "fpmerge %%f18, %%f19, %%f2  \n\t"\
00134         "fpmerge %%f20, %%f21, %%f4  \n\t"\
00135         "fpmerge %%f22, %%f23, %%f6  \n\t"\
00136         "fpmerge %%f24, %%f25, %%f8  \n\t"\
00137         "fpmerge %%f26, %%f27, %%f10 \n\t"\
00138         "fpmerge %%f28, %%f29, %%f12 \n\t"\
00139         "fpmerge %%f30, %%f31, %%f14 \n\t"\
00140 
/*
 * IDCT4ROWS - one 8-point inverse DCT pass on four 16-bit lanes in parallel.
 *
 * Inputs:   %f0, %f2, ..., %f14 hold the eight input values x0..x7 (four lanes
 *           each); %f32..%f44 hold the coeffs rows, %f62 is zero, and %f46 is
 *           used by ADDROUNDER when that macro is non-empty.
 * Outputs:  %f16, %f20, %f24, %f28 = outputs 0..3 (sum of even and odd part),
 *           %f54, %f52, %f50, %f48 = outputs 4..7 (difference).
 * Scratch:  %f18, %f22, %f26, %f30 (odd-part accumulators), %f48..%f60,
 *           and the condition codes %fcc0..%fcc2.
 *
 * ADDROUNDER must be defined (possibly empty) at the point where this macro
 * is expanded; it is inserted right after the first partial product of x0
 * has been written to %f28.
 *
 * Every multiplication is done as an fmul8ulx16 / fmul8sux16 pair whose two
 * results are added with fpadd16.  Where the coefficient row stores
 * (cos - 1) the multiplicand itself is added (or subtracted) once more
 * afterwards.
 *
 * The input pairs (x2,x3), (x4,x5) and (x6,x7) are skipped when the bitwise
 * OR of the pair compares equal to %f62: the ORs and the fcmpd instructions
 * are interleaved with the arithmetic of the first pair, and each fbe jumps
 * over the corresponding section.  The instruction following each fbe sits
 * in the branch delay slot and belongs to the PREVIOUS section; it executes
 * whether or not the branch is taken.
 * NOTE(review): fcmpd is a floating-point compare, so an OR result whose bit
 * pattern is -0.0 (only the top bit set) would presumably also be treated as
 * zero - confirm that the inputs cannot produce it.
 *
 * The numeric labels 3/4/5 are local assembler labels, which is what allows
 * this macro to be expanded several times inside one asm statement.
 */
00141 #define IDCT4ROWS \
00142     /* 1. column: inputs f0 (x0) and f2 (x1), always computed */\
00143         "fmul8ulx16 %%f0, %%f38, %%f28 \n\t"\
00144         "for %%f4, %%f6, %%f60         \n\t"\
00145         "fmul8ulx16 %%f2, %%f32, %%f18 \n\t"\
00146         "fmul8ulx16 %%f2, %%f36, %%f22 \n\t"\
00147         "fmul8ulx16 %%f2, %%f40, %%f26 \n\t"\
00148         "fmul8ulx16 %%f2, %%f44, %%f30 \n\t"\
00149 \
00150         ADDROUNDER\
00151 \
00152         "fmul8sux16 %%f0, %%f38, %%f48 \n\t"\
00153         "fcmpd %%fcc0, %%f62, %%f60    \n\t"\
00154         "for %%f8, %%f10, %%f60        \n\t"\
00155         "fmul8sux16 %%f2, %%f32, %%f50 \n\t"\
00156         "fmul8sux16 %%f2, %%f36, %%f52 \n\t"\
00157         "fmul8sux16 %%f2, %%f40, %%f54 \n\t"\
00158         "fmul8sux16 %%f2, %%f44, %%f56 \n\t"\
00159 \
00160         "fpadd16 %%f48, %%f28, %%f28 \n\t"\
00161         "fcmpd %%fcc1, %%f62, %%f60  \n\t"\
00162         "for %%f12, %%f14, %%f60     \n\t"\
00163         "fpadd16 %%f50, %%f18, %%f18 \n\t"\
00164         "fpadd16 %%f52, %%f22, %%f22 \n\t"\
00165         "fpadd16 %%f54, %%f26, %%f26 \n\t"\
00166         "fpadd16 %%f56, %%f30, %%f30 \n\t"\
00167 \
00168         "fpadd16 %%f28, %%f0, %%f16  \n\t"\
00169         "fcmpd %%fcc2, %%f62, %%f60  \n\t"\
00170         "fpadd16 %%f28, %%f0, %%f20  \n\t"\
00171         "fpadd16 %%f28, %%f0, %%f24  \n\t"\
00172         "fpadd16 %%f28, %%f0, %%f28  \n\t"\
00173         "fpadd16 %%f18, %%f2, %%f18  \n\t"\
00174         "fpadd16 %%f22, %%f2, %%f22  \n\t"\
00175     /* 2. column: inputs f4 (x2) and f6 (x3); skipped if fcc0 says f4|f6 == 0 (the fpadd16 after fbe is the delay slot and finishes column 1) */\
00176         "fbe %%fcc0, 3f                \n\t"\
00177         "fpadd16 %%f26, %%f2, %%f26    \n\t"\
00178         "fmul8ulx16 %%f4, %%f34, %%f48 \n\t"\
00179         "fmul8ulx16 %%f4, %%f42, %%f50 \n\t"\
00180         "fmul8ulx16 %%f6, %%f36, %%f52 \n\t"\
00181         "fmul8ulx16 %%f6, %%f44, %%f54 \n\t"\
00182         "fmul8ulx16 %%f6, %%f32, %%f56 \n\t"\
00183         "fmul8ulx16 %%f6, %%f40, %%f58 \n\t"\
00184 \
00185         "fpadd16 %%f16, %%f48, %%f16 \n\t"\
00186         "fpadd16 %%f20, %%f50, %%f20 \n\t"\
00187         "fpsub16 %%f24, %%f50, %%f24 \n\t"\
00188         "fpsub16 %%f28, %%f48, %%f28 \n\t"\
00189         "fpadd16 %%f18, %%f52, %%f18 \n\t"\
00190         "fpsub16 %%f22, %%f54, %%f22 \n\t"\
00191         "fpsub16 %%f26, %%f56, %%f26 \n\t"\
00192         "fpsub16 %%f30, %%f58, %%f30 \n\t"\
00193 \
00194         "fmul8sux16 %%f4, %%f34, %%f48 \n\t"\
00195         "fmul8sux16 %%f4, %%f42, %%f50 \n\t"\
00196         "fmul8sux16 %%f6, %%f36, %%f52 \n\t"\
00197         "fmul8sux16 %%f6, %%f44, %%f54 \n\t"\
00198         "fmul8sux16 %%f6, %%f32, %%f56 \n\t"\
00199         "fmul8sux16 %%f6, %%f40, %%f58 \n\t"\
00200 \
00201         "fpadd16 %%f16, %%f48, %%f16 \n\t"\
00202         "fpadd16 %%f20, %%f50, %%f20 \n\t"\
00203         "fpsub16 %%f24, %%f50, %%f24 \n\t"\
00204         "fpsub16 %%f28, %%f48, %%f28 \n\t"\
00205         "fpadd16 %%f18, %%f52, %%f18 \n\t"\
00206         "fpsub16 %%f22, %%f54, %%f22 \n\t"\
00207         "fpsub16 %%f26, %%f56, %%f26 \n\t"\
00208         "fpsub16 %%f30, %%f58, %%f30 \n\t"\
00209 \
00210         "fpadd16 %%f16, %%f4, %%f16  \n\t"\
00211         "fpsub16 %%f28, %%f4, %%f28  \n\t"\
00212         "fpadd16 %%f18, %%f6, %%f18  \n\t"\
00213         "fpsub16 %%f26, %%f6, %%f26  \n\t"\
00214     /* 3. column: inputs f8 (x4) and f10 (x5); skipped if fcc1 says f8|f10 == 0 (the fpsub16 after fbe is the delay slot and finishes column 2; f6 is zero there if column 2 was skipped) */\
00215         "3:                             \n\t"\
00216         "fbe %%fcc1, 4f                 \n\t"\
00217         "fpsub16 %%f30, %%f6, %%f30     \n\t"\
00218         "fmul8ulx16 %%f8, %%f38, %%f48  \n\t"\
00219         "fmul8ulx16 %%f10, %%f40, %%f50 \n\t"\
00220         "fmul8ulx16 %%f10, %%f32, %%f52 \n\t"\
00221         "fmul8ulx16 %%f10, %%f44, %%f54 \n\t"\
00222         "fmul8ulx16 %%f10, %%f36, %%f56 \n\t"\
00223 \
00224         "fpadd16 %%f16, %%f48, %%f16 \n\t"\
00225         "fpsub16 %%f20, %%f48, %%f20 \n\t"\
00226         "fpsub16 %%f24, %%f48, %%f24 \n\t"\
00227         "fpadd16 %%f28, %%f48, %%f28 \n\t"\
00228         "fpadd16 %%f18, %%f50, %%f18 \n\t"\
00229         "fpsub16 %%f22, %%f52, %%f22 \n\t"\
00230         "fpadd16 %%f26, %%f54, %%f26 \n\t"\
00231         "fpadd16 %%f30, %%f56, %%f30 \n\t"\
00232 \
00233         "fmul8sux16 %%f8, %%f38, %%f48 \n\t"\
00234         "fmul8sux16 %%f10, %%f40, %%f50 \n\t"\
00235         "fmul8sux16 %%f10, %%f32, %%f52 \n\t"\
00236         "fmul8sux16 %%f10, %%f44, %%f54 \n\t"\
00237         "fmul8sux16 %%f10, %%f36, %%f56 \n\t"\
00238 \
00239         "fpadd16 %%f16, %%f48, %%f16 \n\t"\
00240         "fpsub16 %%f20, %%f48, %%f20 \n\t"\
00241         "fpsub16 %%f24, %%f48, %%f24 \n\t"\
00242         "fpadd16 %%f28, %%f48, %%f28 \n\t"\
00243         "fpadd16 %%f18, %%f50, %%f18 \n\t"\
00244         "fpsub16 %%f22, %%f52, %%f22 \n\t"\
00245         "fpadd16 %%f26, %%f54, %%f26 \n\t"\
00246         "fpadd16 %%f30, %%f56, %%f30 \n\t"\
00247 \
00248         "fpadd16 %%f16, %%f8, %%f16  \n\t"\
00249         "fpsub16 %%f20, %%f8, %%f20  \n\t"\
00250         "fpsub16 %%f24, %%f8, %%f24  \n\t"\
00251         "fpadd16 %%f28, %%f8, %%f28  \n\t"\
00252         "fpadd16 %%f18, %%f10, %%f18 \n\t"\
00253         "fpsub16 %%f22, %%f10, %%f22 \n\t"\
00254     /* 4. column: inputs f12 (x6) and f14 (x7); skipped if fcc2 says f12|f14 == 0 (the fpadd16 after fbe is the delay slot and finishes column 3; f10 is zero there if column 3 was skipped) */\
00255         "4:                             \n\t"\
00256         "fbe %%fcc2, 5f                 \n\t"\
00257         "fpadd16 %%f30, %%f10, %%f30    \n\t"\
00258         "fmul8ulx16 %%f12, %%f42, %%f48 \n\t"\
00259         "fmul8ulx16 %%f12, %%f34, %%f50 \n\t"\
00260         "fmul8ulx16 %%f14, %%f44, %%f52 \n\t"\
00261         "fmul8ulx16 %%f14, %%f40, %%f54 \n\t"\
00262         "fmul8ulx16 %%f14, %%f36, %%f56 \n\t"\
00263         "fmul8ulx16 %%f14, %%f32, %%f58 \n\t"\
00264 \
00265         "fpadd16 %%f16, %%f48, %%f16 \n\t"\
00266         "fpsub16 %%f20, %%f50, %%f20 \n\t"\
00267         "fpadd16 %%f24, %%f50, %%f24 \n\t"\
00268         "fpsub16 %%f28, %%f48, %%f28 \n\t"\
00269         "fpadd16 %%f18, %%f52, %%f18 \n\t"\
00270         "fpsub16 %%f22, %%f54, %%f22 \n\t"\
00271         "fpadd16 %%f26, %%f56, %%f26 \n\t"\
00272         "fpsub16 %%f30, %%f58, %%f30 \n\t"\
00273 \
00274         "fmul8sux16 %%f12, %%f42, %%f48 \n\t"\
00275         "fmul8sux16 %%f12, %%f34, %%f50 \n\t"\
00276         "fmul8sux16 %%f14, %%f44, %%f52 \n\t"\
00277         "fmul8sux16 %%f14, %%f40, %%f54 \n\t"\
00278         "fmul8sux16 %%f14, %%f36, %%f56 \n\t"\
00279         "fmul8sux16 %%f14, %%f32, %%f58 \n\t"\
00280 \
00281         "fpadd16 %%f16, %%f48, %%f16 \n\t"\
00282         "fpsub16 %%f20, %%f50, %%f20 \n\t"\
00283         "fpadd16 %%f24, %%f50, %%f24 \n\t"\
00284         "fpsub16 %%f28, %%f48, %%f28 \n\t"\
00285         "fpadd16 %%f18, %%f52, %%f18 \n\t"\
00286         "fpsub16 %%f22, %%f54, %%f22 \n\t"\
00287         "fpadd16 %%f26, %%f56, %%f26 \n\t"\
00288         "fpsub16 %%f30, %%f58, %%f30 \n\t"\
00289 \
00290         "fpsub16 %%f20, %%f12, %%f20 \n\t"\
00291         "fpadd16 %%f24, %%f12, %%f24 \n\t"\
00292         "fpsub16 %%f22, %%f14, %%f22 \n\t"\
00293         "fpadd16 %%f26, %%f14, %%f26 \n\t"\
00294         "fpsub16 %%f30, %%f14, %%f30 \n\t"\
00295     /* final butterfly: even accumulators (f16,f20,f24,f28) +/- odd accumulators (f18,f22,f26,f30) */\
00296         "5:                          \n\t"\
00297         "fpsub16 %%f16, %%f18, %%f48 \n\t"\
00298         "fpsub16 %%f20, %%f22, %%f50 \n\t"\
00299         "fpsub16 %%f24, %%f26, %%f52 \n\t"\
00300         "fpsub16 %%f28, %%f30, %%f54 \n\t"\
00301         "fpadd16 %%f16, %%f18, %%f16 \n\t"\
00302         "fpadd16 %%f20, %%f22, %%f20 \n\t"\
00303         "fpadd16 %%f24, %%f26, %%f24 \n\t"\
00304         "fpadd16 %%f28, %%f30, %%f28 \n\t"\
00305 
/*
 * STOREROWS(out) - store the eight IDCT4ROWS result registers with a 16-byte
 * stride, starting at the address expression `out` (a string literal such as
 * "%3+8" or "%2+0").
 *
 * Outputs 0..3 (%f16, %f20, %f24, %f28) go to out+0, +16, +32, +48 and
 * outputs 4..7 (%f54, %f52, %f50, %f48) go to out+64, +80, +96, +112.
 */
00306 #define STOREROWS(out) \
00307         "std %%f48, [" out "+112]          \n\t"\
00308         "std %%f50, [" out "+96]           \n\t"\
00309         "std %%f52, [" out "+80]           \n\t"\
00310         "std %%f54, [" out "+64]           \n\t"\
00311         "std %%f16, [" out "]              \n\t"\
00312         "std %%f20, [" out "+16]           \n\t"\
00313         "std %%f24, [" out "+32]           \n\t"\
00314         "std %%f28, [" out "+48]           \n\t"\
00315 
/*
 * SCALEROWS - scale the eight IDCT4ROWS result registers in place.
 *
 * Each of %f48, %f50, %f52, %f54, %f16, %f20, %f24, %f28 is replaced by
 * fmul8sux16(%f46, reg).  The only user is ff_simple_idct_vis(), where %f46
 * holds the scale table (0x0400 per lane).
 * NOTE(review): with %f46 as the 8-bit operand (upper byte 4) this presumably
 * computes reg*4 >> 8, i.e. reg >> 6 without rounding - confirm against the
 * VIS manual.
 */
00316 #define SCALEROWS \
00317         "fmul8sux16 %%f46, %%f48, %%f48 \n\t"\
00318         "fmul8sux16 %%f46, %%f50, %%f50 \n\t"\
00319         "fmul8sux16 %%f46, %%f52, %%f52 \n\t"\
00320         "fmul8sux16 %%f46, %%f54, %%f54 \n\t"\
00321         "fmul8sux16 %%f46, %%f16, %%f16 \n\t"\
00322         "fmul8sux16 %%f46, %%f20, %%f20 \n\t"\
00323         "fmul8sux16 %%f46, %%f24, %%f24 \n\t"\
00324         "fmul8sux16 %%f46, %%f28, %%f28 \n\t"\
00325 
/*
 * PUTPIXELSCLAMPED(dest) - pack the eight IDCT4ROWS results to bytes and
 * store four bytes into each of eight destination lines.
 *
 * `dest` is a string literal holding the byte offset inside each line (the
 * caller uses "0" and "4").  The operand numbers are hard-wired to
 * ff_simple_idct_put_vis(): %3 is the address of the first line and %5..%11
 * are the addresses of the seven following lines.
 *
 * fpack16 converts outputs 0..7 (%f16, %f20, %f24, %f28, %f54, %f52, %f50,
 * %f48) into the 32-bit registers %f0, %f2, ..., %f14, which are then written
 * with st to lines 0..7 in that order.
 * NOTE(review): per the VIS manual fpack16 scales according to the %gsr scale
 * factor and saturates each lane to 0..255, which is presumably where the
 * "CLAMPED" in the name comes from - confirm.
 */
00326 #define PUTPIXELSCLAMPED(dest) \
00327         "fpack16 %%f48, %%f14 \n\t"\
00328         "fpack16 %%f50, %%f12 \n\t"\
00329         "fpack16 %%f16, %%f0  \n\t"\
00330         "fpack16 %%f20, %%f2  \n\t"\
00331         "fpack16 %%f24, %%f4  \n\t"\
00332         "fpack16 %%f28, %%f6  \n\t"\
00333         "fpack16 %%f54, %%f8  \n\t"\
00334         "fpack16 %%f52, %%f10 \n\t"\
00335         "st %%f0, [%3+" dest "]   \n\t"\
00336         "st %%f2, [%5+" dest "]   \n\t"\
00337         "st %%f4, [%6+" dest "]   \n\t"\
00338         "st %%f6, [%7+" dest "]   \n\t"\
00339         "st %%f8, [%8+" dest "]   \n\t"\
00340         "st %%f10, [%9+" dest "]  \n\t"\
00341         "st %%f12, [%10+" dest "] \n\t"\
00342         "st %%f14, [%11+" dest "] \n\t"\
00343 
/*
 * ADDPIXELSCLAMPED(dest) - add the eight IDCT4ROWS results to the existing
 * destination pixels, pack to bytes and store them back.
 *
 * `dest` is a string literal holding the byte offset inside each line (the
 * caller uses "0" and "4").  The operand numbers are hard-wired to
 * ff_simple_idct_add_vis(): %3 is the address of the first line, %6..%12 are
 * the addresses of the seven following lines and %5 points to the expand
 * table.
 *
 * Steps:
 *  1. load the expand constant into %f18 (it has to be reloaded on every
 *     expansion because IDCT4ROWS uses %f18 as an accumulator);
 *  2. load four bytes from each of the eight lines into %f0, %f2, ..., %f14;
 *  3. widen them with fmul8x16 by %f18;
 *  4. add outputs 0..7 (%f16, %f20, %f24, %f28, %f54, %f52, %f50, %f48);
 *  5. fpack16 each sum back to four bytes and store it to the line it was
 *     loaded from.
 * NOTE(review): per the VIS manual fpack16 scales according to the %gsr scale
 * factor and saturates each lane to 0..255 - confirm.
 */
00344 #define ADDPIXELSCLAMPED(dest) \
00345         "ldd [%5], %%f18         \n\t"\
00346         "ld [%3+" dest"], %%f0   \n\t"\
00347         "ld [%6+" dest"], %%f2   \n\t"\
00348         "ld [%7+" dest"], %%f4   \n\t"\
00349         "ld [%8+" dest"], %%f6   \n\t"\
00350         "ld [%9+" dest"], %%f8   \n\t"\
00351         "ld [%10+" dest"], %%f10 \n\t"\
00352         "ld [%11+" dest"], %%f12 \n\t"\
00353         "ld [%12+" dest"], %%f14 \n\t"\
00354         "fmul8x16 %%f0, %%f18, %%f0   \n\t"\
00355         "fmul8x16 %%f2, %%f18, %%f2   \n\t"\
00356         "fmul8x16 %%f4, %%f18, %%f4   \n\t"\
00357         "fmul8x16 %%f6, %%f18, %%f6   \n\t"\
00358         "fmul8x16 %%f8, %%f18, %%f8   \n\t"\
00359         "fmul8x16 %%f10, %%f18, %%f10 \n\t"\
00360         "fmul8x16 %%f12, %%f18, %%f12 \n\t"\
00361         "fmul8x16 %%f14, %%f18, %%f14 \n\t"\
00362         "fpadd16 %%f0, %%f16, %%f0    \n\t"\
00363         "fpadd16 %%f2, %%f20, %%f2    \n\t"\
00364         "fpadd16 %%f4, %%f24, %%f4    \n\t"\
00365         "fpadd16 %%f6, %%f28, %%f6    \n\t"\
00366         "fpadd16 %%f8, %%f54, %%f8    \n\t"\
00367         "fpadd16 %%f10, %%f52, %%f10  \n\t"\
00368         "fpadd16 %%f12, %%f50, %%f12  \n\t"\
00369         "fpadd16 %%f14, %%f48, %%f14  \n\t"\
00370         "fpack16 %%f0, %%f0   \n\t"\
00371         "fpack16 %%f2, %%f2   \n\t"\
00372         "fpack16 %%f4, %%f4   \n\t"\
00373         "fpack16 %%f6, %%f6   \n\t"\
00374         "fpack16 %%f8, %%f8   \n\t"\
00375         "fpack16 %%f10, %%f10 \n\t"\
00376         "fpack16 %%f12, %%f12 \n\t"\
00377         "fpack16 %%f14, %%f14 \n\t"\
00378         "st %%f0, [%3+" dest "]   \n\t"\
00379         "st %%f2, [%6+" dest "]   \n\t"\
00380         "st %%f4, [%7+" dest "]   \n\t"\
00381         "st %%f6, [%8+" dest "]   \n\t"\
00382         "st %%f8, [%9+" dest "]   \n\t"\
00383         "st %%f10, [%10+" dest "] \n\t"\
00384         "st %%f12, [%11+" dest "] \n\t"\
00385         "st %%f14, [%12+" dest "] \n\t"\
00386 
00387 
/**
 * In-place two-pass 8x8 inverse DCT using SPARC VIS instructions.
 *
 * Pass 1 reads the two 4-column halves of the block with LOADSCALE, runs
 * IDCT4ROWS on each and writes the intermediate result to the local buffer
 * temp (the first four result rows of the left half stay in registers).
 * Pass 2 transposes four intermediate rows at a time, runs IDCT4ROWS again,
 * scales the result with SCALEROWS and stores it back into data.
 *
 * @param data  block of 64 coefficients, read and overwritten.  It is
 *              accessed with ldd/std, so it presumably has to be 8-byte
 *              aligned - TODO confirm the caller guarantees this.
 *
 * Asm operands: %0 = scale, %1 = coeffs, %2 = data, %3 = temp.  out1..out4
 * are dummy outputs whose only purpose is to be tied to the inputs.
 *
 * NOTE(review): the asm also overwrites floating-point registers and
 * %fcc0..%fcc2 without declaring them as clobbered, and the dummy outputs are
 * `int` although the tied inputs are pointers - verify both on the targets
 * this is built for.
 */
00388 void ff_simple_idct_vis(DCTELEM *data) {
00389     int out1, out2, out3, out4;
00390     DECLARE_ALIGNED(8, int16_t, temp)[8*8];
00391 
00392     __asm__ volatile(
00393         INIT_IDCT
00394 
              /* no rounding term in either pass of this variant */
00395 #define ADDROUNDER
00396 
00397         // shift right 16-4=12
              /* pass 1, right half (byte offset 8): result goes to temp */
00398         LOADSCALE("%2+8")
00399         IDCT4ROWS
00400         STOREROWS("%3+8")
              /* pass 1, left half: only result rows 4..7 are stored, rows 0..3
               * stay in %f16/%f20/%f24/%f28 for the transpose below */
00401         LOADSCALE("%2+0")
00402         IDCT4ROWS
00403         "std %%f48, [%3+112] \n\t"
00404         "std %%f50, [%3+96]  \n\t"
00405         "std %%f52, [%3+80]  \n\t"
00406         "std %%f54, [%3+64]  \n\t"
00407 
00408         // shift right 16+4
              /* pass 2, intermediate rows 0..3: fetch their right halves from temp */
00409         "ldd [%3+8], %%f18  \n\t"
00410         "ldd [%3+24], %%f22 \n\t"
00411         "ldd [%3+40], %%f26 \n\t"
00412         "ldd [%3+56], %%f30 \n\t"
00413         TRANSPOSE
00414         IDCT4ROWS
00415         SCALEROWS
00416         STOREROWS("%2+0")
              /* pass 2, intermediate rows 4..7 (64 contiguous bytes of temp) */
00417         LOAD("%3+64")
00418         TRANSPOSE
00419         IDCT4ROWS
00420         SCALEROWS
00421         STOREROWS("%2+8")
00422 
00423         : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4)
00424         : "0" (scale), "1" (coeffs), "2" (data), "3" (temp)
              /* The asm loads from and stores to *data and temp through plain
               * register operands, so the compiler has to be told that memory
               * is read and modified behind its back. */
              : "memory"
00425     );
00426 }
00427 
/**
 * Two-pass 8x8 inverse DCT using SPARC VIS instructions; the result is packed
 * to bytes and written to eight lines of dest.
 *
 * Pass 1 reads the two 4-column halves of the block with LOADSCALE, runs
 * IDCT4ROWS on each and stores the intermediate result back into data.
 * Pass 2 transposes four intermediate rows at a time, runs IDCT4ROWS with the
 * rounding term enabled and stores the packed result with PUTPIXELSCLAMPED.
 *
 * @param dest       address of the first output line; 4 bytes are written at
 *                   offsets 0 and 4 of each of the 8 lines
 * @param line_size  distance in bytes between consecutive output lines
 * @param data       block of 64 coefficients; it is overwritten with the
 *                   intermediate result of pass 1.  It is accessed with
 *                   ldd/std, so it presumably has to be 8-byte aligned -
 *                   TODO confirm the caller guarantees this.
 *
 * Asm operands: %0 = rounder, %1 = coeffs, %2 = data, %3 = dest,
 * %4 = line_size, %5..%11 (r1..r7) = addresses of output lines 1..7.
 * out1..out5 are dummy outputs whose only purpose is to be tied to the inputs.
 *
 * NOTE(review): the asm also overwrites floating-point registers,
 * %fcc0..%fcc2 and %gsr without declaring them as clobbered, and the dummy
 * outputs are `int` although some tied inputs are pointers - verify both on
 * the targets this is built for.
 */
00428 void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data) {
00429     int out1, out2, out3, out4, out5;
00430     int r1, r2, r3, r4, r5, r6, r7;
00431 
00432     __asm__ volatile(
              /* set %gsr to 8, the setting under which fpack16 operates
               * (NOTE(review): presumably scale factor 1, align 0 - confirm) */
00433         "wr %%g0, 0x8, %%gsr \n\t"
00434 
00435         INIT_IDCT
00436 
              /* precompute the addresses of output lines 1..7 */
00437         "add %3, %4, %5   \n\t"
00438         "add %5, %4, %6   \n\t"
00439         "add %6, %4, %7   \n\t"
00440         "add %7, %4, %8   \n\t"
00441         "add %8, %4, %9   \n\t"
00442         "add %9, %4, %10  \n\t"
00443         "add %10, %4, %11 \n\t"
00444 
              /* Pass 1 runs without a rounding term.  Reset ADDROUNDER here
               * instead of relying on whatever definition the preceding
               * function left behind (same as ff_simple_idct_add_vis). */
      #undef ADDROUNDER
      #define ADDROUNDER

00445         // shift right 16-4=12
              /* pass 1, right half (byte offset 8), stored back in place */
00446         LOADSCALE("%2+8")
00447         IDCT4ROWS
00448         STOREROWS("%2+8")
              /* pass 1, left half: only result rows 4..7 are stored, rows 0..3
               * stay in %f16/%f20/%f24/%f28 for the transpose below */
00449         LOADSCALE("%2+0")
00450         IDCT4ROWS
00451         "std %%f48, [%2+112] \n\t"
00452         "std %%f50, [%2+96]  \n\t"
00453         "std %%f52, [%2+80]  \n\t"
00454         "std %%f54, [%2+64]  \n\t"
00455 
              /* pass 2 adds the rounder (%f46) to the first accumulator */
00456 #undef ADDROUNDER
00457 #define ADDROUNDER "fpadd16 %%f28, %%f46, %%f28 \n\t"
00458 
00459         // shift right 16+4
              /* pass 2, intermediate rows 0..3: fetch their right halves */
00460         "ldd [%2+8], %%f18  \n\t"
00461         "ldd [%2+24], %%f22 \n\t"
00462         "ldd [%2+40], %%f26 \n\t"
00463         "ldd [%2+56], %%f30 \n\t"
00464         TRANSPOSE
00465         IDCT4ROWS
00466         PUTPIXELSCLAMPED("0")
              /* pass 2, intermediate rows 4..7 (64 contiguous bytes) */
00467         LOAD("%2+64")
00468         TRANSPOSE
00469         IDCT4ROWS
00470         PUTPIXELSCLAMPED("4")
00471 
00472         : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5),
00473           "=r" (r1), "=r" (r2), "=r" (r3), "=r" (r4), "=r" (r5), "=r" (r6), "=r" (r7)
00474         : "0" (rounder), "1" (coeffs), "2" (data), "3" (dest), "4" (line_size)
              /* The asm loads from and stores to *data and *dest through plain
               * register operands, so the compiler has to be told that memory
               * is read and modified behind its back. */
              : "memory"
00475     );
00476 }
00477 
/**
 * Two-pass 8x8 inverse DCT using SPARC VIS instructions; the result is added
 * to the pixels already present in eight lines of dest, packed to bytes and
 * written back.
 *
 * Pass 1 reads the two 4-column halves of the block with LOADSCALE, runs
 * IDCT4ROWS on each and stores the intermediate result back into data.
 * Pass 2 transposes four intermediate rows at a time, runs IDCT4ROWS with the
 * rounding term enabled and merges the result into dest with
 * ADDPIXELSCLAMPED.
 *
 * @param dest       address of the first line to update; 4 bytes are read and
 *                   rewritten at offsets 0 and 4 of each of the 8 lines
 * @param line_size  distance in bytes between consecutive lines
 * @param data       block of 64 coefficients; it is overwritten with the
 *                   intermediate result of pass 1.  It is accessed with
 *                   ldd/std, so it presumably has to be 8-byte aligned -
 *                   TODO confirm the caller guarantees this.
 *
 * Asm operands: %0 = rounder, %1 = coeffs, %2 = data, %3 = dest,
 * %4 = line_size, %5 = expand, %6..%12 (r1..r7) = addresses of lines 1..7.
 * out1..out6 are dummy outputs whose only purpose is to be tied to the inputs.
 *
 * NOTE(review): the asm also overwrites floating-point registers,
 * %fcc0..%fcc2 and %gsr without declaring them as clobbered, and the dummy
 * outputs are `int` although some tied inputs are pointers - verify both on
 * the targets this is built for.
 */
00478 void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data) {
00479     int out1, out2, out3, out4, out5, out6;
00480     int r1, r2, r3, r4, r5, r6, r7;
00481 
00482     __asm__ volatile(
              /* set %gsr to 8, the setting under which fpack16 operates
               * (NOTE(review): presumably scale factor 1, align 0 - confirm) */
00483         "wr %%g0, 0x8, %%gsr \n\t"
00484 
00485         INIT_IDCT
00486 
              /* precompute the addresses of lines 1..7 */
00487         "add %3, %4, %6   \n\t"
00488         "add %6, %4, %7   \n\t"
00489         "add %7, %4, %8   \n\t"
00490         "add %8, %4, %9   \n\t"
00491         "add %9, %4, %10  \n\t"
00492         "add %10, %4, %11 \n\t"
00493         "add %11, %4, %12 \n\t"
00494 
              /* pass 1 runs without a rounding term */
00495 #undef ADDROUNDER
00496 #define ADDROUNDER
00497 
00498         // shift right 16-4=12
              /* pass 1, right half (byte offset 8), stored back in place */
00499         LOADSCALE("%2+8")
00500         IDCT4ROWS
00501         STOREROWS("%2+8")
              /* pass 1, left half: only result rows 4..7 are stored, rows 0..3
               * stay in %f16/%f20/%f24/%f28 for the transpose below */
00502         LOADSCALE("%2+0")
00503         IDCT4ROWS
00504         "std %%f48, [%2+112] \n\t"
00505         "std %%f50, [%2+96]  \n\t"
00506         "std %%f52, [%2+80]  \n\t"
00507         "std %%f54, [%2+64]  \n\t"
00508 
              /* pass 2 adds the rounder (%f46) to the first accumulator */
00509 #undef ADDROUNDER
00510 #define ADDROUNDER "fpadd16 %%f28, %%f46, %%f28 \n\t"
00511 
00512         // shift right 16+4
              /* pass 2, intermediate rows 0..3: fetch their right halves */
00513         "ldd [%2+8], %%f18  \n\t"
00514         "ldd [%2+24], %%f22 \n\t"
00515         "ldd [%2+40], %%f26 \n\t"
00516         "ldd [%2+56], %%f30 \n\t"
00517         TRANSPOSE
00518         IDCT4ROWS
00519         ADDPIXELSCLAMPED("0")
              /* pass 2, intermediate rows 4..7 (64 contiguous bytes) */
00520         LOAD("%2+64")
00521         TRANSPOSE
00522         IDCT4ROWS
00523         ADDPIXELSCLAMPED("4")
00524 
00525         : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6),
00526           "=r" (r1), "=r" (r2), "=r" (r3), "=r" (r4), "=r" (r5), "=r" (r6), "=r" (r7)
00527         : "0" (rounder), "1" (coeffs), "2" (data), "3" (dest), "4" (line_size), "5" (expand)
              /* The asm loads from and stores to *data and *dest through plain
               * register operands, so the compiler has to be told that memory
               * is read and modified behind its back. */
              : "memory"
00528     );
00529 }

Generated on Fri Sep 16 2011 17:17:43 for FFmpeg by  doxygen 1.7.1