MathFunctions.h
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
00005 // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
00006 //
00007 // Eigen is free software; you can redistribute it and/or
00008 // modify it under the terms of the GNU Lesser General Public
00009 // License as published by the Free Software Foundation; either
00010 // version 3 of the License, or (at your option) any later version.
00011 //
00012 // Alternatively, you can redistribute it and/or
00013 // modify it under the terms of the GNU General Public License as
00014 // published by the Free Software Foundation; either version 2 of
00015 // the License, or (at your option) any later version.
00016 //
00017 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
00018 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00019 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
00020 // GNU General Public License for more details.
00021 //
00022 // You should have received a copy of the GNU Lesser General Public
00023 // License and a copy of the GNU General Public License along with
00024 // Eigen. If not, see <http://www.gnu.org/licenses/>.
00025 
00026 #ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
00027 #define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
00028 
00029 namespace Eigen { 
00030 
00031 namespace internal {
00032 
/** \internal
  * Generic fallback for pasin: hands the argument to std::asin and returns
  * the result converted back to \c Packet. A dedicated specialization for
  * Packet4f is defined further down when EIGEN_VECTORIZE_SSE is set.
  */
template<typename Packet>
inline static Packet pasin(Packet a)
{
  return std::asin(a);
}
00035 
00036 #ifdef EIGEN_VECTORIZE_SSE
00037 
00038 template<> EIGEN_DONT_INLINE Packet4f pasin(Packet4f x)
00039 {
00040   _EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
00041   _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5);
00042   _EIGEN_DECLARE_CONST_Packet4f(3half, 1.5);
00043 
00044   _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
00045 
00046   _EIGEN_DECLARE_CONST_Packet4f(pi, 3.141592654);
00047   _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5);
00048 
00049   _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2);
00050   _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2);
00051   _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2);
00052   _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2);
00053   _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1);
00054 
00055   Packet4f a = pabs(x);//got the absolute value
00056 
00057   Packet4f sign_bit= _mm_and_ps(x, p4f_sign_mask);//extracted the sign bit
00058 
00059   Packet4f z1,z2;//will need them during computation    
00060 
00061 
00062 //will compute the two branches for asin
00063 //so first compare with half
00064 
00065   Packet4f branch_mask= _mm_cmpgt_ps(a, p4f_half);//this is to select which branch to take
00066 //both will be taken, and finally results will be merged
00067 //the branch for values >0.5
00068 
00069     {
00070 //the core series expansion 
00071     z1=pmadd(p4f_minus_half,a,p4f_half);
00072     Packet4f x1=psqrt(z1);
00073     Packet4f s1=pmadd(p4f_asin1, z1, p4f_asin2);
00074     Packet4f s2=pmadd(s1, z1, p4f_asin3);
00075     Packet4f s3=pmadd(s2,z1, p4f_asin4);
00076     Packet4f s4=pmadd(s3,z1, p4f_asin5);
00077     Packet4f temp=pmul(s4,z1);//not really a madd but a mul by z so that the next term can be a madd
00078     z1=pmadd(temp,x1,x1);
00079     z1=padd(z1,z1);
00080     z1=psub(p4f_pi_over_2,z1);
00081     }
00082 
00083     {
00084 //the core series expansion 
00085     Packet4f x2=a;
00086     z2=pmul(x2,x2);
00087     Packet4f s1=pmadd(p4f_asin1, z2, p4f_asin2);
00088     Packet4f s2=pmadd(s1, z2, p4f_asin3);
00089     Packet4f s3=pmadd(s2,z2, p4f_asin4);
00090     Packet4f s4=pmadd(s3,z2, p4f_asin5);
00091     Packet4f temp=pmul(s4,z2);//not really a madd but a mul by z so that the next term can be a madd
00092     z2=pmadd(temp,x2,x2);
00093     }
00094 
00095 /* select the correct result from the two branch evaluations */
00096   z1  = _mm_and_ps(branch_mask, z1);
00097   z2  = _mm_andnot_ps(branch_mask, z2);
00098   Packet4f z  = _mm_or_ps(z1,z2);
00099 
00100 /* update the sign */
00101   return _mm_xor_ps(z, sign_bit);
00102 }
00103 
00104 #endif // EIGEN_VECTORIZE_SSE
00105 
00106 } // end namespace internal
00107 
00108 } // end namespace Eigen
00109 
00110 #endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H