Complex.h
Go to the documentation of this file.
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
00005 //
00006 // Eigen is free software; you can redistribute it and/or
00007 // modify it under the terms of the GNU Lesser General Public
00008 // License as published by the Free Software Foundation; either
00009 // version 3 of the License, or (at your option) any later version.
00010 //
00011 // Alternatively, you can redistribute it and/or
00012 // modify it under the terms of the GNU General Public License as
00013 // published by the Free Software Foundation; either version 2 of
00014 // the License, or (at your option) any later version.
00015 //
00016 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
00017 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00018 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
00019 // GNU General Public License for more details.
00020 //
00021 // You should have received a copy of the GNU Lesser General Public
00022 // License and a copy of the GNU General Public License along with
00023 // Eigen. If not, see <http://www.gnu.org/licenses/>.
00024 
00025 #ifndef EIGEN_COMPLEX_SSE_H
00026 #define EIGEN_COMPLEX_SSE_H
00027 
00028 namespace Eigen {
00029 
00030 namespace internal {
00031 
00032 //---------- float ----------
00033 struct Packet2cf
00034 {
00035   EIGEN_STRONG_INLINE Packet2cf() {}
00036   EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
00037   __m128  v;
00038 };
00039 
00040 template<> struct packet_traits<std::complex<float> >  : default_packet_traits
00041 {
00042   typedef Packet2cf type;
00043   enum {
00044     Vectorizable = 1,
00045     AlignedOnScalar = 1,
00046     size = 2,
00047 
00048     HasAdd    = 1,
00049     HasSub    = 1,
00050     HasMul    = 1,
00051     HasDiv    = 1,
00052     HasNegate = 1,
00053     HasAbs    = 0,
00054     HasAbs2   = 0,
00055     HasMin    = 0,
00056     HasMax    = 0,
00057     HasSetLinear = 0
00058   };
00059 };
00060 
00061 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
00062 
00063 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
00064 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
00065 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
00066 {
00067   const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
00068   return Packet2cf(_mm_xor_ps(a.v,mask));
00069 }
00070 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
00071 {
00072   const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
00073   return Packet2cf(_mm_xor_ps(a.v,mask));
00074 }
00075 
00076 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
00077 {
00078   // TODO optimize it for SSE3 and 4
00079   #ifdef EIGEN_VECTORIZE_SSE3
00080   return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
00081                                  _mm_mul_ps(_mm_movehdup_ps(a.v),
00082                                             vec4f_swizzle1(b.v, 1, 0, 3, 2))));
00083 //   return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
00084 //                                  _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
00085 //                                             vec4f_swizzle1(b.v, 1, 0, 3, 2))));
00086   #else
00087   const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
00088   return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
00089                               _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
00090                                                     vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
00091   #endif
00092 }
00093 
00094 template<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
00095 template<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
00096 template<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
00097 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
00098 
00099 template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&real_ref(*from))); }
00100 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&real_ref(*from))); }
00101 
00102 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
00103 {
00104   Packet2cf res;
00105   #if EIGEN_GNUC_AT_MOST(4,2)
00106   // workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
00107   res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
00108   #else
00109   res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
00110   #endif
00111   return Packet2cf(_mm_movelh_ps(res.v,res.v));
00112 }
00113 
00114 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
00115 
00116 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&real_ref(*to), from.v); }
00117 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&real_ref(*to), from.v); }
00118 
00119 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
00120 
00121 template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
00122 {
00123   #if EIGEN_GNUC_AT_MOST(4,3)
00124   // Workaround gcc 4.2 ICE - this is not performance wise ideal, but who cares...
00125   // This workaround also fix invalid code generation with gcc 4.3
00126   EIGEN_ALIGN16 std::complex<float> res[2];
00127   _mm_store_ps((float*)res, a.v);
00128   return res[0];
00129   #else
00130   std::complex<float> res;
00131   _mm_storel_pi((__m64*)&res, a.v);
00132   return res;
00133   #endif
00134 }
00135 
00136 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(_mm_castps_pd(a.v)))); }
00137 
00138 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
00139 {
00140   return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
00141 }
00142 
00143 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
00144 {
00145   return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
00146 }
00147 
00148 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
00149 {
00150   return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
00151 }
00152 
00153 template<int Offset>
00154 struct palign_impl<Offset,Packet2cf>
00155 {
00156   static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
00157   {
00158     if (Offset==1)
00159     {
00160       first.v = _mm_movehl_ps(first.v, first.v);
00161       first.v = _mm_movelh_ps(first.v, second.v);
00162     }
00163   }
00164 };
00165 
00166 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
00167 {
00168   EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
00169   { return padd(pmul(x,y),c); }
00170 
00171   EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
00172   {
00173     #ifdef EIGEN_VECTORIZE_SSE3
00174     return internal::pmul(a, pconj(b));
00175     #else
00176     const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
00177     return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
00178                                 _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
00179                                            vec4f_swizzle1(b.v, 1, 0, 3, 2))));
00180     #endif
00181   }
00182 };
00183 
00184 template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
00185 {
00186   EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
00187   { return padd(pmul(x,y),c); }
00188 
00189   EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
00190   {
00191     #ifdef EIGEN_VECTORIZE_SSE3
00192     return internal::pmul(pconj(a), b);
00193     #else
00194     const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
00195     return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
00196                                 _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
00197                                                       vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
00198     #endif
00199   }
00200 };
00201 
00202 template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
00203 {
00204   EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
00205   { return padd(pmul(x,y),c); }
00206 
00207   EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
00208   {
00209     #ifdef EIGEN_VECTORIZE_SSE3
00210     return pconj(internal::pmul(a, b));
00211     #else
00212     const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
00213     return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
00214                                 _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
00215                                            vec4f_swizzle1(b.v, 1, 0, 3, 2))));
00216     #endif
00217   }
00218 };
00219 
00220 template<> struct conj_helper<Packet4f, Packet2cf, false,false>
00221 {
00222   EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
00223   { return padd(c, pmul(x,y)); }
00224 
00225   EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
00226   { return Packet2cf(Eigen::internal::pmul(x, y.v)); }
00227 };
00228 
00229 template<> struct conj_helper<Packet2cf, Packet4f, false,false>
00230 {
00231   EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
00232   { return padd(c, pmul(x,y)); }
00233 
00234   EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
00235   { return Packet2cf(Eigen::internal::pmul(x.v, y)); }
00236 };
00237 
00238 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
00239 {
00240   // TODO optimize it for SSE3 and 4
00241   Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
00242   __m128 s = _mm_mul_ps(b.v,b.v);
00243   return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
00244 }
00245 
00246 EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
00247 {
00248   return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
00249 }
00250 
00251 
00252 //---------- double ----------
00253 struct Packet1cd
00254 {
00255   EIGEN_STRONG_INLINE Packet1cd() {}
00256   EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
00257   __m128d  v;
00258 };
00259 
00260 template<> struct packet_traits<std::complex<double> >  : default_packet_traits
00261 {
00262   typedef Packet1cd type;
00263   enum {
00264     Vectorizable = 1,
00265     AlignedOnScalar = 0,
00266     size = 1,
00267 
00268     HasAdd    = 1,
00269     HasSub    = 1,
00270     HasMul    = 1,
00271     HasDiv    = 1,
00272     HasNegate = 1,
00273     HasAbs    = 0,
00274     HasAbs2   = 0,
00275     HasMin    = 0,
00276     HasMax    = 0,
00277     HasSetLinear = 0
00278   };
00279 };
00280 
00281 template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1}; };
00282 
00283 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
00284 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
00285 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(a.v)); }
00286 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
00287 {
00288   const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
00289   return Packet1cd(_mm_xor_pd(a.v,mask));
00290 }
00291 
00292 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
00293 {
00294   // TODO optimize it for SSE3 and 4
00295   #ifdef EIGEN_VECTORIZE_SSE3
00296   return Packet1cd(_mm_addsub_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
00297                                  _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
00298                                             vec2d_swizzle1(b.v, 1, 0))));
00299   #else
00300   const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
00301   return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
00302                               _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
00303                                                     vec2d_swizzle1(b.v, 1, 0)), mask)));
00304   #endif
00305 }
00306 
00307 template<> EIGEN_STRONG_INLINE Packet1cd pand   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
00308 template<> EIGEN_STRONG_INLINE Packet1cd por    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
00309 template<> EIGEN_STRONG_INLINE Packet1cd pxor   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
00310 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
00311 
00312 // FIXME force unaligned load, this is a temporary fix
00313 template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
00314 { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
00315 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
00316 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
00317 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)
00318 { /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
00319 
00320 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
00321 
00322 // FIXME force unaligned store, this is a temporary fix
00323 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
00324 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
00325 
00326 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
00327 
00328 template<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)
00329 {
00330   EIGEN_ALIGN16 double res[2];
00331   _mm_store_pd(res, a.v);
00332   return std::complex<double>(res[0],res[1]);
00333 }
00334 
00335 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
00336 
00337 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
00338 {
00339   return pfirst(a);
00340 }
00341 
00342 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
00343 {
00344   return vecs[0];
00345 }
00346 
00347 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
00348 {
00349   return pfirst(a);
00350 }
00351 
00352 template<int Offset>
00353 struct palign_impl<Offset,Packet1cd>
00354 {
00355   static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
00356   {
00357     // FIXME is it sure we never have to align a Packet1cd?
00358     // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
00359   }
00360 };
00361 
00362 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
00363 {
00364   EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
00365   { return padd(pmul(x,y),c); }
00366 
00367   EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
00368   {
00369     #ifdef EIGEN_VECTORIZE_SSE3
00370     return internal::pmul(a, pconj(b));
00371     #else
00372     const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
00373     return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
00374                                 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
00375                                            vec2d_swizzle1(b.v, 1, 0))));
00376     #endif
00377   }
00378 };
00379 
00380 template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
00381 {
00382   EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
00383   { return padd(pmul(x,y),c); }
00384 
00385   EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
00386   {
00387     #ifdef EIGEN_VECTORIZE_SSE3
00388     return internal::pmul(pconj(a), b);
00389     #else
00390     const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
00391     return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
00392                                 _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
00393                                                       vec2d_swizzle1(b.v, 1, 0)), mask)));
00394     #endif
00395   }
00396 };
00397 
00398 template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
00399 {
00400   EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
00401   { return padd(pmul(x,y),c); }
00402 
00403   EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
00404   {
00405     #ifdef EIGEN_VECTORIZE_SSE3
00406     return pconj(internal::pmul(a, b));
00407     #else
00408     const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
00409     return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
00410                                 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
00411                                            vec2d_swizzle1(b.v, 1, 0))));
00412     #endif
00413   }
00414 };
00415 
00416 template<> struct conj_helper<Packet2d, Packet1cd, false,false>
00417 {
00418   EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
00419   { return padd(c, pmul(x,y)); }
00420 
00421   EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
00422   { return Packet1cd(Eigen::internal::pmul(x, y.v)); }
00423 };
00424 
00425 template<> struct conj_helper<Packet1cd, Packet2d, false,false>
00426 {
00427   EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
00428   { return padd(c, pmul(x,y)); }
00429 
00430   EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
00431   { return Packet1cd(Eigen::internal::pmul(x.v, y)); }
00432 };
00433 
00434 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
00435 {
00436   // TODO optimize it for SSE3 and 4
00437   Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
00438   __m128d s = _mm_mul_pd(b.v,b.v);
00439   return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
00440 }
00441 
00442 EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
00443 {
00444   return Packet1cd(preverse(x.v));
00445 }
00446 
00447 } // end namespace internal
00448 
00449 } // end namespace Eigen
00450 
00451 #endif // EIGEN_COMPLEX_SSE_H