CoeffBasedProduct.h
Go to the documentation of this file.
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
00005 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
00006 //
00007 // Eigen is free software; you can redistribute it and/or
00008 // modify it under the terms of the GNU Lesser General Public
00009 // License as published by the Free Software Foundation; either
00010 // version 3 of the License, or (at your option) any later version.
00011 //
00012 // Alternatively, you can redistribute it and/or
00013 // modify it under the terms of the GNU General Public License as
00014 // published by the Free Software Foundation; either version 2 of
00015 // the License, or (at your option) any later version.
00016 //
00017 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
00018 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00019 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
00020 // GNU General Public License for more details.
00021 //
00022 // You should have received a copy of the GNU Lesser General Public
00023 // License and a copy of the GNU General Public License along with
00024 // Eigen. If not, see <http://www.gnu.org/licenses/>.
00025 
00026 #ifndef EIGEN_COEFFBASED_PRODUCT_H
00027 #define EIGEN_COEFFBASED_PRODUCT_H
00028 
00029 namespace Eigen { 
00030 
00031 namespace internal {
00032 
00033 /*********************************************************************************
00034 *  Coefficient based product implementation.
00035 *  It is designed for the following use cases:
00036 *  - small fixed sizes
00037 *  - lazy products
00038 *********************************************************************************/
00039 
00040 /* Since the all the dimensions of the product are small, here we can rely
00041  * on the generic Assign mechanism to evaluate the product per coeff (or packet).
00042  *
00043  * Note that here the inner-loops should always be unrolled.
00044  */
00045 
00046 template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00047 struct product_coeff_impl;
00048 
00049 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00050 struct product_packet_impl;
00051 
00052 template<typename LhsNested, typename RhsNested, int NestingFlags>
00053 struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
00054 {
00055   typedef MatrixXpr XprKind;
00056   typedef typename remove_all<LhsNested>::type _LhsNested;
00057   typedef typename remove_all<RhsNested>::type _RhsNested;
00058   typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
00059   typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
00060                                            typename traits<_RhsNested>::StorageKind>::ret StorageKind;
00061   typedef typename promote_index_type<typename traits<_LhsNested>::Index,
00062                                          typename traits<_RhsNested>::Index>::type Index;
00063 
00064   enum {
00065       LhsCoeffReadCost = _LhsNested::CoeffReadCost,
00066       RhsCoeffReadCost = _RhsNested::CoeffReadCost,
00067       LhsFlags = _LhsNested::Flags,
00068       RhsFlags = _RhsNested::Flags,
00069 
00070       RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
00071       ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
00072       InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
00073 
00074       MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
00075       MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
00076 
00077       LhsRowMajor = LhsFlags & RowMajorBit,
00078       RhsRowMajor = RhsFlags & RowMajorBit,
00079 
00080       SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
00081 
00082       CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
00083                       && (ColsAtCompileTime == Dynamic
00084                           || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
00085                               && (RhsFlags&AlignedBit)
00086                              )
00087                          ),
00088 
00089       CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
00090                       && (RowsAtCompileTime == Dynamic
00091                           || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
00092                               && (LhsFlags&AlignedBit)
00093                              )
00094                          ),
00095 
00096       EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
00097                      : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
00098                      : (RhsRowMajor && !CanVectorizeLhs),
00099 
00100       Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
00101             | (EvalToRowMajor ? RowMajorBit : 0)
00102             | NestingFlags
00103             | (LhsFlags & RhsFlags & AlignedBit)
00104             // TODO enable vectorization for mixed types
00105             | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
00106 
00107       CoeffReadCost = InnerSize == Dynamic ? Dynamic
00108                     : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
00109                       + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
00110 
00111       /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
00112       * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
00113       * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
00114       * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
00115       */
00116       CanVectorizeInner =    SameType
00117                           && LhsRowMajor
00118                           && (!RhsRowMajor)
00119                           && (LhsFlags & RhsFlags & ActualPacketAccessBit)
00120                           && (LhsFlags & RhsFlags & AlignedBit)
00121                           && (InnerSize % packet_traits<Scalar>::size == 0)
00122     };
00123 };
00124 
00125 } // end namespace internal
00126 
00127 template<typename LhsNested, typename RhsNested, int NestingFlags>
00128 class CoeffBasedProduct
00129   : internal::no_assignment_operator,
00130     public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
00131 {
00132   public:
00133 
00134     typedef MatrixBase<CoeffBasedProduct> Base;
00135     EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
00136     typedef typename Base::PlainObject PlainObject;
00137 
00138   private:
00139 
00140     typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
00141     typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
00142 
00143     enum {
00144       PacketSize = internal::packet_traits<Scalar>::size,
00145       InnerSize  = internal::traits<CoeffBasedProduct>::InnerSize,
00146       Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
00147       CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
00148     };
00149 
00150     typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
00151                                    Unroll ? InnerSize-1 : Dynamic,
00152                                    _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
00153 
00154     typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
00155 
00156   public:
00157 
00158     inline CoeffBasedProduct(const CoeffBasedProduct& other)
00159       : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
00160     {}
00161 
00162     template<typename Lhs, typename Rhs>
00163     inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
00164       : m_lhs(lhs), m_rhs(rhs)
00165     {
00166       // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
00167       // We still allow to mix T and complex<T>.
00168       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
00169         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
00170       eigen_assert(lhs.cols() == rhs.rows()
00171         && "invalid matrix product"
00172         && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
00173     }
00174 
00175     EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
00176     EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
00177 
00178     EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
00179     {
00180       Scalar res;
00181       ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
00182       return res;
00183     }
00184 
00185     /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
00186      * which is why we don't set the LinearAccessBit.
00187      */
00188     EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
00189     {
00190       Scalar res;
00191       const Index row = RowsAtCompileTime == 1 ? 0 : index;
00192       const Index col = RowsAtCompileTime == 1 ? index : 0;
00193       ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
00194       return res;
00195     }
00196 
00197     template<int LoadMode>
00198     EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
00199     {
00200       PacketScalar res;
00201       internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
00202                               Unroll ? InnerSize-1 : Dynamic,
00203                               _LhsNested, _RhsNested, PacketScalar, LoadMode>
00204         ::run(row, col, m_lhs, m_rhs, res);
00205       return res;
00206     }
00207 
00208     // Implicit conversion to the nested type (trigger the evaluation of the product)
00209     EIGEN_STRONG_INLINE operator const PlainObject& () const
00210     {
00211       m_result.lazyAssign(*this);
00212       return m_result;
00213     }
00214 
00215     const _LhsNested& lhs() const { return m_lhs; }
00216     const _RhsNested& rhs() const { return m_rhs; }
00217 
00218     const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
00219     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
00220 
00221     template<int DiagonalIndex>
00222     const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
00223     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
00224 
00225     const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
00226     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
00227 
00228   protected:
00229     typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
00230     typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
00231 
00232     mutable PlainObject m_result;
00233 };
00234 
00235 namespace internal {
00236 
00237 // here we need to overload the nested rule for products
00238 // such that the nested type is a const reference to a plain matrix
00239 template<typename Lhs, typename Rhs, int N, typename PlainObject>
00240 struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
00241 {
00242   typedef PlainObject const& type;
00243 };
00244 
00245 /***************************************************************************
00246 * Normal product .coeff() implementation (with meta-unrolling)
00247 ***************************************************************************/
00248 
00249 /**************************************
00250 *** Scalar path  - no vectorization ***
00251 **************************************/
00252 
00253 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00254 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
00255 {
00256   typedef typename Lhs::Index Index;
00257   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00258   {
00259     product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
00260     res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
00261   }
00262 };
00263 
00264 template<typename Lhs, typename Rhs, typename RetScalar>
00265 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
00266 {
00267   typedef typename Lhs::Index Index;
00268   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00269   {
00270     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
00271   }
00272 };
00273 
00274 template<typename Lhs, typename Rhs, typename RetScalar>
00275 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
00276 {
00277   typedef typename Lhs::Index Index;
00278   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
00279   {
00280     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00281     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
00282       for(Index i = 1; i < lhs.cols(); ++i)
00283         res += lhs.coeff(row, i) * rhs.coeff(i, col);
00284   }
00285 };
00286 
00287 /*******************************************
00288 *** Scalar path with inner vectorization ***
00289 *******************************************/
00290 
00291 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
00292 struct product_coeff_vectorized_unroller
00293 {
00294   typedef typename Lhs::Index Index;
00295   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
00296   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
00297   {
00298     product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
00299     pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
00300   }
00301 };
00302 
00303 template<typename Lhs, typename Rhs, typename Packet>
00304 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
00305 {
00306   typedef typename Lhs::Index Index;
00307   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
00308   {
00309     pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
00310   }
00311 };
00312 
00313 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00314 struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
00315 {
00316   typedef typename Lhs::PacketScalar Packet;
00317   typedef typename Lhs::Index Index;
00318   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
00319   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00320   {
00321     Packet pres;
00322     product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
00323     product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
00324     res = predux(pres);
00325   }
00326 };
00327 
00328 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
00329 struct product_coeff_vectorized_dyn_selector
00330 {
00331   typedef typename Lhs::Index Index;
00332   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00333   {
00334     res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
00335   }
00336 };
00337 
00338 // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
00339 // NOTE maybe they are now useless since we have a specialization for Block<Matrix>
00340 template<typename Lhs, typename Rhs, int RhsCols>
00341 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
00342 {
00343   typedef typename Lhs::Index Index;
00344   static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00345   {
00346     res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
00347   }
00348 };
00349 
00350 template<typename Lhs, typename Rhs, int LhsRows>
00351 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
00352 {
00353   typedef typename Lhs::Index Index;
00354   static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00355   {
00356     res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
00357   }
00358 };
00359 
00360 template<typename Lhs, typename Rhs>
00361 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
00362 {
00363   typedef typename Lhs::Index Index;
00364   static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00365   {
00366     res = lhs.transpose().cwiseProduct(rhs).sum();
00367   }
00368 };
00369 
00370 template<typename Lhs, typename Rhs, typename RetScalar>
00371 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
00372 {
00373   typedef typename Lhs::Index Index;
00374   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00375   {
00376     product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
00377   }
00378 };
00379 
00380 /*******************
00381 *** Packet path  ***
00382 *******************/
00383 
00384 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00385 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
00386 {
00387   typedef typename Lhs::Index Index;
00388   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00389   {
00390     product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
00391     res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
00392   }
00393 };
00394 
00395 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00396 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
00397 {
00398   typedef typename Lhs::Index Index;
00399   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00400   {
00401     product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
00402     res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
00403   }
00404 };
00405 
00406 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00407 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
00408 {
00409   typedef typename Lhs::Index Index;
00410   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00411   {
00412     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
00413   }
00414 };
00415 
00416 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00417 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
00418 {
00419   typedef typename Lhs::Index Index;
00420   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00421   {
00422     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
00423   }
00424 };
00425 
00426 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00427 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
00428 {
00429   typedef typename Lhs::Index Index;
00430   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
00431   {
00432     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00433     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
00434       for(Index i = 1; i < lhs.cols(); ++i)
00435         res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
00436   }
00437 };
00438 
00439 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00440 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
00441 {
00442   typedef typename Lhs::Index Index;
00443   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
00444   {
00445     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00446     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
00447       for(Index i = 1; i < lhs.cols(); ++i)
00448         res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
00449   }
00450 };
00451 
00452 } // end namespace internal
00453 
00454 } // end namespace Eigen
00455 
00456 #endif // EIGEN_COEFFBASED_PRODUCT_H