Redux.h
Go to the documentation of this file.
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
00005 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
00006 //
00007 // Eigen is free software; you can redistribute it and/or
00008 // modify it under the terms of the GNU Lesser General Public
00009 // License as published by the Free Software Foundation; either
00010 // version 3 of the License, or (at your option) any later version.
00011 //
00012 // Alternatively, you can redistribute it and/or
00013 // modify it under the terms of the GNU General Public License as
00014 // published by the Free Software Foundation; either version 2 of
00015 // the License, or (at your option) any later version.
00016 //
00017 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
00018 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00019 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
00020 // GNU General Public License for more details.
00021 //
00022 // You should have received a copy of the GNU Lesser General Public
00023 // License and a copy of the GNU General Public License along with
00024 // Eigen. If not, see <http://www.gnu.org/licenses/>.
00025 
00026 #ifndef EIGEN_REDUX_H
00027 #define EIGEN_REDUX_H
00028 
00029 namespace Eigen { 
00030 
00031 namespace internal {
00032 
00033 // TODO
00034 //  * implement other kind of vectorization
00035 //  * factorize code
00036 
00037 /***************************************************************************
00038 * Part 1 : the logic deciding a strategy for vectorization and unrolling
00039 ***************************************************************************/
00040 
00041 template<typename Func, typename Derived>
00042 struct redux_traits
00043 {
00044 public:
00045   enum {
00046     PacketSize = packet_traits<typename Derived::Scalar>::size,
00047     InnerMaxSize = int(Derived::IsRowMajor)
00048                  ? Derived::MaxColsAtCompileTime
00049                  : Derived::MaxRowsAtCompileTime
00050   };
00051 
00052   enum {
00053     MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
00054                   && (functor_traits<Func>::PacketAccess),
00055     MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
00056     MaySliceVectorize  = MightVectorize && int(InnerMaxSize)>=3*PacketSize
00057   };
00058 
00059 public:
00060   enum {
00061     Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
00062               : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
00063                                         : int(DefaultTraversal)
00064   };
00065 
00066 public:
00067   enum {
00068     Cost = (  Derived::SizeAtCompileTime == Dynamic
00069            || Derived::CoeffReadCost == Dynamic
00070            || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
00071            ) ? Dynamic
00072            : Derived::SizeAtCompileTime * Derived::CoeffReadCost
00073                + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
00074     UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
00075   };
00076 
00077 public:
00078   enum {
00079     Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
00080               ? CompleteUnrolling
00081               : NoUnrolling
00082   };
00083 };
00084 
00085 /***************************************************************************
00086 * Part 2 : unrollers
00087 ***************************************************************************/
00088 
00089 /*** no vectorization ***/
00090 
00091 template<typename Func, typename Derived, int Start, int Length>
00092 struct redux_novec_unroller
00093 {
00094   enum {
00095     HalfLength = Length/2
00096   };
00097 
00098   typedef typename Derived::Scalar Scalar;
00099 
00100   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
00101   {
00102     return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
00103                 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
00104   }
00105 };
00106 
00107 template<typename Func, typename Derived, int Start>
00108 struct redux_novec_unroller<Func, Derived, Start, 1>
00109 {
00110   enum {
00111     outer = Start / Derived::InnerSizeAtCompileTime,
00112     inner = Start % Derived::InnerSizeAtCompileTime
00113   };
00114 
00115   typedef typename Derived::Scalar Scalar;
00116 
00117   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
00118   {
00119     return mat.coeffByOuterInner(outer, inner);
00120   }
00121 };
00122 
00123 // This is actually dead code and will never be called. It is required
00124 // to prevent false warnings regarding failed inlining though
00125 // for 0 length run() will never be called at all.
00126 template<typename Func, typename Derived, int Start>
00127 struct redux_novec_unroller<Func, Derived, Start, 0>
00128 {
00129   typedef typename Derived::Scalar Scalar;
00130   static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
00131 };
00132 
00133 /*** vectorization ***/
00134 
00135 template<typename Func, typename Derived, int Start, int Length>
00136 struct redux_vec_unroller
00137 {
00138   enum {
00139     PacketSize = packet_traits<typename Derived::Scalar>::size,
00140     HalfLength = Length/2
00141   };
00142 
00143   typedef typename Derived::Scalar Scalar;
00144   typedef typename packet_traits<Scalar>::type PacketScalar;
00145 
00146   static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
00147   {
00148     return func.packetOp(
00149             redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
00150             redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
00151   }
00152 };
00153 
00154 template<typename Func, typename Derived, int Start>
00155 struct redux_vec_unroller<Func, Derived, Start, 1>
00156 {
00157   enum {
00158     index = Start * packet_traits<typename Derived::Scalar>::size,
00159     outer = index / int(Derived::InnerSizeAtCompileTime),
00160     inner = index % int(Derived::InnerSizeAtCompileTime),
00161     alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
00162   };
00163 
00164   typedef typename Derived::Scalar Scalar;
00165   typedef typename packet_traits<Scalar>::type PacketScalar;
00166 
00167   static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
00168   {
00169     return mat.template packetByOuterInner<alignment>(outer, inner);
00170   }
00171 };
00172 
00173 /***************************************************************************
00174 * Part 3 : implementation of all cases
00175 ***************************************************************************/
00176 
00177 template<typename Func, typename Derived,
00178          int Traversal = redux_traits<Func, Derived>::Traversal,
00179          int Unrolling = redux_traits<Func, Derived>::Unrolling
00180 >
00181 struct redux_impl;
00182 
00183 template<typename Func, typename Derived>
00184 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
00185 {
00186   typedef typename Derived::Scalar Scalar;
00187   typedef typename Derived::Index Index;
00188   static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
00189   {
00190     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
00191     Scalar res;
00192     res = mat.coeffByOuterInner(0, 0);
00193     for(Index i = 1; i < mat.innerSize(); ++i)
00194       res = func(res, mat.coeffByOuterInner(0, i));
00195     for(Index i = 1; i < mat.outerSize(); ++i)
00196       for(Index j = 0; j < mat.innerSize(); ++j)
00197         res = func(res, mat.coeffByOuterInner(i, j));
00198     return res;
00199   }
00200 };
00201 
00202 template<typename Func, typename Derived>
00203 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
00204   : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
00205 {};
00206 
00207 template<typename Func, typename Derived>
00208 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
00209 {
00210   typedef typename Derived::Scalar Scalar;
00211   typedef typename packet_traits<Scalar>::type PacketScalar;
00212   typedef typename Derived::Index Index;
00213 
00214   static Scalar run(const Derived& mat, const Func& func)
00215   {
00216     const Index size = mat.size();
00217     eigen_assert(size && "you are using an empty matrix");
00218     const Index packetSize = packet_traits<Scalar>::size;
00219     const Index alignedStart = internal::first_aligned(mat);
00220     enum {
00221       alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
00222                 ? Aligned : Unaligned
00223     };
00224     const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
00225     const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
00226     const Index alignedEnd2 = alignedStart + alignedSize2;
00227     const Index alignedEnd  = alignedStart + alignedSize;
00228     Scalar res;
00229     if(alignedSize)
00230     {
00231       PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
00232       if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
00233       {
00234         PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
00235         for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
00236         {
00237           packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
00238           packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
00239         }
00240 
00241         packet_res0 = func.packetOp(packet_res0,packet_res1);
00242         if(alignedEnd>alignedEnd2)
00243           packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
00244       }
00245       res = func.predux(packet_res0);
00246 
00247       for(Index index = 0; index < alignedStart; ++index)
00248         res = func(res,mat.coeff(index));
00249 
00250       for(Index index = alignedEnd; index < size; ++index)
00251         res = func(res,mat.coeff(index));
00252     }
00253     else // too small to vectorize anything.
00254          // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
00255     {
00256       res = mat.coeff(0);
00257       for(Index index = 1; index < size; ++index)
00258         res = func(res,mat.coeff(index));
00259     }
00260 
00261     return res;
00262   }
00263 };
00264 
00265 template<typename Func, typename Derived>
00266 struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
00267 {
00268   typedef typename Derived::Scalar Scalar;
00269   typedef typename packet_traits<Scalar>::type PacketScalar;
00270   typedef typename Derived::Index Index;
00271 
00272   static Scalar run(const Derived& mat, const Func& func)
00273   {
00274     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
00275     const Index innerSize = mat.innerSize();
00276     const Index outerSize = mat.outerSize();
00277     enum {
00278       packetSize = packet_traits<Scalar>::size
00279     };
00280     const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
00281     Scalar res;
00282     if(packetedInnerSize)
00283     {
00284       PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
00285       for(Index j=0; j<outerSize; ++j)
00286         for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
00287           packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
00288 
00289       res = func.predux(packet_res);
00290       for(Index j=0; j<outerSize; ++j)
00291         for(Index i=packetedInnerSize; i<innerSize; ++i)
00292           res = func(res, mat.coeffByOuterInner(j,i));
00293     }
00294     else // too small to vectorize anything.
00295          // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
00296     {
00297       res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
00298     }
00299 
00300     return res;
00301   }
00302 };
00303 
00304 template<typename Func, typename Derived>
00305 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
00306 {
00307   typedef typename Derived::Scalar Scalar;
00308   typedef typename packet_traits<Scalar>::type PacketScalar;
00309   enum {
00310     PacketSize = packet_traits<Scalar>::size,
00311     Size = Derived::SizeAtCompileTime,
00312     VectorizedSize = (Size / PacketSize) * PacketSize
00313   };
00314   static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
00315   {
00316     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
00317     Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
00318     if (VectorizedSize != Size)
00319       res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
00320     return res;
00321   }
00322 };
00323 
00324 } // end namespace internal
00325 
00326 /***************************************************************************
00327 * Part 4 : public API
00328 ***************************************************************************/
00329 
00330 
00338 template<typename Derived>
00339 template<typename Func>
00340 EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
00341 DenseBase<Derived>::redux(const Func& func) const
00342 {
00343   typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
00344   return internal::redux_impl<Func, ThisNested>
00345             ::run(derived(), func);
00346 }
00347 
00350 template<typename Derived>
00351 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00352 DenseBase<Derived>::minCoeff() const
00353 {
00354   return this->redux(Eigen::internal::scalar_min_op<Scalar>());
00355 }
00356 
00359 template<typename Derived>
00360 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00361 DenseBase<Derived>::maxCoeff() const
00362 {
00363   return this->redux(Eigen::internal::scalar_max_op<Scalar>());
00364 }
00365 
00370 template<typename Derived>
00371 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00372 DenseBase<Derived>::sum() const
00373 {
00374   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
00375     return Scalar(0);
00376   return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
00377 }
00378 
00383 template<typename Derived>
00384 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00385 DenseBase<Derived>::mean() const
00386 {
00387   return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
00388 }
00389 
00397 template<typename Derived>
00398 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00399 DenseBase<Derived>::prod() const
00400 {
00401   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
00402     return Scalar(1);
00403   return this->redux(Eigen::internal::scalar_product_op<Scalar>());
00404 }
00405 
00412 template<typename Derived>
00413 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00414 MatrixBase<Derived>::trace() const
00415 {
00416   return derived().diagonal().sum();
00417 }
00418 
00419 } // end namespace Eigen
00420 
00421 #endif // EIGEN_REDUX_H