// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.

#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

// pack a selfadjoint block diagonal for use with the gebp_kernel
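// For a panel of BlockRows rows starting at row i, only the (logically)
// lower triangle of the lhs is referenced; the packed panel is assembled
// from three sources:
//   - columns k < i            : plain copy of the stored coefficients
//   - columns i..i+BlockRows   : the diagonal block, mixing stored entries,
//                                their conjugated mirror, and the real diagonal
//   - columns k >= i+BlockRows : conjugated transposed copy of the stored
//                                panel below the diagonal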
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct symm_pack_lhs
{
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // normal copy
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);           // normal
    // symmetric copy
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = conj(lhs(k, i+w)); // transposed

      blockA[count++] = real(lhs(k,k));   // real (diagonal)

      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);          // normal
      ++h;
    }
    // transposed copy
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = conj(lhs(k, i+w)); // transposed
  }
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    Index peeled_mc = (rows/Pack1)*Pack1;
    for(Index i=0; i<peeled_mc; i+=Pack1)
    {
      pack<Pack1>(blockA, lhs, cols, i, count);
    }

    if(rows-peeled_mc>=Pack2)
    {
      pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
      peeled_mc += Pack2;
    }

    // do the same with mr==1
    for(Index i=peeled_mc; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);              // normal

      blockA[count++] = real(lhs(i, i));       // real (diagonal)

      for(Index k=i+1; k<cols; k++)
        blockA[count++] = conj(lhs(k, i));     // transposed
    }
  }
};

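// pack a panel of a selfadjoint rhs; as for the lhs, the panel is assembled
// from a plain part, the diagonal block, and a conjugated transposed part,
// this time split along the columns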
template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols = (cols/nr)*nr;

    // first part: normal case
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // second part: diagonal block
    for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
    {
      // again we can split vertically in three different parts (transpose, symmetric, normal)
      // transpose
      for(Index k=k2; k<j2; k++)
      {
        blockB[count+0] = conj(rhs(j2+0,k));
        blockB[count+1] = conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = conj(rhs(j2+2,k));
          blockB[count+3] = conj(rhs(j2+3,k));
        }
        count += nr;
      }
      // symmetric
      Index h = 0;
      for(Index k=j2; k<j2+nr; k++)
      {
        // normal
        for (Index w=0 ; w<h; ++w)
          blockB[count+w] = rhs(k,j2+w);

        blockB[count+h] = real(rhs(k,k));

        // transpose
        for (Index w=h+1 ; w<nr; ++w)
          blockB[count+w] = conj(rhs(j2+w,k));
        count += nr;
        ++h;
      }
      // normal
      for(Index k=j2+nr; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // third part: transposed
    for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = conj(rhs(j2+0,k));
        blockB[count+1] = conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = conj(rhs(j2+2,k));
          blockB[count+3] = conj(rhs(j2+3,k));
        }
        count += nr;
      }
    }

    // copy the remaining columns one at a time (i.e., the same as with nr==1)
    for(Index j2=packet_cols; j2<cols; ++j2)
    {
      // transpose
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = conj(rhs(j2,k));
        count += 1;
      }

      if(half==j2 && half<k2+rows)
      {
        blockB[count] = real(rhs(j2,j2));
        count += 1;
      }
      else
        half--;

      // normal
      for(Index k=half+1; k<k2+rows; k++)
      {
        blockB[count] = rhs(k,j2);
        count += 1;
      }
    }
  }
};

/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
 * the general matrix matrix product.
 */
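// At the user level these kernels are reached through selfadjoint views.
// A minimal sketch of the public API (sizes are illustrative):
//
//   MatrixXd A(n,n), B(n,n), C(n,n);
//   C.noalias() = A.selfadjointView<Lower>() * B;   // selfadjoint lhs
//   C.noalias() = B * A.selfadjointView<Upper>();   // selfadjoint rhs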
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder>
struct product_selfadjoint_matrix;

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{
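  // A row-major result is handled by evaluating the transposed product
  // res^T = rhs^T * lhs^T into a column-major result: operands are swapped
  // and their effective storage orders and conjugation flags are adjusted.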
  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res,       Index resStride,
    Scalar alpha)
  {
    product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor>
      ::run(cols, rows,  rhs, rhsStride,  lhs, lhsStride,  res, resStride,  alpha);
  }
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    Scalar alpha)
  {
    Index size = rows;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    // kc must be smaller than mc
    kc = (std::min)(kc,mc);

    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs
      // pack rhs's panel into a sequential chunk of memory
      // and expand each coeff to a constant packet for further reuse
      pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);

      // the selected lhs panel has to be split in three different parts
      // (sketched below):
      //  1 - the transposed panel above the diagonal block => transposed packed copy
      //  2 - the diagonal block => special packed copy
      //  3 - the panel below the diagonal block => generic packed copy
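      //
      // For the current columns [k2,k2+kc), only the lower triangle of the
      // lhs is referenced:
      //   rows [0,k2)       -> not stored; read transposed (and conjugated)
      //                        from the stored panel at lhs(k2,i2)
      //   rows [k2,k2+kc)   -> diagonal block, completed by symm_pack_lhs
      //   rows [k2+kc,size) -> stored as-is, packed with the generic gemm copy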
      for(Index i2=0; i2<k2; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the block diagonal
      {
        const Index actual_mc = (std::min)(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }

      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }
};

// matrix * selfadjoint product
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
{
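  // With only the rhs selfadjoint, the generic gemm blocking loop is reused
  // unchanged; the only difference is that symm_pack_rhs reconstructs the
  // missing triangle of the rhs while packing.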
  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res,        Index resStride,
    Scalar alpha)
  {
    Index size = cols;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }
};

} // end namespace internal

/***************************************************************************
* Wrapper to product_selfadjoint_matrix
***************************************************************************/

namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
} // end namespace internal

template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
{
  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)

  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

  template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);

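    // The kernels reference a single triangle of each selfadjoint operand;
    // an upper-stored operand is therefore handled as the lower-stored view
    // of its adjoint, which is why Is*Upper is XOR-ed into the storage-order
    // and conjugation template flags below.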
    internal::product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(LhsIsUpper,
                        internal::traits<Lhs>::Flags & RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
      EIGEN_LOGICAL_XOR(RhsIsUpper,
                        internal::traits<Rhs>::Flags & RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags & RowMajorBit ? RowMajor : ColMajor>
      ::run(
        lhs.rows(), rhs.cols(),                    // sizes
        &lhs.coeffRef(0,0), lhs.outerStride(),     // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),     // rhs info
        &dst.coeffRef(0,0), dst.outerStride(),     // result info
        actualAlpha                                // alpha
      );
  }
};

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H