#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

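// Packs a panel of a self-adjoint lhs for consumption by gebp_kernel: coefficients
// on one side of the diagonal are obtained by mirroring and conjugating the other
// side, and diagonal entries are taken as real.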
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct symm_pack_lhs
{
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // direct copy of the columns left of the diagonal block
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);

    // the diagonal block: one side is read mirrored and conjugated,
    // and the diagonal itself is forced to be real
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = conj(lhs(k, i+w));    // mirrored, conjugated

      blockA[count++] = real(lhs(k,k));         // real (diagonal)

      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);          // direct
      ++h;
    }

    // mirrored (conjugated) copy of the columns right of the diagonal block
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = conj(lhs(k, i+w));
  }
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    Index peeled_mc = (rows/Pack1)*Pack1;
    for(Index i=0; i<peeled_mc; i+=Pack1)
    {
      pack<Pack1>(blockA, lhs, cols, i, count);
    }

    if(rows-peeled_mc>=Pack2)
    {
      pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
      peeled_mc += Pack2;
    }

    // remaining rows are packed one by one
    for(Index i=peeled_mc; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);

      blockA[count++] = real(lhs(i, i));

      for(Index k=i+1; k<cols; k++)
        blockA[count++] = conj(lhs(k, i));
    }
  }
};

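// Packs rows [k2,k2+rows) of a self-adjoint rhs into blockB; columns reaching or
// crossing the diagonal block are completed by mirroring and conjugation, and
// diagonal entries are taken as real.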
template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols = (cols/nr)*nr;

    // first part: columns j2 < k2 => direct copy of rhs(k,j2)
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // second part: columns crossing the diagonal block, split vertically into a
    // mirrored (conjugated) part, the diagonal block itself, and a direct part
    for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
    {
      // mirrored (conjugated) part
      for(Index k=k2; k<j2; k++)
      {
        blockB[count+0] = conj(rhs(j2+0,k));
        blockB[count+1] = conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = conj(rhs(j2+2,k));
          blockB[count+3] = conj(rhs(j2+3,k));
        }
        count += nr;
      }

      // the diagonal block
      Index h = 0;
      for(Index k=j2; k<j2+nr; k++)
      {
        // direct
        for (Index w=0 ; w<h; ++w)
          blockB[count+w] = rhs(k,j2+w);

        blockB[count+h] = real(rhs(k,k));

        // mirrored, conjugated
        for (Index w=h+1 ; w<nr; ++w)
          blockB[count+w] = conj(rhs(j2+w,k));
        count += nr;
        ++h;
      }

      // direct part below the diagonal block
      for(Index k=j2+nr; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // third part: columns j2 >= k2+rows => mirrored (conjugated) copy of rhs(j2,k)
    for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = conj(rhs(j2+0,k));
        blockB[count+1] = conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = conj(rhs(j2+2,k));
          blockB[count+3] = conj(rhs(j2+3,k));
        }
        count += nr;
      }
    }

    // finally, the remaining columns (packet_cols <= j2 < cols) are packed one at a time
    for(Index j2=packet_cols; j2<cols; ++j2)
    {
      // mirrored (conjugated) part
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = conj(rhs(j2,k));
        count += 1;
      }

      // diagonal coefficient, forced to be real
      if(half==j2 && half<k2+rows)
      {
        blockB[count] = real(rhs(j2,j2));
        count += 1;
      }
      else
        half--;

      // direct part
      for(Index k=half+1; k<k2+rows; k++)
      {
        blockB[count] = rhs(k,j2);
        count += 1;
      }
    }
  }
};
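
// Optimized self-adjoint matrix * matrix (SYMM/HEMM-like) product built on top of
// the general matrix-matrix product machinery (packing + gebp_kernel).
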
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder>
struct product_selfadjoint_matrix;

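// The row-major destination case is reduced to the column-major one below by
// swapping the two operands and flipping their storage orders and conjugation flags.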
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{
  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res, Index resStride,
    Scalar alpha)
  {
    product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor>
      ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
  }
};

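// Specialization for a self-adjoint lhs and a general rhs, column-major destination.
// Each column panel of the lhs is packed in three pieces: mirrored above the diagonal
// block, the diagonal block itself, and a plain copy below it.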
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
{
  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    Scalar alpha)
  {
    Index size = rows;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    // kc must be smaller than mc
    kc = (std::min)(kc,mc);

    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs;
      // pack rhs's panel into a sequential chunk of memory
      pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);

      // the selected lhs panel has to be split in three different parts:
      //  1 - the panel above the diagonal block => transposed packed copy
      //  2 - the diagonal block => special packed copy
      //  3 - the panel below the diagonal block => generic packed copy
      for(Index i2=0; i2<k2; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the diagonal block
      {
        const Index actual_mc = (std::min)(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the panel below the diagonal block => generic packed copy
      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }
};

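// Specialization for a general lhs and a self-adjoint rhs, column-major destination:
// a standard blocked gemm loop in which symm_pack_rhs reconstructs the full rhs panel.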
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
{
  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    Scalar alpha)
  {
    Index size = cols;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }
};

} // end namespace internal

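// Wrapper evaluating SelfadjointView * dense and dense * SelfadjointView products
// by dispatching to internal::product_selfadjoint_matrix.
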
namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}

template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
{
  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)

  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

  template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

    // strip scalar multiples and conjugates/transposes from the nested expressions
    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);

    // the Upper case is mapped onto the canonical Lower case by toggling the
    // storage order and the conjugation flag, hence the XORs below
    internal::product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(LhsIsUpper,
                        internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
      EIGEN_LOGICAL_XOR(RhsIsUpper,
                        internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
      ::run(
        lhs.rows(), rhs.cols(),                 // sizes
        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
        &dst.coeffRef(0,0), dst.outerStride(),  // result info
        actualAlpha                             // scale factor
      );
  }
};
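
// A minimal usage sketch (not part of this header, names are illustrative only):
// a product involving a SelfadjointView is what routes a user expression to the
// kernels above, e.g.
//
//   Eigen::MatrixXcd A = Eigen::MatrixXcd::Random(100,100);
//   Eigen::MatrixXcd B = Eigen::MatrixXcd::Random(100,20);
//   A = (A + A.adjoint()).eval();                            // make A self-adjoint
//   Eigen::MatrixXcd C = A.selfadjointView<Eigen::Lower>() * B;
//
// Only the lower triangle of A is read in this example.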

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H