00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
00026 #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
00027
00028 namespace Eigen {
00029
00030 namespace internal {
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059 template <typename Scalar, typename Index,
00060 int Mode, bool LhsIsTriangular,
00061 int LhsStorageOrder, bool ConjugateLhs,
00062 int RhsStorageOrder, bool ConjugateRhs,
00063 int ResStorageOrder, int Version = Specialized>
00064 struct product_triangular_matrix_matrix;
00065
00066 template <typename Scalar, typename Index,
00067 int Mode, bool LhsIsTriangular,
00068 int LhsStorageOrder, bool ConjugateLhs,
00069 int RhsStorageOrder, bool ConjugateRhs, int Version>
00070 struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
00071 LhsStorageOrder,ConjugateLhs,
00072 RhsStorageOrder,ConjugateRhs,RowMajor,Version>
00073 {
00074 static EIGEN_STRONG_INLINE void run(
00075 Index rows, Index cols, Index depth,
00076 const Scalar* lhs, Index lhsStride,
00077 const Scalar* rhs, Index rhsStride,
00078 Scalar* res, Index resStride,
00079 Scalar alpha)
00080 {
00081 product_triangular_matrix_matrix<Scalar, Index,
00082 (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
00083 (!LhsIsTriangular),
00084 RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
00085 ConjugateRhs,
00086 LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
00087 ConjugateLhs,
00088 ColMajor>
00089 ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
00090 }
00091 };
00092
00093
00094 template <typename Scalar, typename Index, int Mode,
00095 int LhsStorageOrder, bool ConjugateLhs,
00096 int RhsStorageOrder, bool ConjugateRhs, int Version>
00097 struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
00098 LhsStorageOrder,ConjugateLhs,
00099 RhsStorageOrder,ConjugateRhs,ColMajor,Version>
00100 {
00101
00102 typedef gebp_traits<Scalar,Scalar> Traits;
00103 enum {
00104 SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
00105 IsLower = (Mode&Lower) == Lower,
00106 SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
00107 };
00108
00109 static EIGEN_DONT_INLINE void run(
00110 Index _rows, Index _cols, Index _depth,
00111 const Scalar* _lhs, Index lhsStride,
00112 const Scalar* _rhs, Index rhsStride,
00113 Scalar* res, Index resStride,
00114 Scalar alpha)
00115 {
00116
00117 Index diagSize = (std::min)(_rows,_depth);
00118 Index rows = IsLower ? _rows : diagSize;
00119 Index depth = IsLower ? diagSize : _depth;
00120 Index cols = _cols;
00121
00122 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
00123 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
00124
00125 Index kc = depth;
00126 Index mc = rows;
00127 Index nc = cols;
00128 computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
00129 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
00130 std::size_t sizeB = sizeW + kc*cols;
00131 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
00132 ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
00133 Scalar* blockB = allocatedBlockB + sizeW;
00134
00135 Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
00136 triangularBuffer.setZero();
00137 if((Mode&ZeroDiag)==ZeroDiag)
00138 triangularBuffer.diagonal().setZero();
00139 else
00140 triangularBuffer.diagonal().setOnes();
00141
00142 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
00143 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
00144 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
00145
00146 for(Index k2=IsLower ? depth : 0;
00147 IsLower ? k2>0 : k2<depth;
00148 IsLower ? k2-=kc : k2+=kc)
00149 {
00150 Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
00151 Index actual_k2 = IsLower ? k2-actual_kc : k2;
00152
00153
00154 if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
00155 {
00156 actual_kc = rows-k2;
00157 k2 = k2+actual_kc-kc;
00158 }
00159
00160 pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols);
00161
00162
00163
00164
00165
00166
00167
00168 if(IsLower || actual_k2<rows)
00169 {
00170
00171 for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
00172 {
00173 Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
00174 Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
00175 Index startBlock = actual_k2+k1;
00176 Index blockBOffset = k1;
00177
00178
00179
00180
00181 for (Index k=0;k<actualPanelWidth;++k)
00182 {
00183 if (SetDiag)
00184 triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
00185 for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
00186 triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
00187 }
00188 pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
00189
00190 gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
00191 actualPanelWidth, actual_kc, 0, blockBOffset);
00192
00193
00194 if (lengthTarget>0)
00195 {
00196 Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
00197
00198 pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
00199
00200 gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
00201 actualPanelWidth, actual_kc, 0, blockBOffset);
00202 }
00203 }
00204 }
00205
00206 {
00207 Index start = IsLower ? k2 : 0;
00208 Index end = IsLower ? rows : (std::min)(actual_k2,rows);
00209 for(Index i2=start; i2<end; i2+=mc)
00210 {
00211 const Index actual_mc = (std::min)(i2+mc,end)-i2;
00212 gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
00213 (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
00214
00215 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
00216 }
00217 }
00218 }
00219 }
00220 };
00221
00222
00223 template <typename Scalar, typename Index, int Mode,
00224 int LhsStorageOrder, bool ConjugateLhs,
00225 int RhsStorageOrder, bool ConjugateRhs, int Version>
00226 struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
00227 LhsStorageOrder,ConjugateLhs,
00228 RhsStorageOrder,ConjugateRhs,ColMajor,Version>
00229 {
00230 typedef gebp_traits<Scalar,Scalar> Traits;
00231 enum {
00232 SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
00233 IsLower = (Mode&Lower) == Lower,
00234 SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
00235 };
00236
00237 static EIGEN_DONT_INLINE void run(
00238 Index _rows, Index _cols, Index _depth,
00239 const Scalar* _lhs, Index lhsStride,
00240 const Scalar* _rhs, Index rhsStride,
00241 Scalar* res, Index resStride,
00242 Scalar alpha)
00243 {
00244
00245 Index diagSize = (std::min)(_cols,_depth);
00246 Index rows = _rows;
00247 Index depth = IsLower ? _depth : diagSize;
00248 Index cols = IsLower ? diagSize : _cols;
00249
00250 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
00251 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
00252
00253 Index kc = depth;
00254 Index mc = rows;
00255 Index nc = cols;
00256 computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
00257
00258 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
00259 std::size_t sizeB = sizeW + kc*cols;
00260 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
00261 ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
00262 Scalar* blockB = allocatedBlockB + sizeW;
00263
00264 Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
00265 triangularBuffer.setZero();
00266 if((Mode&ZeroDiag)==ZeroDiag)
00267 triangularBuffer.diagonal().setZero();
00268 else
00269 triangularBuffer.diagonal().setOnes();
00270
00271 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
00272 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
00273 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
00274 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
00275
00276 for(Index k2=IsLower ? 0 : depth;
00277 IsLower ? k2<depth : k2>0;
00278 IsLower ? k2+=kc : k2-=kc)
00279 {
00280 Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
00281 Index actual_k2 = IsLower ? k2 : k2-actual_kc;
00282
00283
00284 if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
00285 {
00286 actual_kc = cols-k2;
00287 k2 = actual_k2 + actual_kc - kc;
00288 }
00289
00290
00291 Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
00292
00293 Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
00294
00295 Scalar* geb = blockB+ts*ts;
00296
00297 pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
00298
00299
00300 if(ts>0)
00301 {
00302 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
00303 {
00304 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
00305 Index actual_j2 = actual_k2 + j2;
00306 Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
00307 Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
00308
00309 pack_rhs_panel(blockB+j2*actual_kc,
00310 &rhs(actual_k2+panelOffset, actual_j2), rhsStride,
00311 panelLength, actualPanelWidth,
00312 actual_kc, panelOffset);
00313
00314
00315 for (Index j=0;j<actualPanelWidth;++j)
00316 {
00317 if (SetDiag)
00318 triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
00319 for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
00320 triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
00321 }
00322
00323 pack_rhs_panel(blockB+j2*actual_kc,
00324 triangularBuffer.data(), triangularBuffer.outerStride(),
00325 actualPanelWidth, actualPanelWidth,
00326 actual_kc, j2);
00327 }
00328 }
00329
00330 for (Index i2=0; i2<rows; i2+=mc)
00331 {
00332 const Index actual_mc = (std::min)(mc,rows-i2);
00333 pack_lhs(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
00334
00335
00336 if(ts>0)
00337 {
00338 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
00339 {
00340 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
00341 Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
00342 Index blockOffset = IsLower ? j2 : 0;
00343
00344 gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride,
00345 blockA, blockB+j2*actual_kc,
00346 actual_mc, panelLength, actualPanelWidth,
00347 alpha,
00348 actual_kc, actual_kc,
00349 blockOffset, blockOffset,
00350 allocatedBlockB);
00351 }
00352 }
00353 gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
00354 blockA, geb, actual_mc, actual_kc, rs,
00355 alpha,
00356 -1, -1, 0, 0, allocatedBlockB);
00357 }
00358 }
00359 }
00360 };
00361
00362
00363
00364
00365
00366 template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
00367 struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> >
00368 : traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> >
00369 {};
00370
00371 }
00372
00373 template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
00374 struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
00375 : public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs >
00376 {
00377 EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
00378
00379 TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
00380
00381 template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
00382 {
00383 typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
00384 typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
00385
00386 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
00387 * RhsBlasTraits::extractScalarFactor(m_rhs);
00388
00389 internal::product_triangular_matrix_matrix<Scalar, Index,
00390 Mode, LhsIsTriangular,
00391 (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
00392 (internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
00393 (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
00394 ::run(
00395 lhs.rows(), rhs.cols(), lhs.cols(),
00396 &lhs.coeffRef(0,0), lhs.outerStride(),
00397 &rhs.coeffRef(0,0), rhs.outerStride(),
00398 &dst.coeffRef(0,0), dst.outerStride(),
00399 actualAlpha
00400 );
00401 }
00402 };
00403
00404 }
00405
00406 #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H