Parallelizer.h
Go to the documentation of this file.
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
00005 //
00006 // Eigen is free software; you can redistribute it and/or
00007 // modify it under the terms of the GNU Lesser General Public
00008 // License as published by the Free Software Foundation; either
00009 // version 3 of the License, or (at your option) any later version.
00010 //
00011 // Alternatively, you can redistribute it and/or
00012 // modify it under the terms of the GNU General Public License as
00013 // published by the Free Software Foundation; either version 2 of
00014 // the License, or (at your option) any later version.
00015 //
00016 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
00017 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00018 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
00019 // GNU General Public License for more details.
00020 //
00021 // You should have received a copy of the GNU Lesser General Public
00022 // License and a copy of the GNU General Public License along with
00023 // Eigen. If not, see <http://www.gnu.org/licenses/>.
00024 
00025 #ifndef EIGEN_PARALLELIZER_H
00026 #define EIGEN_PARALLELIZER_H
00027 
00028 namespace Eigen { 
00029 
00030 namespace internal {
00031 
00033 inline void manage_multi_threading(Action action, int* v)
00034 {
00035   static EIGEN_UNUSED int m_maxThreads = -1;
00036 
00037   if(action==SetAction)
00038   {
00039     eigen_internal_assert(v!=0);
00040     m_maxThreads = *v;
00041   }
00042   else if(action==GetAction)
00043   {
00044     eigen_internal_assert(v!=0);
00045     #ifdef EIGEN_HAS_OPENMP
00046     if(m_maxThreads>0)
00047       *v = m_maxThreads;
00048     else
00049       *v = omp_get_max_threads();
00050     #else
00051     *v = 1;
00052     #endif
00053   }
00054   else
00055   {
00056     eigen_internal_assert(false);
00057   }
00058 }
00059 
00062 inline int nbThreads()
00063 {
00064   int ret;
00065   manage_multi_threading(GetAction, &ret);
00066   return ret;
00067 }
00068 
00071 inline void setNbThreads(int v)
00072 {
00073   manage_multi_threading(SetAction, &v);
00074 }
00075 
/** \internal
  * Per-thread bookkeeping record used by parallelize_gemm: one entry per
  * worker thread, passed through to the product kernel (\c func) so threads
  * can share each other's packed rhs panels.
  * NOTE(review): the exact synchronization protocol built on \c sync and
  * \c users lives in the kernel, not in this file — the comments on those
  * members below describe only what is visible here.
  */
template<typename Index> struct GemmParallelInfo
{
  GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}

  // volatile: read/written concurrently by several threads without locks.
  // sync starts at -1 (presumably "nothing published yet" — confirm against
  // the GEBP kernel) and users counts threads using this entry's rhs panel.
  int volatile sync;
  int volatile users;

  // Horizontal slice of the rhs assigned to this thread: first column and
  // column count (set by parallelize_gemm before calling func).
  Index rhs_start;
  Index rhs_length;
};
00086 
/** \internal
  * Runs the product functor \c func over a rows x cols destination, either
  * sequentially or split across OpenMP threads.
  *
  * \tparam Condition  compile-time switch: when false, parallelization is
  *                    skipped entirely and func is run once on the full range.
  * \param func        callable as func(rowStart, rowCount, colStart, colCount
  *                    [, GemmParallelInfo*]); must also provide
  *                    initParallelSession() (see the OpenMP branch below).
  * \param transpose   true when the destination is such that rows/cols must
  *                    be swapped before splitting (row-major destinations).
  */
template<bool Condition, typename Functor, typename Index>
void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
{
  // TODO when EIGEN_USE_BLAS is defined,
  // we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
  // FIXME the transpose variable is only needed to properly split
  // the matrix product when multithreading is enabled. This is a temporary
  // fix to support row-major destination matrices. This whole
  // parallelizer mechanism has to be redisigned anyway.
  EIGEN_UNUSED_VARIABLE(transpose);
  // Sequential fallback: one call covering the whole destination.
  func(0,rows, 0,cols);
#else

  // Dynamically check whether we should enable or disable OpenMP.
  // The conditions are:
  // - the max number of threads we can create is greater than 1
  // - we are not already in a parallel code
  // - the sizes are large enough

  // 1- are we already in a parallel session?
  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
  if((!Condition) || (omp_get_num_threads()>1))
    return func(0,rows, 0,cols);

  // The splitting happens along the "long" dimension of the destination.
  Index size = transpose ? cols : rows;

  // 2- compute the maximal number of threads from the size of the product:
  // FIXME this has to be fine tuned
  // The divisor 32 gives each thread at least 32 rows/cols of work.
  Index max_threads = std::max<Index>(1,size / 32);

  // 3 - compute the number of threads we are going to use
  Index threads = std::min<Index>(nbThreads(), max_threads);

  if(threads==1)
    return func(0,rows, 0,cols);

  // Lets the functor reset any per-session state before the workers start.
  func.initParallelSession();

  if(transpose)
    std::swap(rows,cols);

  // Round block sizes down to multiples of 4 (cols) / 8 (rows) — presumably
  // to keep panels aligned with the kernel's packing granularity; the last
  // thread absorbs the remainder below. Note blockCols/blockRows can be 0
  // for very small sizes, in which case the last thread does all the work.
  Index blockCols = (cols / threads) & ~Index(0x3);
  Index blockRows = (rows / threads) & ~Index(0x7);
  
  // One shared info record per worker (see GemmParallelInfo).
  // NOTE(review): raw new[]/delete[] — leaks if func throws; consider an
  // RAII holder if exceptions can escape the parallel region.
  GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];

  // schedule(static,1): thread i executes exactly iteration i, so the loop
  // index doubles as the thread/info-slot index.
  #pragma omp parallel for schedule(static,1) num_threads(threads)
  for(Index i=0; i<threads; ++i)
  {
    // This thread's slice of rows; the last thread takes the remainder.
    Index r0 = i*blockRows;
    Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;

    // This thread's slice of columns, likewise remainder-adjusted.
    Index c0 = i*blockCols;
    Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;

    // Publish the column slice so other threads can reuse this packed rhs.
    info[i].rhs_start = c0;
    info[i].rhs_length = actualBlockCols;

    // In the transposed case the roles of rows and cols are exchanged:
    // each thread covers all cols and a row slice, or vice versa.
    if(transpose)
      func(0, cols, r0, actualBlockRows, info);
    else
      func(r0, actualBlockRows, 0,cols, info);
  }

  delete[] info;
#endif
}
00155 
00156 } // end namespace internal
00157 
00158 } // end namespace Eigen
00159 
00160 #endif // EIGEN_PARALLELIZER_H