Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef EIGEN_PARALLELIZER_H
00026 #define EIGEN_PARALLELIZER_H
00027
00028 namespace Eigen {
00029
00030 namespace internal {
00031
00033 inline void manage_multi_threading(Action action, int* v)
00034 {
00035 static EIGEN_UNUSED int m_maxThreads = -1;
00036
00037 if(action==SetAction)
00038 {
00039 eigen_internal_assert(v!=0);
00040 m_maxThreads = *v;
00041 }
00042 else if(action==GetAction)
00043 {
00044 eigen_internal_assert(v!=0);
00045 #ifdef EIGEN_HAS_OPENMP
00046 if(m_maxThreads>0)
00047 *v = m_maxThreads;
00048 else
00049 *v = omp_get_max_threads();
00050 #else
00051 *v = 1;
00052 #endif
00053 }
00054 else
00055 {
00056 eigen_internal_assert(false);
00057 }
00058 }
00059
00062 inline int nbThreads()
00063 {
00064 int ret;
00065 manage_multi_threading(GetAction, &ret);
00066 return ret;
00067 }
00068
00071 inline void setNbThreads(int v)
00072 {
00073 manage_multi_threading(SetAction, &v);
00074 }
00075
00076 template<typename Index> struct GemmParallelInfo
00077 {
00078 GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
00079
00080 int volatile sync;
00081 int volatile users;
00082
00083 Index rhs_start;
00084 Index rhs_length;
00085 };
00086
00087 template<bool Condition, typename Functor, typename Index>
00088 void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
00089 {
00090
00091
00092 #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
00093
00094
00095
00096
00097 EIGEN_UNUSED_VARIABLE(transpose);
00098 func(0,rows, 0,cols);
00099 #else
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109 if((!Condition) || (omp_get_num_threads()>1))
00110 return func(0,rows, 0,cols);
00111
00112 Index size = transpose ? cols : rows;
00113
00114
00115
00116 Index max_threads = std::max<Index>(1,size / 32);
00117
00118
00119 Index threads = std::min<Index>(nbThreads(), max_threads);
00120
00121 if(threads==1)
00122 return func(0,rows, 0,cols);
00123
00124 func.initParallelSession();
00125
00126 if(transpose)
00127 std::swap(rows,cols);
00128
00129 Index blockCols = (cols / threads) & ~Index(0x3);
00130 Index blockRows = (rows / threads) & ~Index(0x7);
00131
00132 GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
00133
00134 #pragma omp parallel for schedule(static,1) num_threads(threads)
00135 for(Index i=0; i<threads; ++i)
00136 {
00137 Index r0 = i*blockRows;
00138 Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
00139
00140 Index c0 = i*blockCols;
00141 Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
00142
00143 info[i].rhs_start = c0;
00144 info[i].rhs_length = actualBlockCols;
00145
00146 if(transpose)
00147 func(0, cols, r0, actualBlockRows, info);
00148 else
00149 func(r0, actualBlockRows, 0,cols, info);
00150 }
00151
00152 delete[] info;
00153 #endif
00154 }
00155
00156 }
00157
00158 }
00159
00160 #endif // EIGEN_PARALLELIZER_H