HMSBEAGLE
1.0.0
|
00001 /* 00002 * 00003 * Copyright 2009 Phylogenetic Likelihood Working Group 00004 * 00005 * This file is part of BEAGLE. 00006 * 00007 * BEAGLE is free software: you can redistribute it and/or modify 00008 * it under the terms of the GNU Lesser General Public License as 00009 * published by the Free Software Foundation, either version 3 of 00010 * the License, or (at your option) any later version. 00011 * 00012 * BEAGLE is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with BEAGLE. If not, see 00019 * <http://www.gnu.org/licenses/>. 00020 * 00021 * @brief GPU kernel launcher 00022 * 00023 * @author Marc Suchard 00024 * @author Daniel Ayres 00025 */ 00026 00027 #ifndef __KernelLauncher__ 00028 #define __KernelLauncher__ 00029 00030 #ifdef HAVE_CONFIG_H 00031 #include "libhmsbeagle/config.h" 00032 #endif 00033 00034 #include "libhmsbeagle/GPU/GPUImplDefs.h" 00035 #include "libhmsbeagle/GPU/GPUInterface.h" 00036 00037 class KernelLauncher { 00038 private: 00039 GPUInterface* gpu; 00040 00041 GPUFunction fMatrixMulADB; 00042 GPUFunction fMatrixMulADBFirstDeriv; 00043 GPUFunction fMatrixMulADBSecondDeriv; 00044 00045 GPUFunction fPartialsPartialsByPatternBlockCoherent; 00046 GPUFunction fPartialsPartialsByPatternBlockAutoScaling; 00047 GPUFunction fPartialsPartialsByPatternBlockFixedScaling; 00048 GPUFunction fPartialsPartialsByPatternBlockCheckScaling; 00049 GPUFunction fPartialsPartialsByPatternBlockFixedCheckScaling; 00050 GPUFunction fStatesPartialsByPatternBlockCoherent; 00051 GPUFunction fStatesPartialsByPatternBlockFixedScaling; 00052 GPUFunction fStatesStatesByPatternBlockCoherent; 00053 GPUFunction fStatesStatesByPatternBlockFixedScaling; 00054 GPUFunction fPartialsPartialsEdgeLikelihoods; 00055 GPUFunction fPartialsPartialsEdgeLikelihoodsSecondDeriv; 00056 GPUFunction fStatesPartialsEdgeLikelihoods; 00057 GPUFunction fStatesPartialsEdgeLikelihoodsSecondDeriv; 00058 00059 GPUFunction fIntegrateLikelihoodsDynamicScaling; 00060 GPUFunction fIntegrateLikelihoodsDynamicScalingSecondDeriv; 00061 GPUFunction fAccumulateFactorsDynamicScaling; 00062 GPUFunction fAccumulateFactorsAutoScaling; 00063 GPUFunction fRemoveFactorsDynamicScaling; 00064 GPUFunction fPartialsDynamicScaling; 00065 GPUFunction fPartialsDynamicScalingAccumulate; 00066 GPUFunction fPartialsDynamicScalingAccumulateDifference; 00067 GPUFunction fPartialsDynamicScalingAccumulateReciprocal; 00068 GPUFunction fPartialsDynamicScalingSlow; 00069 GPUFunction fIntegrateLikelihoods; 00070 GPUFunction fIntegrateLikelihoodsSecondDeriv; 00071 GPUFunction fIntegrateLikelihoodsMulti; 00072 GPUFunction fIntegrateLikelihoodsFixedScaleMulti; 00073 GPUFunction fIntegrateLikelihoodsAutoScaling; 00074 00075 GPUFunction fSumSites1; 00076 GPUFunction fSumSites2; 00077 GPUFunction fSumSites3; 00078 00079 Dim3Int bgTransitionProbabilitiesBlock; 00080 Dim3Int bgTransitionProbabilitiesGrid; 00081 Dim3Int bgPeelingBlock; 00082 Dim3Int bgPeelingGrid; 00083 Dim3Int bgLikelihoodBlock; 00084 Dim3Int bgLikelihoodGrid; 00085 Dim3Int bgAccumulateBlock; 00086 Dim3Int bgAccumulateGrid; 00087 Dim3Int bgScaleBlock; 00088 Dim3Int bgScaleGrid; 00089 Dim3Int bgSumSitesBlock; 00090 Dim3Int bgSumSitesGrid; 00091 00092 unsigned int kPaddedStateCount; 00093 unsigned int kCategoryCount; 00094 unsigned int kPatternCount; 00095 unsigned int kPatternBlockSize; 00096 unsigned int kMatrixBlockSize; 00097 unsigned int kSlowReweighing; 00098 unsigned int kMultiplyBlockSize; 00099 unsigned int kSumSitesBlockSize; 00100 long kFlags; 00101 00102 public: 00103 KernelLauncher(GPUInterface* inGpu); 00104 00105 ~KernelLauncher(); 00106 00107 // Kernel links 00108 #ifdef CUDA 00109 void GetTransitionProbabilitiesSquare(GPUPtr dMatrices, 00110 GPUPtr dPtrQueue, 00111 GPUPtr dEvec, 00112 GPUPtr dIevc, 00113 GPUPtr dEigenValues, 00114 GPUPtr distanceQueue, 00115 unsigned int totalMatrix); 00116 00117 void GetTransitionProbabilitiesSquareFirstDeriv(GPUPtr dMatrices, 00118 GPUPtr dPtrQueue, 00119 GPUPtr dEvec, 00120 GPUPtr dIevc, 00121 GPUPtr dEigenValues, 00122 GPUPtr distanceQueue, 00123 unsigned int totalMatrix); 00124 00125 void GetTransitionProbabilitiesSquareSecondDeriv(GPUPtr dMatrices, 00126 GPUPtr dPtrQueue, 00127 GPUPtr dEvec, 00128 GPUPtr dIevc, 00129 GPUPtr dEigenValues, 00130 GPUPtr distanceQueue, 00131 unsigned int totalMatrix); 00132 00133 #else //OpenCL 00134 void GetTransitionProbabilitiesSquare(GPUPtr dPtr, 00135 GPUPtr dEvec, 00136 GPUPtr dIevc, 00137 GPUPtr dEigenValues, 00138 GPUPtr distanceQueue, 00139 unsigned int totalMatrix, 00140 unsigned int index); 00141 #endif 00142 00143 void PartialsPartialsPruningDynamicCheckScaling(GPUPtr partials1, 00144 GPUPtr partials2, 00145 GPUPtr partials3, 00146 GPUPtr matrices1, 00147 GPUPtr matrices2, 00148 int writeScalingIndex, 00149 int readScalingIndex, 00150 int cumulativeScalingIndex, 00151 GPUPtr* dScalingFactors, 00152 GPUPtr* dScalingFactorsMaster, 00153 unsigned int patternCount, 00154 unsigned int categoryCount, 00155 int doRescaling, 00156 int* hRescalingTrigger, 00157 GPUPtr dRescalingTrigger, 00158 int sizeReal); 00159 00160 void PartialsPartialsPruningDynamicScaling(GPUPtr partials1, 00161 GPUPtr partials2, 00162 GPUPtr partials3, 00163 GPUPtr matrices1, 00164 GPUPtr matrices2, 00165 GPUPtr scalingFactors, 00166 GPUPtr cumulativeScaling, 00167 unsigned int patternCount, 00168 unsigned int categoryCount, 00169 int doRescaling); 00170 00171 void StatesPartialsPruningDynamicScaling(GPUPtr states1, 00172 GPUPtr partials2, 00173 GPUPtr partials3, 00174 GPUPtr matrices1, 00175 GPUPtr matrices2, 00176 GPUPtr scalingFactors, 00177 GPUPtr cumulativeScaling, 00178 unsigned int patternCount, 00179 unsigned int categoryCount, 00180 int doRescaling); 00181 00182 void StatesStatesPruningDynamicScaling(GPUPtr states1, 00183 GPUPtr states2, 00184 GPUPtr partials3, 00185 GPUPtr matrices1, 00186 GPUPtr matrices2, 00187 GPUPtr scalingFactors, 00188 GPUPtr cumulativeScaling, 00189 unsigned int patternCount, 00190 unsigned int categoryCount, 00191 int doRescaling); 00192 00193 void IntegrateLikelihoodsDynamicScaling(GPUPtr dResult, 00194 GPUPtr dRootPartials, 00195 GPUPtr dWeights, 00196 GPUPtr dFrequencies, 00197 GPUPtr dRootScalingFactors, 00198 unsigned int patternCount, 00199 unsigned int categoryCount); 00200 00201 void IntegrateLikelihoodsAutoScaling(GPUPtr dResult, 00202 GPUPtr dRootPartials, 00203 GPUPtr dWeights, 00204 GPUPtr dFrequencies, 00205 GPUPtr dRootScalingFactors, 00206 unsigned int patternCount, 00207 unsigned int categoryCount); 00208 00209 void IntegrateLikelihoodsDynamicScalingSecondDeriv(GPUPtr dResult, 00210 GPUPtr dFirstDerivResult, 00211 GPUPtr dSecondDerivResult, 00212 GPUPtr dRootPartials, 00213 GPUPtr dRootFirstDeriv, 00214 GPUPtr dRootSecondDeriv, 00215 GPUPtr dWeights, 00216 GPUPtr dFrequencies, 00217 GPUPtr dRootScalingFactors, 00218 unsigned int patternCount, 00219 unsigned int categoryCount); 00220 00221 void PartialsPartialsEdgeLikelihoods(GPUPtr dPartialsTmp, 00222 GPUPtr dParentPartials, 00223 GPUPtr dChildParials, 00224 GPUPtr dTransMatrix, 00225 unsigned int patternCount, 00226 unsigned int categoryCount); 00227 00228 void PartialsPartialsEdgeLikelihoodsSecondDeriv(GPUPtr dPartialsTmp, 00229 GPUPtr dFirstDerivTmp, 00230 GPUPtr dSecondDerivTmp, 00231 GPUPtr dParentPartials, 00232 GPUPtr dChildParials, 00233 GPUPtr dTransMatrix, 00234 GPUPtr dFirstDerivMatrix, 00235 GPUPtr dSecondDerivMatrix, 00236 unsigned int patternCount, 00237 unsigned int categoryCount); 00238 00239 00240 void StatesPartialsEdgeLikelihoods(GPUPtr dPartialsTmp, 00241 GPUPtr dParentPartials, 00242 GPUPtr dChildStates, 00243 GPUPtr dTransMatrix, 00244 unsigned int patternCount, 00245 unsigned int categoryCount); 00246 00247 void StatesPartialsEdgeLikelihoodsSecondDeriv(GPUPtr dPartialsTmp, 00248 GPUPtr dFirstDerivTmp, 00249 GPUPtr dSecondDerivTmp, 00250 GPUPtr dParentPartials, 00251 GPUPtr dChildStates, 00252 GPUPtr dTransMatrix, 00253 GPUPtr dFirstDerivMatrix, 00254 GPUPtr dSecondDerivMatrix, 00255 unsigned int patternCount, 00256 unsigned int categoryCount); 00257 00258 void AccumulateFactorsDynamicScaling(GPUPtr dScalingFactors, 00259 GPUPtr dNodePtrQueue, 00260 GPUPtr dRootScalingFactors, 00261 unsigned int nodeCount, 00262 unsigned int patternCount); 00263 00264 void AccumulateFactorsAutoScaling(GPUPtr dScalingFactors, 00265 GPUPtr dNodePtrQueue, 00266 GPUPtr dRootScalingFactors, 00267 unsigned int nodeCount, 00268 unsigned int patternCount, 00269 unsigned int scaleBufferSize); 00270 00271 void RemoveFactorsDynamicScaling(GPUPtr dScalingFactors, 00272 GPUPtr dNodePtrQueue, 00273 GPUPtr dRootScalingFactors, 00274 unsigned int nodeCount, 00275 unsigned int patternCount); 00276 00277 void RescalePartials(GPUPtr partials3, 00278 GPUPtr scalingFactors, 00279 GPUPtr cumulativeScaling, 00280 unsigned int patternCount, 00281 unsigned int categoryCount, 00282 unsigned int fillWithOnes); 00283 00284 void IntegrateLikelihoods(GPUPtr dResult, 00285 GPUPtr dRootPartials, 00286 GPUPtr dWeights, 00287 GPUPtr dFrequencies, 00288 unsigned int patternCount, 00289 unsigned int categoryCount); 00290 00291 void IntegrateLikelihoodsSecondDeriv(GPUPtr dResult, 00292 GPUPtr dFirstDerivResult, 00293 GPUPtr dSecondDerivResult, 00294 GPUPtr dRootPartials, 00295 GPUPtr dRootFirstDeriv, 00296 GPUPtr dRootSecondDeriv, 00297 GPUPtr dWeights, 00298 GPUPtr dFrequencies, 00299 unsigned int patternCount, 00300 unsigned int categoryCount); 00301 00302 void IntegrateLikelihoodsMulti(GPUPtr dResult, 00303 GPUPtr dRootPartials, 00304 GPUPtr dWeights, 00305 GPUPtr dFrequencies, 00306 unsigned int patternCount, 00307 unsigned int categoryCount, 00308 unsigned int takeLog); 00309 00310 void IntegrateLikelihoodsFixedScaleMulti(GPUPtr dResult, 00311 GPUPtr dRootPartials, 00312 GPUPtr dWeights, 00313 GPUPtr dFrequencies, 00314 GPUPtr dScalingFactors, 00315 GPUPtr dPtrQueue, 00316 GPUPtr dMaxScalingFactors, 00317 GPUPtr dIndexMaxScalingFactors, 00318 unsigned int patternCount, 00319 unsigned int categoryCount, 00320 unsigned int subsetCount, 00321 unsigned int subsetIndex); 00322 00323 void SumSites1(GPUPtr dArray1, 00324 GPUPtr dSum1, 00325 GPUPtr dPatternWeights, 00326 unsigned int patternCount); 00327 00328 void SumSites2(GPUPtr dArray1, 00329 GPUPtr dSum1, 00330 GPUPtr dArray2, 00331 GPUPtr dSum2, 00332 GPUPtr dPatternWeights, 00333 unsigned int patternCount); 00334 00335 void SumSites3(GPUPtr dArray1, 00336 GPUPtr dSum1, 00337 GPUPtr dArray2, 00338 GPUPtr dSum2, 00339 GPUPtr dArray3, 00340 GPUPtr dSum3, 00341 GPUPtr dPatternWeights, 00342 unsigned int patternCount); 00343 00344 void SetupKernelBlocksAndGrids(); 00345 00346 protected: 00347 void LoadKernels(); 00348 00349 }; 00350 #endif // __KernelLauncher__