HMSBEAGLE  1.0.0
libhmsbeagle/GPU/KernelLauncher.h
00001 /*
00002  *
00003  * Copyright 2009 Phylogenetic Likelihood Working Group
00004  *
00005  * This file is part of BEAGLE.
00006  *
00007  * BEAGLE is free software: you can redistribute it and/or modify
00008  * it under the terms of the GNU Lesser General Public License as
00009  * published by the Free Software Foundation, either version 3 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * BEAGLE is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with BEAGLE.  If not, see
00019  * <http://www.gnu.org/licenses/>.
00020  *
00021  * @brief GPU kernel launcher
00022  *
00023  * @author Marc Suchard
00024  * @author Daniel Ayres
00025  */
00026 
00027 #ifndef __KernelLauncher__
00028 #define __KernelLauncher__
00029 
00030 #ifdef HAVE_CONFIG_H
00031 #include "libhmsbeagle/config.h"
00032 #endif
00033 
00034 #include "libhmsbeagle/GPU/GPUImplDefs.h"
00035 #include "libhmsbeagle/GPU/GPUInterface.h"
00036 
00037 class KernelLauncher { // GPU kernel launcher (per the file header). Declaration only: every member function is defined outside this header.
00038 private:
00039     GPUInterface* gpu; // Raw pointer to the GPU backend; presumably the inGpu given to the constructor. Ownership is not visible here - TODO confirm.
00040     // GPUFunction members: presumably one handle per device kernel, resolved by LoadKernels() - TODO confirm. This group: matrix-multiply kernels with first/second derivative variants.
00041     GPUFunction fMatrixMulADB;
00042     GPUFunction fMatrixMulADBFirstDeriv;
00043     GPUFunction fMatrixMulADBSecondDeriv;
00044 // Per-pattern-block partials/states kernels in several scaling variants, followed by edge-likelihood kernels (grouping inferred from the names only).
00045     GPUFunction fPartialsPartialsByPatternBlockCoherent;
00046     GPUFunction fPartialsPartialsByPatternBlockAutoScaling;
00047     GPUFunction fPartialsPartialsByPatternBlockFixedScaling;
00048     GPUFunction fPartialsPartialsByPatternBlockCheckScaling;
00049     GPUFunction fPartialsPartialsByPatternBlockFixedCheckScaling;
00050     GPUFunction fStatesPartialsByPatternBlockCoherent;
00051     GPUFunction fStatesPartialsByPatternBlockFixedScaling;
00052     GPUFunction fStatesStatesByPatternBlockCoherent;
00053     GPUFunction fStatesStatesByPatternBlockFixedScaling;
00054     GPUFunction fPartialsPartialsEdgeLikelihoods;
00055     GPUFunction fPartialsPartialsEdgeLikelihoodsSecondDeriv;
00056     GPUFunction fStatesPartialsEdgeLikelihoods;
00057     GPUFunction fStatesPartialsEdgeLikelihoodsSecondDeriv;
00058         // Likelihood-integration and scaling-factor kernels (grouping inferred from the names only).
00059     GPUFunction fIntegrateLikelihoodsDynamicScaling;
00060     GPUFunction fIntegrateLikelihoodsDynamicScalingSecondDeriv;
00061     GPUFunction fAccumulateFactorsDynamicScaling;
00062     GPUFunction fAccumulateFactorsAutoScaling;
00063     GPUFunction fRemoveFactorsDynamicScaling;
00064     GPUFunction fPartialsDynamicScaling;
00065     GPUFunction fPartialsDynamicScalingAccumulate;
00066     GPUFunction fPartialsDynamicScalingAccumulateDifference;
00067     GPUFunction fPartialsDynamicScalingAccumulateReciprocal;
00068     GPUFunction fPartialsDynamicScalingSlow;
00069     GPUFunction fIntegrateLikelihoods;
00070     GPUFunction fIntegrateLikelihoodsSecondDeriv;
00071         GPUFunction fIntegrateLikelihoodsMulti;
00072         GPUFunction fIntegrateLikelihoodsFixedScaleMulti;
00073     GPUFunction fIntegrateLikelihoodsAutoScaling;
00074 // Site-summation kernels; the names match the SumSites1/2/3 methods declared in the public section.
00075     GPUFunction fSumSites1;
00076     GPUFunction fSumSites2;
00077     GPUFunction fSumSites3;
00078     // Launch geometry: one Block/Grid Dim3Int pair per kernel family. Presumably filled in by SetupKernelBlocksAndGrids() - TODO confirm.
00079     Dim3Int bgTransitionProbabilitiesBlock;
00080     Dim3Int bgTransitionProbabilitiesGrid;
00081     Dim3Int bgPeelingBlock;
00082     Dim3Int bgPeelingGrid;
00083     Dim3Int bgLikelihoodBlock;
00084     Dim3Int bgLikelihoodGrid;
00085     Dim3Int bgAccumulateBlock;
00086     Dim3Int bgAccumulateGrid;
00087     Dim3Int bgScaleBlock;
00088     Dim3Int bgScaleGrid;
00089     Dim3Int bgSumSitesBlock;
00090     Dim3Int bgSumSitesGrid;
00091     // Problem dimensions and block-size constants; where they are initialised is not visible in this header.
00092     unsigned int kPaddedStateCount;
00093     unsigned int kCategoryCount;
00094     unsigned int kPatternCount;
00095     unsigned int kPatternBlockSize;
00096     unsigned int kMatrixBlockSize;
00097     unsigned int kSlowReweighing;  // NOTE(review): spelling looks like a typo for "Reweighting"; renaming would touch every user of the member, so it is left as is.
00098     unsigned int kMultiplyBlockSize;
00099     unsigned int kSumSitesBlockSize;
00100     long kFlags; // presumably a bit mask of option flags - TODO confirm against the implementation.
00101     
00102 public:
00103     KernelLauncher(GPUInterface* inGpu); // Not declared explicit, so a GPUInterface* converts implicitly to a KernelLauncher.
00104     // NOTE(review): a destructor is declared but no copy constructor/assignment, so implicit copies would share the raw gpu pointer. The destructor is also non-virtual.
00105     ~KernelLauncher();
00106     
00107 // Kernel links. CUDA builds declare three GetTransitionProbabilitiesSquare* launchers; other builds declare a single one with a different parameter list.
00108 #ifdef CUDA
00109     void GetTransitionProbabilitiesSquare(GPUPtr dMatrices,
00110                                           GPUPtr dPtrQueue,
00111                                           GPUPtr dEvec,
00112                                           GPUPtr dIevc,
00113                                           GPUPtr dEigenValues,
00114                                           GPUPtr distanceQueue,
00115                                           unsigned int totalMatrix);
00116 // Same parameter list as GetTransitionProbabilitiesSquare; presumably launches fMatrixMulADBFirstDeriv - TODO confirm.
00117     void GetTransitionProbabilitiesSquareFirstDeriv(GPUPtr dMatrices,
00118                                                     GPUPtr dPtrQueue,
00119                                                      GPUPtr dEvec,
00120                                                      GPUPtr dIevc,
00121                                                      GPUPtr dEigenValues,
00122                                                      GPUPtr distanceQueue,
00123                                                      unsigned int totalMatrix);    
00124     // Same parameter list again; presumably launches fMatrixMulADBSecondDeriv - TODO confirm.
00125     void GetTransitionProbabilitiesSquareSecondDeriv(GPUPtr dMatrices,
00126                                                      GPUPtr dPtrQueue,
00127                                           GPUPtr dEvec,
00128                                           GPUPtr dIevc,
00129                                           GPUPtr dEigenValues,
00130                                           GPUPtr distanceQueue,
00131                                           unsigned int totalMatrix);
00132 
00133 #else //OpenCL
00134     void GetTransitionProbabilitiesSquare(GPUPtr dPtr, // takes a single dPtr plus a trailing index instead of dMatrices/dPtrQueue
00135                                           GPUPtr dEvec,
00136                                           GPUPtr dIevc,
00137                                           GPUPtr dEigenValues,
00138                                           GPUPtr distanceQueue,
00139                                           unsigned int totalMatrix,
00140                                           unsigned int index);    
00141 #endif
00142     // Pruning launchers. This first variant differs from the rest: it takes scaling-buffer indices with GPUPtr arrays, an int* trigger alongside a GPUPtr trigger, and sizeReal.
00143     void PartialsPartialsPruningDynamicCheckScaling(GPUPtr partials1,
00144                                                     GPUPtr partials2,
00145                                                     GPUPtr partials3,
00146                                                     GPUPtr matrices1,
00147                                                     GPUPtr matrices2,
00148                                                     int writeScalingIndex,
00149                                                     int readScalingIndex,
00150                                                     int cumulativeScalingIndex,
00151                                                     GPUPtr* dScalingFactors,
00152                                                     GPUPtr* dScalingFactorsMaster,
00153                                                     unsigned int patternCount,
00154                                                     unsigned int categoryCount,
00155                                                     int doRescaling,
00156                                                     int* hRescalingTrigger,
00157                                                     GPUPtr dRescalingTrigger,
00158                                                     int sizeReal);
00159     // Partials x partials: the first two buffers are partials; partials3 is presumably the destination - TODO confirm.
00160     void PartialsPartialsPruningDynamicScaling(GPUPtr partials1,
00161                                                GPUPtr partials2,
00162                                                GPUPtr partials3,
00163                                                GPUPtr matrices1,
00164                                                GPUPtr matrices2,
00165                                                GPUPtr scalingFactors,
00166                                                GPUPtr cumulativeScaling,
00167                                                unsigned int patternCount,
00168                                                unsigned int categoryCount,
00169                                                int doRescaling);
00170     // States x partials: same parameter list, but the first buffer is states1 instead of partials1.
00171     void StatesPartialsPruningDynamicScaling(GPUPtr states1,
00172                                              GPUPtr partials2,
00173                                              GPUPtr partials3,
00174                                              GPUPtr matrices1,
00175                                              GPUPtr matrices2,
00176                                              GPUPtr scalingFactors,
00177                                              GPUPtr cumulativeScaling,
00178                                              unsigned int patternCount,
00179                                              unsigned int categoryCount,
00180                                              int doRescaling);
00181     // States x states: same parameter list, with both leading buffers given as states.
00182     void StatesStatesPruningDynamicScaling(GPUPtr states1,
00183                                            GPUPtr states2,
00184                                            GPUPtr partials3,
00185                                            GPUPtr matrices1,
00186                                            GPUPtr matrices2,
00187                                            GPUPtr scalingFactors,
00188                                            GPUPtr cumulativeScaling,
00189                                            unsigned int patternCount,
00190                                            unsigned int categoryCount,
00191                                            int doRescaling);
00192     // Root likelihood integration launchers. patternCount/categoryCount are passed per call even though kPatternCount/kCategoryCount members exist.
00193     void IntegrateLikelihoodsDynamicScaling(GPUPtr dResult,
00194                                             GPUPtr dRootPartials,
00195                                             GPUPtr dWeights,
00196                                             GPUPtr dFrequencies,
00197                                             GPUPtr dRootScalingFactors,
00198                                             unsigned int patternCount,
00199                                             unsigned int categoryCount);
00200     // Identical parameter list to IntegrateLikelihoodsDynamicScaling.
00201     void IntegrateLikelihoodsAutoScaling(GPUPtr dResult,
00202                                             GPUPtr dRootPartials,
00203                                             GPUPtr dWeights,
00204                                             GPUPtr dFrequencies,
00205                                             GPUPtr dRootScalingFactors,
00206                                             unsigned int patternCount,
00207                                             unsigned int categoryCount);
00208     // SecondDeriv variant: adds first/second-derivative result buffers and the matching root derivative buffers.
00209     void IntegrateLikelihoodsDynamicScalingSecondDeriv(GPUPtr dResult,
00210                                                        GPUPtr dFirstDerivResult,
00211                                                        GPUPtr dSecondDerivResult,
00212                                                        GPUPtr dRootPartials,
00213                                                        GPUPtr dRootFirstDeriv,
00214                                                        GPUPtr dRootSecondDeriv,
00215                                                        GPUPtr dWeights,
00216                                                        GPUPtr dFrequencies,
00217                                                        GPUPtr dRootScalingFactors,
00218                                                        unsigned int patternCount,
00219                                                        unsigned int categoryCount);
00220     // Edge likelihood launchers. NOTE(review): "dChildParials" below looks like a typo for dChildPartials; it is only a declaration parameter name.
00221     void PartialsPartialsEdgeLikelihoods(GPUPtr dPartialsTmp,
00222                                          GPUPtr dParentPartials,
00223                                          GPUPtr dChildParials,
00224                                          GPUPtr dTransMatrix,
00225                                          unsigned int patternCount,
00226                                          unsigned int categoryCount);
00227     // SecondDeriv variant: adds derivative temporaries and first/second-derivative matrices.
00228     void PartialsPartialsEdgeLikelihoodsSecondDeriv(GPUPtr dPartialsTmp,
00229                                                     GPUPtr dFirstDerivTmp,
00230                                                     GPUPtr dSecondDerivTmp,
00231                                                     GPUPtr dParentPartials,
00232                                                     GPUPtr dChildParials,
00233                                                     GPUPtr dTransMatrix,
00234                                                     GPUPtr dFirstDerivMatrix,
00235                                                     GPUPtr dSecondDerivMatrix,
00236                                                     unsigned int patternCount,
00237                                                     unsigned int categoryCount);
00238     
00239     // States/partials edge likelihoods: same shape as the partials/partials pair above, with dChildStates as the child buffer.
00240     void StatesPartialsEdgeLikelihoods(GPUPtr dPartialsTmp,
00241                                        GPUPtr dParentPartials,
00242                                        GPUPtr dChildStates,
00243                                        GPUPtr dTransMatrix,
00244                                        unsigned int patternCount,
00245                                        unsigned int categoryCount);
00246     
00247     void StatesPartialsEdgeLikelihoodsSecondDeriv(GPUPtr dPartialsTmp,
00248                                                   GPUPtr dFirstDerivTmp,
00249                                                   GPUPtr dSecondDerivTmp,
00250                                                   GPUPtr dParentPartials,
00251                                                   GPUPtr dChildStates,
00252                                                   GPUPtr dTransMatrix,
00253                                                   GPUPtr dFirstDerivMatrix,
00254                                                   GPUPtr dSecondDerivMatrix,
00255                                                   unsigned int patternCount,
00256                                                   unsigned int categoryCount);
00257     // Scaling-factor bookkeeping launchers; each takes a node pointer queue, root scaling factors, nodeCount and patternCount.
00258     void AccumulateFactorsDynamicScaling(GPUPtr dScalingFactors,
00259                                          GPUPtr dNodePtrQueue,
00260                                          GPUPtr dRootScalingFactors,
00261                                          unsigned int nodeCount,
00262                                          unsigned int patternCount);
00263 // Auto-scaling variant: additionally takes scaleBufferSize.
00264     void AccumulateFactorsAutoScaling(GPUPtr dScalingFactors,
00265                                       GPUPtr dNodePtrQueue,
00266                                       GPUPtr dRootScalingFactors,
00267                                       unsigned int nodeCount,
00268                                       unsigned int patternCount,
00269                                       unsigned int scaleBufferSize);
00270     
00271     void RemoveFactorsDynamicScaling(GPUPtr dScalingFactors,
00272                                      GPUPtr dNodePtrQueue,
00273                                      GPUPtr dRootScalingFactors,
00274                                      unsigned int nodeCount,
00275                                      unsigned int patternCount);    
00276     // No GPUFunction member is named after RescalePartials; presumably it dispatches to the fPartialsDynamicScaling* kernels - TODO confirm.
00277     void RescalePartials(GPUPtr partials3,
00278                          GPUPtr scalingFactors,
00279                          GPUPtr cumulativeScaling,
00280                          unsigned int patternCount,
00281                          unsigned int categoryCount,
00282                          unsigned int fillWithOnes);
00283 // Unscaled integration: same parameters as the DynamicScaling versions minus dRootScalingFactors.
00284     void IntegrateLikelihoods(GPUPtr dResult,
00285                               GPUPtr dRootPartials,
00286                               GPUPtr dWeights,
00287                               GPUPtr dFrequencies,
00288                               unsigned int patternCount,
00289                               unsigned int categoryCount);
00290     
00291     void IntegrateLikelihoodsSecondDeriv(GPUPtr dResult,
00292                                          GPUPtr dFirstDerivResult,
00293                                          GPUPtr dSecondDerivResult,
00294                                          GPUPtr dRootPartials,
00295                                          GPUPtr dRootFirstDeriv,
00296                                          GPUPtr dRootSecondDeriv,
00297                                          GPUPtr dWeights,
00298                                          GPUPtr dFrequencies,
00299                                          unsigned int patternCount,
00300                                          unsigned int categoryCount);
00301     // Multi variants: the first adds a takeLog argument; the second adds scaling-factor buffers plus subsetCount/subsetIndex.
00302         void IntegrateLikelihoodsMulti(GPUPtr dResult,
00303                                                                    GPUPtr dRootPartials,
00304                                                                    GPUPtr dWeights,
00305                                                                    GPUPtr dFrequencies,
00306                                                                    unsigned int patternCount,
00307                                                                    unsigned int categoryCount,
00308                                                                    unsigned int takeLog);
00309         
00310         void IntegrateLikelihoodsFixedScaleMulti(GPUPtr dResult,
00311                                                                                          GPUPtr dRootPartials,
00312                                                                                          GPUPtr dWeights,
00313                                                                                          GPUPtr dFrequencies,
00314                                              GPUPtr dScalingFactors,
00315                                                                                          GPUPtr dPtrQueue,
00316                                                                                          GPUPtr dMaxScalingFactors,
00317                                                                                          GPUPtr dIndexMaxScalingFactors,
00318                                                                                          unsigned int patternCount,
00319                                                                                          unsigned int categoryCount,
00320                                                                                          unsigned int subsetCount,
00321                                                                                          unsigned int subsetIndex);
00322     // SumSitesN takes N (dArray, dSum) buffer pairs followed by dPatternWeights and patternCount.
00323     void SumSites1(GPUPtr dArray1,
00324                   GPUPtr dSum1,
00325                   GPUPtr dPatternWeights,
00326                   unsigned int patternCount);
00327     
00328     void SumSites2(GPUPtr dArray1,
00329                   GPUPtr dSum1,
00330                   GPUPtr dArray2,
00331                   GPUPtr dSum2,
00332                   GPUPtr dPatternWeights,
00333                   unsigned int patternCount);
00334     
00335     void SumSites3(GPUPtr dArray1,
00336                   GPUPtr dSum1,
00337                   GPUPtr dArray2,
00338                   GPUPtr dSum2,
00339                   GPUPtr dArray3,
00340                   GPUPtr dSum3,
00341                   GPUPtr dPatternWeights,
00342                   unsigned int patternCount);
00343         // Public, unlike LoadKernels(); presumably computes the bg*Block/bg*Grid members - TODO confirm.
00344     void SetupKernelBlocksAndGrids();
00345     
00346 protected:
00347     void LoadKernels(); // Callable only from this class and subclasses; presumably resolves the GPUFunction members - TODO confirm.
00348 
00349 };
00350 #endif // __KernelLauncher__