HMSBEAGLE  1.0.0
libhmsbeagle/GPU/BeagleGPUImpl.h
00001 /*
00002  * @file BeagleGPUImpl.h
00003  *
00004  * Copyright 2009 Phylogenetic Likelihood Working Group
00005  *
00006  * This file is part of BEAGLE.
00007  *
00008  * BEAGLE is free software: you can redistribute it and/or modify
00009  * it under the terms of the GNU Lesser General Public License as
00010  * published by the Free Software Foundation, either version 3 of
00011  * the License, or (at your option) any later version.
00012  *
00013  * BEAGLE is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU Lesser General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU Lesser General Public
00019  * License along with BEAGLE.  If not, see
00020  * <http://www.gnu.org/licenses/>.
00021  *
00022  * 
00023  * @brief GPU implementation header
00024  *
00025  * @author Marc Suchard
00026  * @author Andrew Rambaut
00027  * @author Daniel Ayres
00028  */
00029 
00030 #ifndef __BeagleGPUImpl__
00031 #define __BeagleGPUImpl__
00032 
00033 #ifdef HAVE_CONFIG_H
00034 #include "libhmsbeagle/config.h"
00035 #endif
00036 
00037 #include "libhmsbeagle/BeagleImpl.h"
00038 #include "libhmsbeagle/GPU/GPUImplDefs.h"
00039 #include "libhmsbeagle/GPU/GPUInterface.h"
00040 #include "libhmsbeagle/GPU/KernelLauncher.h"
00041 
00042 #define BEAGLE_GPU_GENERIC      Real
00043 #define BEAGLE_GPU_TEMPLATE     template <typename Real>
00044 
00045 namespace beagle {
00046 namespace gpu {
00047 
00048 BEAGLE_GPU_TEMPLATE
00049 class BeagleGPUImpl : public BeagleImpl {
00050 private:
00051     GPUInterface* gpu;
00052     KernelLauncher* kernels;
00053     
00054     int kInitialized;
00055     
00056     long kFlags;
00057     
00058     int kTipCount;
00059     int kPartialsBufferCount;
00060     int kCompactBufferCount;
00061     int kStateCount;
00062     int kPatternCount;
00063     int kEigenDecompCount;
00064     int kMatrixCount;
00065     int kCategoryCount;
00066  
00067     int kTipPartialsBufferCount;
00068     int kInternalPartialsBufferCount;
00069     int kBufferCount;
00070     int kScaleBufferCount;
00071     
00072     int kPaddedStateCount;
00073     int kPaddedPatternCount;    // total # of patterns with padding so that kPaddedPatternCount
00074                                 //   * kPaddedStateCount is a multiple of 16
00075     int kSumSitesBlockCount;
00076     
00077     int kPartialsSize;
00078     int kMatrixSize;
00079     int kEigenValuesSize;
00080     int kScaleBufferSize;
00081 
00082     int kLastCompactBufferIndex;
00083     int kLastTipPartialsBufferIndex;
00084     
00085     GPUPtr dIntegrationTmp;
00086     GPUPtr dOutFirstDeriv;
00087     GPUPtr dOutSecondDeriv;
00088     GPUPtr dPartialsTmp;
00089     GPUPtr dFirstDerivTmp;
00090     GPUPtr dSecondDerivTmp;
00091     
00092     GPUPtr dSumLogLikelihood;
00093     GPUPtr dSumFirstDeriv;
00094     GPUPtr dSumSecondDeriv;
00095     
00096     GPUPtr dPatternWeights;    
00097         
00098     GPUPtr dBranchLengths;
00099     
00100     GPUPtr dDistanceQueue;
00101     
00102     GPUPtr dPtrQueue;
00103         
00104     GPUPtr dMaxScalingFactors;
00105     GPUPtr dIndexMaxScalingFactors;
00106     
00107     GPUPtr dAccumulatedScalingFactors;
00108     
00109     GPUPtr* dEigenValues;
00110     GPUPtr* dEvec;
00111     GPUPtr* dIevc;
00112     
00113     GPUPtr* dWeights;
00114     GPUPtr* dFrequencies; 
00115 
00116     GPUPtr* dScalingFactors;
00117     
00118     GPUPtr* dStates;
00119     
00120     GPUPtr* dPartials;
00121     GPUPtr* dMatrices;
00122     
00123     GPUPtr* dCompactBuffers;
00124     GPUPtr* dTipPartialsBuffers;
00125     
00126     unsigned int* hPtrQueue;
00127     
00128     double* hCategoryRates; // Can keep in double-precision
00129 
00130     Real* hPatternWeightsCache;
00131         
00132     Real* hDistanceQueue;
00133     
00134     Real* hWeightsCache;
00135     Real* hFrequenciesCache;
00136     Real* hLogLikelihoodsCache;
00137     Real* hPartialsCache;
00138     int* hStatesCache;
00139     Real* hMatrixCache;
00140     
00141     int* hRescalingTrigger;
00142     GPUPtr dRescalingTrigger;
00143     
00144     GPUPtr* dScalingFactorsMaster;
00145     
00146 public:    
00147     BeagleGPUImpl();
00148     
00149     virtual ~BeagleGPUImpl();
00150     
00151     int createInstance(int tipCount,
00152                        int partialsBufferCount,
00153                        int compactBufferCount,
00154                        int stateCount,
00155                        int patternCount,
00156                        int eigenDecompositionCount,
00157                        int matrixCount,
00158                        int categoryCount,
00159                        int scaleBufferCount,
00160                        int resourceNumber,
00161                        long preferenceFlags,
00162                        long requirementFlags);
00163     
00164     int getInstanceDetails(BeagleInstanceDetails* retunInfo);
00165 
00166     int setTipStates(int tipIndex,
00167                      const int* inStates);
00168 
00169     int setTipPartials(int tipIndex,
00170                        const double* inPartials);
00171     
00172     int setPartials(int bufferIndex,
00173                     const double* inPartials);
00174     
00175     int getPartials(int bufferIndex,
00176                                     int scaleIndex,
00177                     double* outPartials);
00178         
00179     int setEigenDecomposition(int eigenIndex,
00180                               const double* inEigenVectors,
00181                               const double* inInverseEigenVectors,
00182                               const double* inEigenValues);
00183     
00184     int setStateFrequencies(int stateFrequenciesIndex,
00185                             const double* inStateFrequencies);    
00186     
00187     int setCategoryWeights(int categoryWeightsIndex,
00188                            const double* inCategoryWeights);
00189     
00190     int setPatternWeights(const double* inPatternWeights);
00191     
00192     
00193     int setCategoryRates(const double* inCategoryRates);
00194     
00195     int setTransitionMatrix(int matrixIndex,
00196                             const double* inMatrix,
00197                             double paddedValue);
00198     
00199     int setTransitionMatrices(const int* matrixIndices,
00200                               const double* inMatrices,
00201                               const double* paddedValues,
00202                               int count);    
00203     
00204     int getTransitionMatrix(int matrixIndex,
00205                             double* outMatrix);
00206 
00207     int updateTransitionMatrices(int eigenIndex,
00208                                  const int* probabilityIndices,
00209                                  const int* firstDerivativeIndices,
00210                                  const int* secondDerivativeIndices,
00211                                  const double* edgeLengths,
00212                                  int count);
00213     
00214     int updatePartials(const int* operations,
00215                        int operationCount,
00216                        int cumulativeScalingIndex);
00217     
00218     int waitForPartials(const int* destinationPartials,
00219                         int destinationPartialsCount);
00220     
00221     int accumulateScaleFactors(const int* scalingIndices,
00222                                int count,
00223                                int cumulativeScalingIndex);
00224     
00225     int removeScaleFactors(const int* scalingIndices,
00226                            int count,
00227                            int cumulativeScalingIndex);
00228     
00229     int resetScaleFactors(int cumulativeScalingIndex);
00230     
00231     int copyScaleFactors(int destScalingIndex,
00232                          int srcScalingIndex);
00233     
00234     int calculateRootLogLikelihoods(const int* bufferIndices,
00235                                     const int* categoryWeightsIndices,
00236                                     const int* stateFrequenciesIndices,
00237                                     const int* cumulativeScaleIndices,
00238                                     int count,
00239                                     double* outSumLogLikelihood);
00240     
00241     int calculateEdgeLogLikelihoods(const int* parentBufferIndices,
00242                                     const int* childBufferIndices,
00243                                     const int* probabilityIndices,
00244                                     const int* firstDerivativeIndices,
00245                                     const int* secondDerivativeIndices,
00246                                     const int* categoryWeightsIndices,
00247                                     const int* stateFrequenciesIndices,
00248                                     const int* cumulativeScaleIndices,
00249                                     int count,
00250                                     double* outSumLogLikelihood,
00251                                     double* outSumFirstDerivative,
00252                                     double* outSumSecondDerivative);
00253 
00254     int getSiteLogLikelihoods(double* outLogLikelihoods);
00255     
00256     int getSiteDerivatives(double* outFirstDerivatives,
00257                            double* outSecondDerivatives);
00258 
00259 private:
00260     char* getInstanceName();
00261 
00262 };
00263 
00264 BEAGLE_GPU_TEMPLATE
00265 class BeagleGPUImplFactory : public BeagleImplFactory {
00266 public:
00267     virtual BeagleImpl* createImpl(int tipCount,
00268                                    int partialsBufferCount,
00269                                    int compactBufferCount,
00270                                    int stateCount,
00271                                    int patternCount,
00272                                    int eigenBufferCount,
00273                                    int matrixBufferCount,
00274                                    int categoryCount,
00275                                    int scaleBufferCount,
00276                                    int resourceNumber,
00277                                    long preferenceFlags,
00278                                    long requirementFlags,
00279                                    int* errorCode);
00280 
00281     virtual const char* getName();
00282     virtual const long getFlags();
00283 };
00284 
00285 template <typename Real>
00286 void modifyFlagsForPrecision(long* flags, Real r);
00287 
00288 }       // namespace gpu
00289 }       // namespace beagle
00290 
00291 #include "libhmsbeagle/GPU/BeagleGPUImpl.hpp"
00292 
00293 #endif // __BeagleGPUImpl__