HMSBEAGLE  1.0.0
libhmsbeagle/CPU/BeagleCPUImpl.h
00001 /*
00002  *  BeagleCPUImpl.h
00003  *  BEAGLE
00004  *
00005  * Copyright 2009 Phylogenetic Likelihood Working Group
00006  *
00007  * This file is part of BEAGLE.
00008  *
00009  * BEAGLE is free software: you can redistribute it and/or modify
00010  * it under the terms of the GNU Lesser General Public License as
00011  * published by the Free Software Foundation, either version 3 of
00012  * the License, or (at your option) any later version.
00013  *
00014  * BEAGLE is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with BEAGLE.  If not, see
00021  * <http://www.gnu.org/licenses/>.
00022  *
00023  * @author Andrew Rambaut
00024  * @author Marc Suchard
00025  * @author Daniel Ayres
00026  */
00027 
00028 #ifndef __BeagleCPUImpl__
00029 #define __BeagleCPUImpl__
00030 
00031 #ifdef HAVE_CONFIG_H
00032 #include "libhmsbeagle/config.h"
00033 #endif
00034 
00035 #include "libhmsbeagle/BeagleImpl.h"
00036 #include "libhmsbeagle/CPU/Precision.h"
00037 #include "libhmsbeagle/CPU/EigenDecomposition.h"
00038 
00039 #include <vector>
00040 
00041 #define BEAGLE_CPU_GENERIC      REALTYPE, T_PAD, P_PAD
00042 #define BEAGLE_CPU_TEMPLATE     template <typename REALTYPE, int T_PAD, int P_PAD>
00043 
00044 #define BEAGLE_CPU_FACTORY_GENERIC      REALTYPE
00045 #define BEAGLE_CPU_FACTORY_TEMPLATE     template <typename REALTYPE>
00046 
00047 
00048 #define T_PAD_DEFAULT   1   // Pad transition matrix rows with an extra 1.0 for ambiguous characters
00049 #define P_PAD_DEFAULT   0   // No partials padding necessary for non-SSE implementations
00050 
00051 
00052 namespace beagle {
00053 namespace cpu {
00054 
00055 BEAGLE_CPU_TEMPLATE
00056 class BeagleCPUImpl : public BeagleImpl {
00057 
00058 protected:
00059     int kBufferCount; 
00060 
00061     int kTipCount; 
00062 
00063     int kPatternCount; 
00064     int kPaddedPatternCount; 
00065     int kExtraPatterns; 
00066     int kMatrixCount; 
00067     int kStateCount; 
00068     int kTransPaddedStateCount;
00069     int kPartialsPaddedStateCount;
00070     int kEigenDecompCount; 
00071     int kCategoryCount;
00072     int kScaleBufferCount;
00073 
00074     int kPartialsSize;  
00075     int kMatrixSize; 
00076     
00077     int kInternalPartialsBufferCount; 
00078 
00079     long kFlags;
00080     
00081     REALTYPE realtypeMin;
00082     int scalingExponentThreshhold;
00083 
00084     EigenDecomposition<BEAGLE_CPU_EIGEN_GENERIC>* gEigenDecomposition;
00085 
00086     double* gCategoryRates; // Kept in double-precision until multiplication by edgelength
00087     double* gPatternWeights;
00088     
00089     REALTYPE** gCategoryWeights;
00090     REALTYPE** gStateFrequencies;
00091     
00092     //@ the size of these pointers are known at alloc-time, so the partials and
00093     //      tipStates field should be switched to vectors of vectors (to make
00094     //      memory management less error prone
00095     REALTYPE** gPartials;
00096     int** gTipStates;
00097     REALTYPE** gScaleBuffers;
00098     
00099     signed short** gAutoScaleBuffers;
00100     
00101     int* gActiveScalingFactors;
00102 
00103     // There will be kMatrixCount transitionMatrices.
00104     // Each kStateCount x (kStateCount+1) matrix that is flattened
00105     //  into a single array
00106     REALTYPE** gTransitionMatrices;
00107 
00108     REALTYPE* integrationTmp;
00109     REALTYPE* firstDerivTmp;
00110     REALTYPE* secondDerivTmp;
00111     
00112     REALTYPE* outLogLikelihoodsTmp;
00113     REALTYPE* outFirstDerivativesTmp;
00114     REALTYPE* outSecondDerivativesTmp;
00115 
00116     REALTYPE* ones;
00117     REALTYPE* zeros;
00118 
00119 public:
00120     virtual ~BeagleCPUImpl();
00121 
00122     // creation of instance
00123     int createInstance(int tipCount,
00124                        int partialsBufferCount,
00125                        int compactBufferCount,
00126                        int stateCount,
00127                        int patternCount,
00128                        int eigenDecompositionCount,
00129                        int matrixCount,
00130                        int categoryCount,
00131                        int scaleBufferCount,
00132                        int resourceNumber,
00133                        long preferenceFlags,
00134                        long requirementFlags);
00135 
00136     // initialization of instance,  returnInfo can be null
00137     int getInstanceDetails(BeagleInstanceDetails* returnInfo);
00138 
00139     // set the states for a given tip
00140     //
00141     // tipIndex the index of the tip
00142     // inStates the array of states: 0 to stateCount - 1, missing = stateCount
00143     int setTipStates(int tipIndex,
00144                      const int* inStates);
00145 
00146     // set the partials for a given tip
00147     //
00148     // tipIndex the index of the tip
00149     // inPartials the array of partials, stateCount x patternCount
00150     int setTipPartials(int tipIndex,
00151                        const double* inPartials);
00152 
00153 
00154     int setPartials(int bufferIndex,
00155                     const double* inPartials);
00156 
00157     int getPartials(int bufferIndex,
00158                                         int scaleBuffer,
00159                     double* outPartials);
00160 
00161     // sets the Eigen decomposition for a given matrix
00162     //
00163     // matrixIndex the matrix index to update
00164     // eigenVectors an array containing the Eigen Vectors
00165     // inverseEigenVectors an array containing the inverse Eigen Vectors
00166     // eigenValues an array containing the Eigen Values
00167     int setEigenDecomposition(int eigenIndex,
00168                               const double* inEigenVectors,
00169                               const double* inInverseEigenVectors,
00170                               const double* inEigenValues);
00171 
00172     int setStateFrequencies(int stateFrequenciesIndex,
00173                             const double* inStateFrequencies);    
00174     
00175     int setCategoryWeights(int categoryWeightsIndex,
00176                            const double* inCategoryWeights);
00177     
00178     int setPatternWeights(const double* inPatternWeights);    
00179     
00180     // set the vector of category rates
00181     //
00182     // categoryRates an array containing categoryCount rate scalers
00183     int setCategoryRates(const double* inCategoryRates);
00184 
00185     int setTransitionMatrix(int matrixIndex,
00186                             const double* inMatrix,
00187                             double paddedValue);
00188     
00189     int setTransitionMatrices(const int* matrixIndices,
00190                               const double* inMatrices,
00191                               const double* paddedValues,
00192                               int count);
00193 
00194     int getTransitionMatrix(int matrixIndex,
00195                                                 double* outMatrix);
00196 
00197     // calculate a transition probability matrices for a given list of node. This will
00198     // calculate for all categories (and all matrices if more than one is being used).
00199     //
00200     // nodeIndices an array of node indices that require transition probability matrices
00201     // edgeLengths an array of expected lengths in substitutions per site
00202     // count the number of elements in the above arrays
00203     int updateTransitionMatrices(int eigenIndex,
00204                                  const int* probabilityIndices,
00205                                  const int* firstDerivativeIndices,
00206                                  const int* secondDerivativeIndices,
00207                                  const double* edgeLengths,
00208                                  int count);
00209 
00210     // calculate or queue for calculation partials using an array of operations
00211     //
00212     // operations an array of triplets of indices: the two source partials and the destination
00213     // dependencies an array of indices specify which operations are dependent on which (optional)
00214     // count the number of operations
00215     // rescale indicate if partials should be rescaled during peeling
00216     int updatePartials(const int* operations,
00217                        int operationCount,
00218                        int cumulativeScalingIndex);
00219 
00220     // Block until all calculations that write to the specified partials have completed.
00221     //
00222     // This function is optional and only has to be called by clients that "recycle" partials.
00223     //
00224     // If used, this function must be called after an updatePartials call and must refer to
00225     //  indices of "destinationPartials" that were used in a previous updatePartials
00226     // call.  The library will block until those partials have been calculated.
00227     //
00228     // destinationPartials - List of the indices of destinationPartials that must be calculated
00229     //                         before the function returns
00230     // destinationPartialsCount - Number of destinationPartials (input)
00231     //
00232     // return error code
00233     int waitForPartials(const int* destinationPartials,
00234                         int destinationPartialsCount);
00235 
00236 
00237     int accumulateScaleFactors(const int* scalingIndices,
00238                                                           int count,
00239                                                           int cumulativeScalingIndex);
00240 
00241     int removeScaleFactors(const int* scalingIndices,
00242                            int count,
00243                            int cumulativeScalingIndex);
00244 
00245     int resetScaleFactors(int cumulativeScalingIndex);
00246 
00247     int copyScaleFactors(int destScalingIndex,
00248                          int srcScalingIndex);    
00249     
00250     // calculate the site log likelihoods at a particular node
00251     //
00252     // rootNodeIndex the index of the root
00253     // outLogLikelihoods an array into which the site log likelihoods will be put
00254     int calculateRootLogLikelihoods(const int* bufferIndices,
00255                                     const int* categoryWeightsIndices,
00256                                     const int* stateFrequenciesIndices,
00257                                     const int* cumulativeScaleIndices,
00258                                     int count,
00259                                     double* outSumLogLikelihood);
00260 
00261     // possible nulls: firstDerivativeIndices, secondDerivativeIndices,
00262     //                 outFirstDerivatives, outSecondDerivatives
00263     int calculateEdgeLogLikelihoods(const int* parentBufferIndices,
00264                                     const int* childBufferIndices,
00265                                     const int* probabilityIndices,
00266                                     const int* firstDerivativeIndices,
00267                                     const int* secondDerivativeIndices,
00268                                     const int* categoryWeightsIndices,
00269                                     const int* stateFrequenciesIndices,
00270                                     const int* cumulativeScaleIndices,
00271                                     int count,
00272                                     double* outSumLogLikelihood,
00273                                     double* outSumFirstDerivative,
00274                                     double* outSumSecondDerivative);
00275     
00276     int getSiteLogLikelihoods(double* outLogLikelihoods);
00277     
00278     int getSiteDerivatives(double* outFirstDerivatives,
00279                            double* outSecondDerivatives);
00280 
00281     int block(void);
00282 
00283         virtual const char* getName();
00284 
00285         virtual const long getFlags();
00286 
00287 protected:
00288     virtual void calcStatesStates(REALTYPE* destP,
00289                                     const int* states1,
00290                                     const REALTYPE* matrices1,
00291                                     const int* states2,
00292                                     const REALTYPE* matrices2);
00293 
00294 
00295     virtual void calcStatesPartials(REALTYPE* destP,
00296                                     const int* states1,
00297                                     const REALTYPE* matrices1,
00298                                     const REALTYPE* partials2,
00299                                     const REALTYPE* matrices2);
00300 
00301     virtual void calcPartialsPartials(REALTYPE* destP,
00302                                       const REALTYPE* partials1,
00303                                       const REALTYPE* matrices1,
00304                                       const REALTYPE* partials2,
00305                                       const REALTYPE* matrices2);
00306 
00307     virtual int calcRootLogLikelihoods(const int bufferIndex,
00308                                         const int categoryWeightsIndex,
00309                                         const int stateFrequenciesIndex,
00310                                         const int scaleBufferIndex,
00311                                         double* outSumLogLikelihood);
00312     
00313     virtual int calcRootLogLikelihoodsMulti(const int* bufferIndices,
00314                                              const int* categoryWeightsIndices,
00315                                              const int* stateFrequenciesIndices,
00316                                              const int* scaleBufferIndices,
00317                                              int count,
00318                                              double* outSumLogLikelihood);
00319     
00320     virtual int calcEdgeLogLikelihoods(const int parentBufferIndex,
00321                                         const int childBufferIndex,
00322                                         const int probabilityIndex,
00323                                         const int categoryWeightsIndex,
00324                                         const int stateFrequenciesIndex,
00325                                         const int scalingFactorsIndex,
00326                                         double* outSumLogLikelihood);
00327 
00328     virtual int calcEdgeLogLikelihoodsMulti(const int* parentBufferIndices,
00329                                             const int* childBufferIndices,
00330                                             const int* probabilityIndices,
00331                                             const int* categoryWeightsIndices,
00332                                             const int* stateFrequenciesIndices,
00333                                             const int* scalingFactorsIndices,
00334                                             int count,
00335                                             double* outSumLogLikelihood);
00336     
00337     virtual int calcEdgeLogLikelihoodsFirstDeriv(const int parentBufferIndex,
00338                                                   const int childBufferIndex,
00339                                                   const int probabilityIndex,
00340                                                   const int firstDerivativeIndex,
00341                                                   const int categoryWeightsIndex,
00342                                                   const int stateFrequenciesIndex,
00343                                                   const int scalingFactorsIndex,
00344                                                   double* outSumLogLikelihood,
00345                                                   double* outSumFirstDerivative);
00346         
00347     virtual int calcEdgeLogLikelihoodsSecondDeriv(const int parentBufferIndex,
00348                                                    const int childBufferIndex,
00349                                                    const int probabilityIndex,
00350                                                    const int firstDerivativeIndex,
00351                                                    const int secondDerivativeIndex,
00352                                                    const int categoryWeightsIndex,
00353                                                    const int stateFrequenciesIndex,
00354                                                    const int scalingFactorsIndex,
00355                                                    double* outSumLogLikelihood,
00356                                                    double* outSumFirstDerivative,
00357                                                    double* outSumSecondDerivative);
00358 
00359     virtual void calcStatesStatesFixedScaling(REALTYPE *destP,
00360                                               const int *child0States,
00361                                               const REALTYPE *child0TransMat,
00362                                               const int *child1States,
00363                                               const REALTYPE *child1TransMat,
00364                                               const REALTYPE *scaleFactors);
00365 
00366     virtual void calcStatesPartialsFixedScaling(REALTYPE *destP,
00367                                                 const int *child0States,
00368                                                 const REALTYPE *child0TransMat,
00369                                                 const REALTYPE *child1Partials,
00370                                                 const REALTYPE *child1TransMat,
00371                                                 const REALTYPE *scaleFactors);
00372 
00373     virtual void calcPartialsPartialsFixedScaling(REALTYPE *destP,
00374                                             const REALTYPE *child0States,
00375                                             const REALTYPE *child0TransMat,
00376                                             const REALTYPE *child1Partials,
00377                                             const REALTYPE *child1TransMat,
00378                                             const REALTYPE *scaleFactors);
00379     
00380     virtual void calcPartialsPartialsAutoScaling(REALTYPE* destP,
00381                                                   const REALTYPE* partials1,
00382                                                   const REALTYPE* matrices1,
00383                                                   const REALTYPE* partials2,
00384                                                   const REALTYPE* matrices2,
00385                                                   int* activateScaling);
00386 
00387     virtual void rescalePartials(REALTYPE *destP,
00388                                      REALTYPE *scaleFactors,
00389                                  REALTYPE *cumulativeScaleFactors,
00390                                  const int  fillWithOnes);
00391     
00392     virtual void autoRescalePartials(REALTYPE *destP,
00393                                      signed short *scaleFactors);
00394 
00395     virtual int getPaddedPatternsModulus();
00396 
00397     void* mallocAligned(size_t size);
00398 
00399 };
00400 
00401 BEAGLE_CPU_FACTORY_TEMPLATE
00402 class BeagleCPUImplFactory : public BeagleImplFactory {
00403 public:
00404     virtual BeagleImpl* createImpl(int tipCount,
00405                                    int partialsBufferCount,
00406                                    int compactBufferCount,
00407                                    int stateCount,
00408                                    int patternCount,
00409                                    int eigenBufferCount,
00410                                    int matrixBufferCount,
00411                                    int categoryCount,
00412                                    int scaleBufferCount,
00413                                    int resourceNumber,
00414                                    long preferenceFlags,
00415                                    long requirementFlags,
00416                                    int* errorCode);
00417 
00418     virtual const char* getName();
00419     virtual const long getFlags();
00420 };
00421 
00422 //typedef BeagleCPUImplGeneral<double> BeagleCPUImpl;
00423 
00424 }       // namespace cpu
00425 }       // namespace beagle
00426 
00427 // now that the interface is defined, include the implementation of template functions
00428 #include "libhmsbeagle/CPU/BeagleCPUImpl.hpp"
00429 
00430 #endif // __BeagleCPUImpl__