HMSBEAGLE  1.0.0
libhmsbeagle/CPU/BeagleCPU4StateSSEImpl.h
00001 /*
00002  *  BeagleCPU4StateSSEImpl.h
00003  *  BEAGLE
00004  *
00005  * Copyright 2009 Phylogenetic Likelihood Working Group
00006  *
00007  * This file is part of BEAGLE.
00008  *
00009  * BEAGLE is free software: you can redistribute it and/or modify
00010  * it under the terms of the GNU Lesser General Public License as
00011  * published by the Free Software Foundation, either version 3 of
00012  * the License, or (at your option) any later version.
00013  *
00014  * BEAGLE is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with BEAGLE.  If not, see
00021  * <http://www.gnu.org/licenses/>.
00022  *
00023  * @author Marc Suchard
00024  */
00025 
00026 #ifndef __BeagleCPU4StateSSEImpl__
00027 #define __BeagleCPU4StateSSEImpl__
00028 
00029 #ifdef HAVE_CONFIG_H
00030 #include "libhmsbeagle/config.h"
00031 #endif
00032 
00033 #include "libhmsbeagle/CPU/BeagleCPU4StateImpl.h"
00034 
00035 #include <vector>
00036 
// Alias for the compiler-specific no-alias pointer qualifier.
// NOTE(review): the declarations in this header write __restrict directly
// rather than going through this macro.
00037 #define RESTRICT __restrict             /* some compilers may need this defined as 'restrict' instead */
00038 
// Default padding amounts for the 4-state SSE implementation.
00039 #define T_PAD_4_SSE_DEFAULT 2 // Pad transition matrix with 2 rows for SSE
00040 #define P_PAD_4_SSE_DEFAULT 0 // Partials padding not needed for 4 states SSE
00041 
// Shorthand used by the float and double specializations of
// BeagleCPU4StateSSEImpl below:
//   BEAGLE_CPU_4_SSE_FLOAT / _DOUBLE  expand to the template-argument list
//                                     (element type, T_PAD, P_PAD);
//   BEAGLE_CPU_4_SSE_TEMPLATE         expands to the matching template header,
//                                     which leaves T_PAD and P_PAD as int parameters.
00042 #define BEAGLE_CPU_4_SSE_FLOAT       float, T_PAD, P_PAD
00043 #define BEAGLE_CPU_4_SSE_DOUBLE      double, T_PAD, P_PAD
00044 #define BEAGLE_CPU_4_SSE_TEMPLATE    template <int T_PAD, int P_PAD>
00045 
00046 namespace beagle {
00047 namespace cpu {
00048 
// Primary template of BeagleCPU4StateSSEImpl. Its body is empty, so it adds
// nothing to BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC>; the float and double
// specializations that follow declare their own virtual member functions.
// BEAGLE_CPU_TEMPLATE and BEAGLE_CPU_GENERIC are not defined in this header
// (presumably they come from the included BeagleCPU4StateImpl.h - confirm).
00049 BEAGLE_CPU_TEMPLATE
00050 class BeagleCPU4StateSSEImpl : public BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC> {};
00051     
00052 
// Partial specialization of BeagleCPU4StateSSEImpl for float data; T_PAD and
// P_PAD remain template parameters. Only declarations appear here; the member
// definitions are expected in BeagleCPU4StateSSEImpl.hpp, which this header
// includes at its end.
00053 BEAGLE_CPU_4_SSE_TEMPLATE
00054 class BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_FLOAT> : public BeagleCPU4StateImpl<BEAGLE_CPU_4_SSE_FLOAT> {
00055     
00056 protected:
          // Bring BeagleCPUImpl members into this class's scope so the member
          // functions can name them without qualification.
00057     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kTipCount;
00058     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gPartials;
00059     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::integrationTmp;
00060     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gTransitionMatrices;
00061     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kPatternCount;
00062     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kPaddedPatternCount;
00063     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kExtraPatterns;
00064     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kStateCount;
00065     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gTipStates;
00066     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kCategoryCount;
00067     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gScaleBuffers;
00068     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gCategoryWeights;
00069     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gStateFrequencies;
00070     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::realtypeMin;
00071     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::outLogLikelihoodsTmp;
00072     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gPatternWeights;
00073     
00074 public:    
          // Name and capability flags of this implementation.
          // NOTE(review): presumably these override base-class virtuals - confirm.
00075     virtual const char* getName();
00076     
00077         virtual const long getFlags();
00078     
00079 protected:
          // NOTE(review): the name suggests the multiple to which the pattern
          // count is padded - confirm against the definition in the .hpp.
00080     virtual int getPaddedPatternsModulus();  
00081     
00082 private:
00083     
          // Compute kernels. The names encode the kind of each child input:
          // "States" children are passed as an int array, "Partials" children
          // as a float array, and each child has its own matrices argument.
          // Unlike the kernels below, calcStatesStates declares no __restrict
          // qualifiers.
00084         virtual void calcStatesStates(float* destP,
00085                                   const int* states1,
00086                                   const float* matrices1,
00087                                   const int* states2,
00088                                   const float* matrices2);
00089     
          // __restrict on a pointer parameter tells the compiler that the
          // pointed-to data is not accessed through any other pointer; callers
          // must not pass overlapping buffers for those arguments.
00090     virtual void calcStatesPartials(float* destP,
00091                                     const int* states1,
00092                                     const float* __restrict matrices1,
00093                                     const float* __restrict partials2,
00094                                     const float* __restrict matrices2);
00095     
          // The FixedScaling variants take an additional scaleFactors array.
00096     virtual void calcStatesPartialsFixedScaling(float* destP,
00097                                                 const int* states1,
00098                                                 const float* __restrict matrices1,
00099                                                 const float* __restrict partials2,
00100                                                 const float* __restrict matrices2,
00101                                                 const float* __restrict scaleFactors);
00102     
00103     virtual void calcPartialsPartials(float* __restrict destP,
00104                                       const float* __restrict partials1,
00105                                       const float* __restrict matrices1,
00106                                       const float* __restrict partials2,
00107                                       const float* __restrict matrices2);
00108     
00109     virtual void calcPartialsPartialsFixedScaling(float* __restrict destP,
00110                                                   const float* __restrict child0Partials,
00111                                                   const float* __restrict child0TransMat,
00112                                                   const float* __restrict child1Partials,
00113                                                   const float* __restrict child1TransMat,
00114                                                   const float* __restrict scaleFactors);
00115     
          // The AutoScaling variant takes a non-const int* activateScaling.
          // NOTE(review): presumably an output flag - confirm in the .hpp.
00116     virtual void calcPartialsPartialsAutoScaling(float* __restrict destP,
00117                                                  const float* __restrict partials1,
00118                                                  const float* __restrict matrices1,
00119                                                  const float* __restrict partials2,
00120                                                  const float* __restrict matrices2,
00121                                                  int* activateScaling);
00122     
          // All inputs are buffer/array indices; the result pointer is double*
          // even in this float specialization.
          // NOTE(review): the int return is presumably a status code - confirm.
00123     virtual int calcEdgeLogLikelihoods(const int parentBufferIndex,
00124                                        const int childBufferIndex,
00125                                        const int probabilityIndex,
00126                                        const int categoryWeightsIndex,
00127                                        const int stateFrequenciesIndex,
00128                                        const int scalingFactorsIndex,
00129                                        double* outSumLogLikelihood);
00130     
00131 };
00132     
00133 
// Partial specialization of BeagleCPU4StateSSEImpl for double data; T_PAD and
// P_PAD remain template parameters. Its declarations mirror the float
// specialization with double substituted for float. Only declarations appear
// here; the member definitions are expected in BeagleCPU4StateSSEImpl.hpp,
// which this header includes at its end.
00134 BEAGLE_CPU_4_SSE_TEMPLATE
00135 class BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE> : public BeagleCPU4StateImpl<BEAGLE_CPU_4_SSE_DOUBLE> {
00136     
00137 protected:
          // Bring BeagleCPUImpl members into this class's scope so the member
          // functions can name them without qualification.
00138     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kTipCount;
00139     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gPartials;
00140     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::integrationTmp;
00141     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gTransitionMatrices;
00142     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kPatternCount;
00143     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kPaddedPatternCount;
00144     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kExtraPatterns;
00145     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kStateCount;
00146     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gTipStates;
00147     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kCategoryCount;
00148     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gScaleBuffers;
00149     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gCategoryWeights;
00150     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gStateFrequencies;
00151     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::realtypeMin;
00152     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::outLogLikelihoodsTmp;
00153     using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gPatternWeights;
00154     
00155 public:
          // Name and capability flags of this implementation.
          // NOTE(review): presumably these override base-class virtuals - confirm.
00156     virtual const char* getName();
00157     
00158         virtual const long getFlags();
00159     
00160 protected:
          // NOTE(review): the name suggests the multiple to which the pattern
          // count is padded - confirm against the definition in the .hpp.
00161     virtual int getPaddedPatternsModulus();
00162     
00163 private:
00164     
          // Compute kernels. The names encode the kind of each child input:
          // "States" children are passed as an int array, "Partials" children
          // as a double array, and each child has its own matrices argument.
          // Unlike the kernels below, calcStatesStates declares no __restrict
          // qualifiers.
00165     virtual void calcStatesStates(double* destP,
00166                                   const int* states1,
00167                                   const double* matrices1,
00168                                   const int* states2,
00169                                   const double* matrices2);
00170     
          // __restrict on a pointer parameter tells the compiler that the
          // pointed-to data is not accessed through any other pointer; callers
          // must not pass overlapping buffers for those arguments.
00171     virtual void calcStatesPartials(double* destP,
00172                                     const int* states1,
00173                                     const double* __restrict matrices1,
00174                                     const double* __restrict partials2,
00175                                     const double* __restrict matrices2);
00176     
          // The FixedScaling variants take an additional scaleFactors array.
00177     virtual void calcStatesPartialsFixedScaling(double* destP,
00178                                                 const int* states1,
00179                                                 const double* __restrict matrices1,
00180                                                 const double* __restrict partials2,
00181                                                 const double* __restrict matrices2,
00182                                                 const double* __restrict scaleFactors);
00183     
00184     virtual void calcPartialsPartials(double* __restrict destP,
00185                                       const double* __restrict partials1,
00186                                       const double* __restrict matrices1,
00187                                       const double* __restrict partials2,
00188                                       const double* __restrict matrices2);
00189     
00190     virtual void calcPartialsPartialsFixedScaling(double* __restrict destP,
00191                                                   const double* __restrict child0Partials,
00192                                                   const double* __restrict child0TransMat,
00193                                                   const double* __restrict child1Partials,
00194                                                   const double* __restrict child1TransMat,
00195                                                   const double* __restrict scaleFactors);
00196     
          // The AutoScaling variant takes a non-const int* activateScaling.
          // NOTE(review): presumably an output flag - confirm in the .hpp.
00197     virtual void calcPartialsPartialsAutoScaling(double* __restrict destP,
00198                                                  const double* __restrict partials1,
00199                                                  const double* __restrict matrices1,
00200                                                  const double* __restrict partials2,
00201                                                  const double* __restrict matrices2,
00202                                                  int* activateScaling);
00203     
          // All inputs are buffer/array indices; the result is delivered
          // through the double* outSumLogLikelihood parameter.
          // NOTE(review): the int return is presumably a status code - confirm.
00204     virtual int calcEdgeLogLikelihoods(const int parentBufferIndex,
00205                                        const int childBufferIndex,
00206                                        const int probabilityIndex,
00207                                        const int categoryWeightsIndex,
00208                                        const int stateFrequenciesIndex,
00209                                        const int scalingFactorsIndex,
00210                                        double* outSumLogLikelihood);
00211     
00212 };
00213     
00214     
// Factory class derived from BeagleImplFactory.
// NOTE(review): by its name it presumably creates BeagleCPU4StateSSEImpl
// instances - confirm against the definitions in BeagleCPU4StateSSEImpl.hpp.
// BEAGLE_CPU_FACTORY_TEMPLATE is not defined in this header (presumably it
// comes from the included BeagleCPU4StateImpl.h - confirm).
00215 BEAGLE_CPU_FACTORY_TEMPLATE
00216 class BeagleCPU4StateSSEImplFactory : public BeagleImplFactory {
00217 public:
          // Returns a BeagleImpl pointer for an instance described by the given
          // counts, resource number and preference/requirement flag masks.
          // NOTE(review): errorCode is a non-const int*, presumably an output
          // status; ownership of the returned pointer is not visible here -
          // confirm both in the .hpp.
00218     virtual BeagleImpl* createImpl(int tipCount,
00219                                    int partialsBufferCount,
00220                                    int compactBufferCount,
00221                                    int stateCount,
00222                                    int patternCount,
00223                                    int eigenBufferCount,
00224                                    int matrixBufferCount,
00225                                    int categoryCount,
00226                                    int scaleBufferCount,
00227                                    int resourceNumber,
00228                                    long preferenceFlags,
00229                                    long requirementFlags,
00230                                    int* errorCode);
00231 
          // Name and capability flags reported by this factory.
00232     virtual const char* getName();
00233     virtual const long getFlags();
00234 };
00235 
00236 }       // namespace cpu
00237 }       // namespace beagle
00238 
00239 // now include the file containing template function implementations
00240 #include "libhmsbeagle/CPU/BeagleCPU4StateSSEImpl.hpp"
00241 
00242 
00243 #endif // __BeagleCPU4StateSSEImpl__