HMSBEAGLE  1.0.0
libhmsbeagle/CPU/BeagleCPUSSEImpl.h
00001 /*
00002  *  BeagleCPUSSEImpl.h
00003  *  BEAGLE
00004  *
00005  * Copyright 2010 Phylogenetic Likelihood Working Group
00006  *
00007  * This file is part of BEAGLE.
00008  *
00009  * BEAGLE is free software: you can redistribute it and/or modify
00010  * it under the terms of the GNU Lesser General Public License as
00011  * published by the Free Software Foundation, either version 3 of
00012  * the License, or (at your option) any later version.
00013  *
00014  * BEAGLE is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with BEAGLE.  If not, see
00021  * <http://www.gnu.org/licenses/>.
00022  *
00023  * @author Marc Suchard
00024  */
00025 
00026 #ifndef __BeagleCPUSSEImpl__
00027 #define __BeagleCPUSSEImpl__
00028 
00029 #ifdef HAVE_CONFIG_H
00030 #include "libhmsbeagle/config.h"
00031 #endif
00032 
00033 #include "libhmsbeagle/CPU/BeagleCPUImpl.h"
00034 
00035 #include <vector>
00036 
// Compiler-specific "no aliasing" pointer qualifier.
// NOTE(review): the method declarations further down in this header spell
// `__restrict` directly rather than using this macro, so changing the
// definition here does not change those declarations.
00037 #define RESTRICT __restrict             /* may need to define this instead to 'restrict' */
00038 
00039 
00040 // Pad transition matrix rows with an extra 1.0 for ambiguous characters
      // NOTE(review): presumably these values are passed as the T_PAD template
      // argument by code outside this header -- confirm against the callers.
00041 #define T_PAD_SSE_EVEN  2   // for even state counts
00042 #define T_PAD_SSE_ODD   1   // for odd state counts
00043 
00044 // Partials padding
      // NOTE(review): presumably passed as the P_PAD template argument -- confirm.
00045 #define P_PAD_SSE_EVEN  0   // for even state counts
00046 #define P_PAD_SSE_ODD   1   // for odd state counts
00047 
00048 
      // Template-argument lists and template header used by the two partial
      // specializations of BeagleCPUSSEImpl in this file. BEAGLE_CPU_SSE_TEMPLATE
      // introduces the integer parameters T_PAD and P_PAD; the FLOAT / DOUBLE
      // macros fix the first argument to float / double and forward T_PAD and
      // P_PAD unchanged, so they are only meaningful where those two names are
      // in scope.
00049 #define BEAGLE_CPU_SSE_FLOAT    float, T_PAD, P_PAD
00050 #define BEAGLE_CPU_SSE_DOUBLE   double, T_PAD, P_PAD
00051 #define BEAGLE_CPU_SSE_TEMPLATE template <int T_PAD, int P_PAD>
00052 
00053 namespace beagle {
00054 namespace cpu {
00055 
/**
 * Primary template of BeagleCPUSSEImpl.
 *
 * It derives from BeagleCPUImpl<BEAGLE_CPU_GENERIC> and declares no members
 * of its own; the only content of the body is a commented-out helper
 * declaration. The float and double partial specializations are declared
 * separately in this header.
 *
 * BEAGLE_CPU_TEMPLATE and BEAGLE_CPU_GENERIC are not defined in this header;
 * presumably they come from BeagleCPUImpl.h -- confirm.
 */
00056 BEAGLE_CPU_TEMPLATE
00057 class BeagleCPUSSEImpl : public BeagleCPUImpl<BEAGLE_CPU_GENERIC> {
      // Disabled helper declaration, kept for reference.
      // NOTE(review): "innerPartialsPartals" looks like a misspelling of
      // "innerPartialsPartials"; V_Real is not declared in this header.
00058 //    void inline innerPartialsPartals(
00059 //              const double* __restrict partials1,
00060 //              const double* __restrict matrices1,
00061 //              const double* __restrict partials2,
00062 //              const double* __restrict matrices2,
00063 //              V_Real& sum1_vec,
00064 //              V_Real& sum2_vec,
00065 //              V_Real& out,
00066 //              int& v,
00067 //              int& w);
00068 
00069 };
00070 
/**
 * Partial specialization of BeagleCPUSSEImpl for single precision
 * (first template argument fixed to float; T_PAD and P_PAD remain template
 * parameters).
 *
 * Every member function is only declared here. The definitions are expected
 * in BeagleCPUSSEImpl.hpp, which this header includes at the bottom.
 * All member functions are virtual; presumably they override same-named
 * virtuals of BeagleCPUImpl -- confirm against BeagleCPUImpl.h.
 */
00071 BEAGLE_CPU_SSE_TEMPLATE
00072 class BeagleCPUSSEImpl<BEAGLE_CPU_SSE_FLOAT> : public BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT> {
00073 
00074 protected:
              // The base class depends on T_PAD / P_PAD, so its members are not
              // found by unqualified name lookup inside this template. These
              // using-declarations make them usable without a `this->` or
              // base-class qualifier.
00075         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kTipCount;
00076         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gPartials;
00077         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::integrationTmp;
00078         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gTransitionMatrices;
00079         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kPatternCount;
00080         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kPaddedPatternCount;
00081         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kExtraPatterns;
00082         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kStateCount;
00083         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gTipStates;
00084         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kCategoryCount;
00085         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gScaleBuffers;
00086         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gCategoryWeights;
00087         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gStateFrequencies;
00088         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::realtypeMin;
00089         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kMatrixSize;
00090         using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kPartialsPaddedStateCount;
00091 
00092 public:
          // Returns a C string naming this implementation (value defined elsewhere).
00093     virtual const char* getName();
00094     
          // Returns a long bit set of flags for this implementation (value defined
          // elsewhere). NOTE(review): the top-level const on the by-value return has
          // no effect for callers; presumably kept to match the base declaration.
00095     virtual const long getFlags();
00096 
00097 protected:
          // NOTE(review): by its name, the modulus to which the pattern count is
          // padded -- confirm in the .hpp definition.
00098     virtual int getPaddedPatternsModulus();
00099 
00100 private:
              // Private virtual kernels operating on float buffers. Each writes through
              // destP (the only non-const data pointer). Judging by the parameter names,
              // the variants differ in whether each child is supplied as an int state
              // array or as a float partials array -- confirm in the .hpp definitions.
00101         virtual void calcStatesStates(float* destP,
00102                                      const int* states1,
00103                                      const float* matrices1,
00104                                      const int* states2,
00105                                      const float* matrices2);
00106 
00107     virtual void calcStatesPartials(float* destP,
00108                                     const int* states1,
00109                                     const float* matrices1,
00110                                     const float* partials2,
00111                                     const float* matrices2);
00112 
          // The __restrict qualifiers below are a promise by the caller that the
          // pointed-to buffers do not alias one another.
00113     virtual void calcPartialsPartials(float* __restrict destP,
00114                                       const float* __restrict partials1,
00115                                       const float* __restrict matrices1,
00116                                       const float* __restrict partials2,
00117                                       const float* __restrict matrices2);
00118     
          // Same inputs as calcPartialsPartials plus a read-only scaleFactors array.
00119     virtual void calcPartialsPartialsFixedScaling(float* __restrict destP,
00120                                       const float* __restrict partials1,
00121                                       const float* __restrict matrices1,
00122                                       const float* __restrict partials2,
00123                                       const float* __restrict matrices2,
00124                                       const float* __restrict scaleFactors);
00125 
          // Same inputs as calcPartialsPartials plus a writable int* activateScaling
          // (presumably an output flag -- confirm).
00126     virtual void calcPartialsPartialsAutoScaling(float* __restrict destP,
00127                                                  const float* __restrict partials1,
00128                                                  const float* __restrict matrices1,
00129                                                  const float* __restrict partials2,
00130                                                  const float* __restrict matrices2,
00131                                                  int* activateScaling);
00132 
          // Takes six integer buffer/table indices and a double* output; returns int
          // (presumably a status code -- confirm). Note the output is double even in
          // this float specialization.
00133     virtual int calcEdgeLogLikelihoods(const int parentBufferIndex,
00134                                         const int childBufferIndex,
00135                                         const int probabilityIndex,
00136                                         const int categoryWeightsIndex,
00137                                         const int stateFrequenciesIndex,
00138                                         const int scalingFactorsIndex,
00139                                         double* outSumLogLikelihood);
00140 
00141 
00142 };
00143 
00144     
/**
 * Partial specialization of BeagleCPUSSEImpl for double precision
 * (first template argument fixed to double; T_PAD and P_PAD remain template
 * parameters).
 *
 * Every member function is only declared here. The definitions are expected
 * in BeagleCPUSSEImpl.hpp, which this header includes at the bottom.
 * All member functions are virtual; presumably they override same-named
 * virtuals of BeagleCPUImpl -- confirm against BeagleCPUImpl.h.
 */
00145 BEAGLE_CPU_SSE_TEMPLATE
00146 class BeagleCPUSSEImpl<BEAGLE_CPU_SSE_DOUBLE> : public BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE> {
00147 
00148 protected:
              // The base class depends on T_PAD / P_PAD, so its members are not
              // found by unqualified name lookup inside this template. These
              // using-declarations make them usable without a `this->` or
              // base-class qualifier.
00149         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kTipCount;
00150         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gPartials;
00151         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::integrationTmp;
00152         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gTransitionMatrices;
00153         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kPatternCount;
00154         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kPaddedPatternCount;
00155         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kExtraPatterns;
00156         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kStateCount;
00157         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gTipStates;
00158         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kCategoryCount;
00159         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gScaleBuffers;
00160         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gCategoryWeights;
00161         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gStateFrequencies;
00162         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::realtypeMin;
00163         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kMatrixSize;
00164         using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kPartialsPaddedStateCount;
00165 
00166 public:
          // Returns a C string naming this implementation (value defined elsewhere).
00167     virtual const char* getName();
00168     
          // Returns a long bit set of flags for this implementation (value defined
          // elsewhere). NOTE(review): the top-level const on the by-value return has
          // no effect for callers; presumably kept to match the base declaration.
00169     virtual const long getFlags();
00170 
00171 protected:
          // NOTE(review): by its name, the modulus to which the pattern count is
          // padded -- confirm in the .hpp definition.
00172     virtual int getPaddedPatternsModulus();
00173 
00174 private:
              // Private virtual kernels operating on double buffers. Each writes through
              // destP (the only non-const data pointer). Judging by the parameter names,
              // the variants differ in whether each child is supplied as an int state
              // array or as a double partials array -- confirm in the .hpp definitions.
00175         virtual void calcStatesStates(double* destP,
00176                                      const int* states1,
00177                                      const double* matrices1,
00178                                      const int* states2,
00179                                      const double* matrices2);
00180 
00181     virtual void calcStatesPartials(double* destP,
00182                                     const int* states1,
00183                                     const double* matrices1,
00184                                     const double* partials2,
00185                                     const double* matrices2);
00186 
          // The __restrict qualifiers below are a promise by the caller that the
          // pointed-to buffers do not alias one another.
00187     virtual void calcPartialsPartials(double* __restrict destP,
00188                                       const double* __restrict partials1,
00189                                       const double* __restrict matrices1,
00190                                       const double* __restrict partials2,
00191                                       const double* __restrict matrices2);
00192     
          // Same inputs as calcPartialsPartials plus a read-only scaleFactors array.
00193     virtual void calcPartialsPartialsFixedScaling(double* __restrict destP,
00194                                       const double* __restrict partials1,
00195                                       const double* __restrict matrices1,
00196                                       const double* __restrict partials2,
00197                                       const double* __restrict matrices2,
00198                                       const double* __restrict scaleFactors);
00199 
          // Same inputs as calcPartialsPartials plus a writable int* activateScaling
          // (presumably an output flag -- confirm).
00200     virtual void calcPartialsPartialsAutoScaling(double* __restrict destP,
00201                                                  const double* __restrict partials1,
00202                                                  const double* __restrict matrices1,
00203                                                  const double* __restrict partials2,
00204                                                  const double* __restrict matrices2,
00205                                                  int* activateScaling);
00206 
          // Takes six integer buffer/table indices and a double* output; returns int
          // (presumably a status code -- confirm).
00207     virtual int calcEdgeLogLikelihoods(const int parentBufferIndex,
00208                                         const int childBufferIndex,
00209                                         const int probabilityIndex,
00210                                         const int categoryWeightsIndex,
00211                                         const int stateFrequenciesIndex,
00212                                         const int scalingFactorsIndex,
00213                                         double* outSumLogLikelihood);
00214 
00215 };
00216     
/**
 * Factory class template derived from BeagleImplFactory.
 *
 * It declares three virtual member functions (createImpl, getName, getFlags);
 * the definitions are expected in BeagleCPUSSEImpl.hpp, which this header
 * includes at the bottom.
 *
 * BEAGLE_CPU_FACTORY_TEMPLATE is not defined in this header; presumably it
 * comes from BeagleCPUImpl.h -- confirm.
 */
00217 BEAGLE_CPU_FACTORY_TEMPLATE
00218 class BeagleCPUSSEImplFactory : public BeagleImplFactory {
00219 public:
          // Returns a BeagleImpl pointer for the requested configuration.
          // errorCode is a writable int* (presumably receives a status code on
          // failure; ownership of the returned pointer is not visible here --
          // confirm both in the .hpp definition).
00220     virtual BeagleImpl* createImpl(int tipCount,
00221                                    int partialsBufferCount,
00222                                    int compactBufferCount,
00223                                    int stateCount,
00224                                    int patternCount,
00225                                    int eigenBufferCount,
00226                                    int matrixBufferCount,
00227                                    int categoryCount,
00228                                    int scaleBufferCount,
00229                                    int resourceNumber,
00230                                    long preferenceFlags,
00231                                    long requirementFlags,
00232                                    int* errorCode);
00233 
          // Name and flag accessors for this factory (values defined elsewhere).
00234     virtual const char* getName();
00235     virtual const long getFlags();
00236 };
00237 
00238 }       // namespace cpu
00239 }       // namespace beagle
00240 
00241 // now include the file containing template function implementations
00242 #include "libhmsbeagle/CPU/BeagleCPUSSEImpl.hpp"
00243 
00244 
00245 #endif // __BeagleCPUSSEImpl__