// HMSBEAGLE 1.0.0
00001 /* 00002 * BeagleCPU4StateSSEImpl.h 00003 * BEAGLE 00004 * 00005 * Copyright 2009 Phylogenetic Likelihood Working Group 00006 * 00007 * This file is part of BEAGLE. 00008 * 00009 * BEAGLE is free software: you can redistribute it and/or modify 00010 * it under the terms of the GNU Lesser General Public License as 00011 * published by the Free Software Foundation, either version 3 of 00012 * the License, or (at your option) any later version. 00013 * 00014 * BEAGLE is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU Lesser General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU Lesser General Public 00020 * License along with BEAGLE. If not, see 00021 * <http://www.gnu.org/licenses/>. 00022 * 00023 * @author Marc Suchard 00024 */ 00025 00026 #ifndef __BeagleCPU4StateSSEImpl__ 00027 #define __BeagleCPU4StateSSEImpl__ 00028 00029 #ifdef HAVE_CONFIG_H 00030 #include "libhmsbeagle/config.h" 00031 #endif 00032 00033 #include "libhmsbeagle/CPU/BeagleCPU4StateImpl.h" 00034 00035 #include <vector> 00036 00037 #define RESTRICT __restrict /* may need to define this instead to 'restrict' */ 00038 00039 #define T_PAD_4_SSE_DEFAULT 2 // Pad transition matrix with 2 rows for SSE 00040 #define P_PAD_4_SSE_DEFAULT 0 // Partials padding not needed for 4 states SSE 00041 00042 #define BEAGLE_CPU_4_SSE_FLOAT float, T_PAD, P_PAD 00043 #define BEAGLE_CPU_4_SSE_DOUBLE double, T_PAD, P_PAD 00044 #define BEAGLE_CPU_4_SSE_TEMPLATE template <int T_PAD, int P_PAD> 00045 00046 namespace beagle { 00047 namespace cpu { 00048 00049 BEAGLE_CPU_TEMPLATE 00050 class BeagleCPU4StateSSEImpl : public BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC> {}; 00051 00052 00053 BEAGLE_CPU_4_SSE_TEMPLATE 00054 class BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_FLOAT> : public BeagleCPU4StateImpl<BEAGLE_CPU_4_SSE_FLOAT> { 00055 
00056 protected: 00057 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kTipCount; 00058 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gPartials; 00059 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::integrationTmp; 00060 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gTransitionMatrices; 00061 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kPatternCount; 00062 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kPaddedPatternCount; 00063 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kExtraPatterns; 00064 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kStateCount; 00065 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gTipStates; 00066 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::kCategoryCount; 00067 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gScaleBuffers; 00068 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gCategoryWeights; 00069 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gStateFrequencies; 00070 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::realtypeMin; 00071 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::outLogLikelihoodsTmp; 00072 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_FLOAT>::gPatternWeights; 00073 00074 public: 00075 virtual const char* getName(); 00076 00077 virtual const long getFlags(); 00078 00079 protected: 00080 virtual int getPaddedPatternsModulus(); 00081 00082 private: 00083 00084 virtual void calcStatesStates(float* destP, 00085 const int* states1, 00086 const float* matrices1, 00087 const int* states2, 00088 const float* matrices2); 00089 00090 virtual void calcStatesPartials(float* destP, 00091 const int* states1, 00092 const float* __restrict matrices1, 00093 const float* __restrict partials2, 00094 const float* __restrict matrices2); 00095 00096 virtual void calcStatesPartialsFixedScaling(float* destP, 00097 const int* states1, 00098 const float* __restrict matrices1, 00099 const float* __restrict partials2, 00100 const float* __restrict matrices2, 00101 const float* __restrict scaleFactors); 00102 00103 virtual void calcPartialsPartials(float* __restrict destP, 00104 
const float* __restrict partials1, 00105 const float* __restrict matrices1, 00106 const float* __restrict partials2, 00107 const float* __restrict matrices2); 00108 00109 virtual void calcPartialsPartialsFixedScaling(float* __restrict destP, 00110 const float* __restrict child0Partials, 00111 const float* __restrict child0TransMat, 00112 const float* __restrict child1Partials, 00113 const float* __restrict child1TransMat, 00114 const float* __restrict scaleFactors); 00115 00116 virtual void calcPartialsPartialsAutoScaling(float* __restrict destP, 00117 const float* __restrict partials1, 00118 const float* __restrict matrices1, 00119 const float* __restrict partials2, 00120 const float* __restrict matrices2, 00121 int* activateScaling); 00122 00123 virtual int calcEdgeLogLikelihoods(const int parentBufferIndex, 00124 const int childBufferIndex, 00125 const int probabilityIndex, 00126 const int categoryWeightsIndex, 00127 const int stateFrequenciesIndex, 00128 const int scalingFactorsIndex, 00129 double* outSumLogLikelihood); 00130 00131 }; 00132 00133 00134 BEAGLE_CPU_4_SSE_TEMPLATE 00135 class BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE> : public BeagleCPU4StateImpl<BEAGLE_CPU_4_SSE_DOUBLE> { 00136 00137 protected: 00138 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kTipCount; 00139 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gPartials; 00140 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::integrationTmp; 00141 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gTransitionMatrices; 00142 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kPatternCount; 00143 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kPaddedPatternCount; 00144 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kExtraPatterns; 00145 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kStateCount; 00146 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gTipStates; 00147 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::kCategoryCount; 00148 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gScaleBuffers; 00149 using 
BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gCategoryWeights; 00150 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gStateFrequencies; 00151 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::realtypeMin; 00152 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::outLogLikelihoodsTmp; 00153 using BeagleCPUImpl<BEAGLE_CPU_4_SSE_DOUBLE>::gPatternWeights; 00154 00155 public: 00156 virtual const char* getName(); 00157 00158 virtual const long getFlags(); 00159 00160 protected: 00161 virtual int getPaddedPatternsModulus(); 00162 00163 private: 00164 00165 virtual void calcStatesStates(double* destP, 00166 const int* states1, 00167 const double* matrices1, 00168 const int* states2, 00169 const double* matrices2); 00170 00171 virtual void calcStatesPartials(double* destP, 00172 const int* states1, 00173 const double* __restrict matrices1, 00174 const double* __restrict partials2, 00175 const double* __restrict matrices2); 00176 00177 virtual void calcStatesPartialsFixedScaling(double* destP, 00178 const int* states1, 00179 const double* __restrict matrices1, 00180 const double* __restrict partials2, 00181 const double* __restrict matrices2, 00182 const double* __restrict scaleFactors); 00183 00184 virtual void calcPartialsPartials(double* __restrict destP, 00185 const double* __restrict partials1, 00186 const double* __restrict matrices1, 00187 const double* __restrict partials2, 00188 const double* __restrict matrices2); 00189 00190 virtual void calcPartialsPartialsFixedScaling(double* __restrict destP, 00191 const double* __restrict child0Partials, 00192 const double* __restrict child0TransMat, 00193 const double* __restrict child1Partials, 00194 const double* __restrict child1TransMat, 00195 const double* __restrict scaleFactors); 00196 00197 virtual void calcPartialsPartialsAutoScaling(double* __restrict destP, 00198 const double* __restrict partials1, 00199 const double* __restrict matrices1, 00200 const double* __restrict partials2, 00201 const double* __restrict matrices2, 00202 
int* activateScaling); 00203 00204 virtual int calcEdgeLogLikelihoods(const int parentBufferIndex, 00205 const int childBufferIndex, 00206 const int probabilityIndex, 00207 const int categoryWeightsIndex, 00208 const int stateFrequenciesIndex, 00209 const int scalingFactorsIndex, 00210 double* outSumLogLikelihood); 00211 00212 }; 00213 00214 00215 BEAGLE_CPU_FACTORY_TEMPLATE 00216 class BeagleCPU4StateSSEImplFactory : public BeagleImplFactory { 00217 public: 00218 virtual BeagleImpl* createImpl(int tipCount, 00219 int partialsBufferCount, 00220 int compactBufferCount, 00221 int stateCount, 00222 int patternCount, 00223 int eigenBufferCount, 00224 int matrixBufferCount, 00225 int categoryCount, 00226 int scaleBufferCount, 00227 int resourceNumber, 00228 long preferenceFlags, 00229 long requirementFlags, 00230 int* errorCode); 00231 00232 virtual const char* getName(); 00233 virtual const long getFlags(); 00234 }; 00235 00236 } // namespace cpu 00237 } // namespace beagle 00238 00239 // now include the file containing template function implementations 00240 #include "libhmsbeagle/CPU/BeagleCPU4StateSSEImpl.hpp" 00241 00242 00243 #endif // __BeagleCPU4StateSSEImpl__