// HMSBEAGLE 1.0.0
00001 /* 00002 * BeagleCPUSSEImpl.h 00003 * BEAGLE 00004 * 00005 * Copyright 2010 Phylogenetic Likelihood Working Group 00006 * 00007 * This file is part of BEAGLE. 00008 * 00009 * BEAGLE is free software: you can redistribute it and/or modify 00010 * it under the terms of the GNU Lesser General Public License as 00011 * published by the Free Software Foundation, either version 3 of 00012 * the License, or (at your option) any later version. 00013 * 00014 * BEAGLE is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU Lesser General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU Lesser General Public 00020 * License along with BEAGLE. If not, see 00021 * <http://www.gnu.org/licenses/>. 00022 * 00023 * @author Marc Suchard 00024 */ 00025 00026 #ifndef __BeagleCPUSSEImpl__ 00027 #define __BeagleCPUSSEImpl__ 00028 00029 #ifdef HAVE_CONFIG_H 00030 #include "libhmsbeagle/config.h" 00031 #endif 00032 00033 #include "libhmsbeagle/CPU/BeagleCPUImpl.h" 00034 00035 #include <vector> 00036 00037 #define RESTRICT __restrict /* may need to define this instead to 'restrict' */ 00038 00039 00040 // Pad transition matrix rows with an extra 1.0 for ambiguous characters 00041 #define T_PAD_SSE_EVEN 2 // for even state counts 00042 #define T_PAD_SSE_ODD 1 // for odd state counts 00043 00044 // Partials padding 00045 #define P_PAD_SSE_EVEN 0 // for even state counts 00046 #define P_PAD_SSE_ODD 1 // for odd state counts 00047 00048 00049 #define BEAGLE_CPU_SSE_FLOAT float, T_PAD, P_PAD 00050 #define BEAGLE_CPU_SSE_DOUBLE double, T_PAD, P_PAD 00051 #define BEAGLE_CPU_SSE_TEMPLATE template <int T_PAD, int P_PAD> 00052 00053 namespace beagle { 00054 namespace cpu { 00055 00056 BEAGLE_CPU_TEMPLATE 00057 class BeagleCPUSSEImpl : public BeagleCPUImpl<BEAGLE_CPU_GENERIC> { 00058 // void inline 
innerPartialsPartals( 00059 // const double* __restrict partials1, 00060 // const double* __restrict matrices1, 00061 // const double* __restrict partials2, 00062 // const double* __restrict matrices2, 00063 // V_Real& sum1_vec, 00064 // V_Real& sum2_vec, 00065 // V_Real& out, 00066 // int& v, 00067 // int& w); 00068 00069 }; 00070 00071 BEAGLE_CPU_SSE_TEMPLATE 00072 class BeagleCPUSSEImpl<BEAGLE_CPU_SSE_FLOAT> : public BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT> { 00073 00074 protected: 00075 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kTipCount; 00076 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gPartials; 00077 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::integrationTmp; 00078 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gTransitionMatrices; 00079 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kPatternCount; 00080 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kPaddedPatternCount; 00081 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kExtraPatterns; 00082 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kStateCount; 00083 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gTipStates; 00084 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kCategoryCount; 00085 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gScaleBuffers; 00086 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gCategoryWeights; 00087 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::gStateFrequencies; 00088 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::realtypeMin; 00089 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kMatrixSize; 00090 using BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT>::kPartialsPaddedStateCount; 00091 00092 public: 00093 virtual const char* getName(); 00094 00095 virtual const long getFlags(); 00096 00097 protected: 00098 virtual int getPaddedPatternsModulus(); 00099 00100 private: 00101 virtual void calcStatesStates(float* destP, 00102 const int* states1, 00103 const float* matrices1, 00104 const int* states2, 00105 const float* matrices2); 00106 00107 virtual void calcStatesPartials(float* destP, 00108 const int* states1, 00109 const float* matrices1, 00110 
const float* partials2, 00111 const float* matrices2); 00112 00113 virtual void calcPartialsPartials(float* __restrict destP, 00114 const float* __restrict partials1, 00115 const float* __restrict matrices1, 00116 const float* __restrict partials2, 00117 const float* __restrict matrices2); 00118 00119 virtual void calcPartialsPartialsFixedScaling(float* __restrict destP, 00120 const float* __restrict partials1, 00121 const float* __restrict matrices1, 00122 const float* __restrict partials2, 00123 const float* __restrict matrices2, 00124 const float* __restrict scaleFactors); 00125 00126 virtual void calcPartialsPartialsAutoScaling(float* __restrict destP, 00127 const float* __restrict partials1, 00128 const float* __restrict matrices1, 00129 const float* __restrict partials2, 00130 const float* __restrict matrices2, 00131 int* activateScaling); 00132 00133 virtual int calcEdgeLogLikelihoods(const int parentBufferIndex, 00134 const int childBufferIndex, 00135 const int probabilityIndex, 00136 const int categoryWeightsIndex, 00137 const int stateFrequenciesIndex, 00138 const int scalingFactorsIndex, 00139 double* outSumLogLikelihood); 00140 00141 00142 }; 00143 00144 00145 BEAGLE_CPU_SSE_TEMPLATE 00146 class BeagleCPUSSEImpl<BEAGLE_CPU_SSE_DOUBLE> : public BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE> { 00147 00148 protected: 00149 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kTipCount; 00150 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gPartials; 00151 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::integrationTmp; 00152 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gTransitionMatrices; 00153 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kPatternCount; 00154 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kPaddedPatternCount; 00155 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kExtraPatterns; 00156 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kStateCount; 00157 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gTipStates; 00158 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kCategoryCount; 00159 
using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gScaleBuffers; 00160 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gCategoryWeights; 00161 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::gStateFrequencies; 00162 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::realtypeMin; 00163 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kMatrixSize; 00164 using BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE>::kPartialsPaddedStateCount; 00165 00166 public: 00167 virtual const char* getName(); 00168 00169 virtual const long getFlags(); 00170 00171 protected: 00172 virtual int getPaddedPatternsModulus(); 00173 00174 private: 00175 virtual void calcStatesStates(double* destP, 00176 const int* states1, 00177 const double* matrices1, 00178 const int* states2, 00179 const double* matrices2); 00180 00181 virtual void calcStatesPartials(double* destP, 00182 const int* states1, 00183 const double* matrices1, 00184 const double* partials2, 00185 const double* matrices2); 00186 00187 virtual void calcPartialsPartials(double* __restrict destP, 00188 const double* __restrict partials1, 00189 const double* __restrict matrices1, 00190 const double* __restrict partials2, 00191 const double* __restrict matrices2); 00192 00193 virtual void calcPartialsPartialsFixedScaling(double* __restrict destP, 00194 const double* __restrict partials1, 00195 const double* __restrict matrices1, 00196 const double* __restrict partials2, 00197 const double* __restrict matrices2, 00198 const double* __restrict scaleFactors); 00199 00200 virtual void calcPartialsPartialsAutoScaling(double* __restrict destP, 00201 const double* __restrict partials1, 00202 const double* __restrict matrices1, 00203 const double* __restrict partials2, 00204 const double* __restrict matrices2, 00205 int* activateScaling); 00206 00207 virtual int calcEdgeLogLikelihoods(const int parentBufferIndex, 00208 const int childBufferIndex, 00209 const int probabilityIndex, 00210 const int categoryWeightsIndex, 00211 const int stateFrequenciesIndex, 00212 const int 
scalingFactorsIndex, 00213 double* outSumLogLikelihood); 00214 00215 }; 00216 00217 BEAGLE_CPU_FACTORY_TEMPLATE 00218 class BeagleCPUSSEImplFactory : public BeagleImplFactory { 00219 public: 00220 virtual BeagleImpl* createImpl(int tipCount, 00221 int partialsBufferCount, 00222 int compactBufferCount, 00223 int stateCount, 00224 int patternCount, 00225 int eigenBufferCount, 00226 int matrixBufferCount, 00227 int categoryCount, 00228 int scaleBufferCount, 00229 int resourceNumber, 00230 long preferenceFlags, 00231 long requirementFlags, 00232 int* errorCode); 00233 00234 virtual const char* getName(); 00235 virtual const long getFlags(); 00236 }; 00237 00238 } // namespace cpu 00239 } // namespace beagle 00240 00241 // now include the file containing template function implementations 00242 #include "libhmsbeagle/CPU/BeagleCPUSSEImpl.hpp" 00243 00244 00245 #endif // __BeagleCPUSSEImpl__