HMSBEAGLE
1.0.0
|
00001 /* 00002 * 00003 * Copyright 2009 Phylogenetic Likelihood Working Group 00004 * 00005 * This file is part of BEAGLE. 00006 * 00007 * BEAGLE is free software: you can redistribute it and/or modify 00008 * it under the terms of the GNU Lesser General Public License as 00009 * published by the Free Software Foundation, either version 3 of 00010 * the License, or (at your option) any later version. 00011 * 00012 * BEAGLE is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with BEAGLE. If not, see 00019 * <http://www.gnu.org/licenses/>. 00020 * 00021 * @author Marc Suchard 00022 * @author Dat Huynh 00023 * @author Daniel Ayres 00024 */ 00025 00026 #ifndef __GPUInterface__ 00027 #define __GPUInterface__ 00028 00029 #include <cstdio> 00030 00031 #ifdef HAVE_CONFIG_H 00032 #include "libhmsbeagle/config.h" 00033 #endif 00034 00035 #include <map> 00036 00037 #include "libhmsbeagle/GPU/GPUImplDefs.h" 00038 #include "libhmsbeagle/GPU/KernelResource.h" 00039 00040 #ifdef CUDA 00041 #include <cuda.h> 00042 # ifdef BEAGLE_XCODE 00043 #include "libhmsbeagle/GPU/kernels/BeagleCUDA_kernels_xcode.h" 00044 # else 00045 #include "libhmsbeagle/GPU/kernels/BeagleCUDA_kernels.h" 00046 # endif 00047 typedef CUdeviceptr GPUPtr; 00048 typedef CUfunction GPUFunction; 00049 #else 00050 #ifdef OPENCL 00051 #include <OpenCL/opencl.h> 00052 #include "libhmsbeagle/GPU/BeagleOpenCL_kernels.h" 00053 typedef cl_mem GPUPtr; 00054 typedef cl_kernel GPUFunction; 00055 #endif 00056 #endif 00057 00058 class GPUInterface { 00059 private: 00060 #ifdef CUDA 00061 CUdevice cudaDevice; 00062 CUcontext cudaContext; 00063 CUmodule cudaModule; 00064 const char* GetCUDAErrorDescription(int errorCode); 00065 #else 00066 #ifdef OPENCL 00067 cl_device_id openClDeviceId; // compute device id 00068 cl_context openClContext; // compute context 00069 cl_command_queue openClCommandQueue; // compute command queue 00070 cl_program openClProgram; // compute program 00071 cl_uint openClNumDevices; 00072 const char* GetCLErrorDescription(int errorCode); 00073 #endif 00074 #endif 00075 public: 00076 GPUInterface(); 00077 00078 ~GPUInterface(); 00079 00080 int Initialize(); 00081 00082 int GetDeviceCount(); 00083 00084 void SetDevice(int deviceNumber, 00085 int paddedStateCount, 00086 int categoryCount, 00087 int patternCount, 00088 long flags); 00089 00090 void Synchronize(); 00091 00092 GPUFunction GetFunction(const char* functionName); 00093 00094 void LaunchKernel(GPUFunction deviceFunction, 00095 Dim3Int block, 00096 Dim3Int grid, 00097 int parameterCountV, 00098 int totalParameterCount, 00099 ...); // parameters 00100 00101 void* MallocHost(size_t memSize); 00102 00103 void* CallocHost(size_t size, size_t length); 00104 00105 void* AllocatePinnedHostMemory(size_t memSize, 00106 bool writeCombined, 00107 bool mapped); 00108 00109 GPUPtr AllocateMemory(size_t memSize); 00110 00111 GPUPtr AllocateRealMemory(size_t length); 00112 00113 GPUPtr AllocateIntMemory(size_t length); 00114 00115 void MemsetShort(GPUPtr dest, 00116 unsigned short val, 00117 size_t count); 00118 00119 void MemcpyHostToDevice(GPUPtr dest, 00120 const void* src, 00121 size_t memSize); 00122 00123 void MemcpyDeviceToHost(void* dest, 00124 const GPUPtr src, 00125 size_t memSize); 00126 00127 void MemcpyDeviceToDevice(GPUPtr dest, 00128 GPUPtr src, 00129 size_t memSize); 00130 00131 void FreeHostMemory(void* hPtr); 00132 00133 void FreePinnedHostMemory(void* hPtr); 00134 00135 void FreeMemory(GPUPtr dPtr); 00136 00137 GPUPtr GetDevicePointer(void* hPtr); 00138 00139 unsigned int GetAvailableMemory(); 00140 00141 void GetDeviceName(int deviceNumber, 00142 char* deviceName, 00143 int nameLength); 00144 00145 void GetDeviceDescription(int deviceNumber, 00146 char* deviceDescription); 00147 00148 bool GetSupportsDoublePrecision(int deviceNumber); 00149 00150 template<typename Real> 00151 void PrintfDeviceVector(GPUPtr dPtr, int length, Real r) { 00152 PrintfDeviceVector(dPtr,length,-1, 0, r); 00153 } 00154 00155 template<typename Real> 00156 void PrintfDeviceVector(GPUPtr dPtr, 00157 int length, double checkValue, Real r); 00158 00159 template<typename Real> 00160 void PrintfDeviceVector(GPUPtr dPtr, 00161 int length, 00162 double checkValue, 00163 int *signal, 00164 Real r) { 00165 Real* hPtr = (Real*) malloc(sizeof(Real) * length); 00166 00167 MemcpyDeviceToHost(hPtr, dPtr, sizeof(Real) * length); 00168 printfVector(hPtr, length); 00169 00170 if (checkValue != -1) { 00171 double sum = 0; 00172 for(int i=0; i<length; i++) { 00173 sum += hPtr[i]; 00174 if( (hPtr[i] > checkValue) && (hPtr[i]-checkValue > 1.0E-4)) { 00175 fprintf(stderr,"Check value exception! (%d) %2.5e > %2.5e (diff = %2.5e)\n", 00176 i,hPtr[i],checkValue, (hPtr[i]-checkValue)); 00177 if( signal != 0 ) 00178 *signal = 1; 00179 } 00180 if (hPtr[i] != hPtr[i]) { 00181 fprintf(stderr,"NaN found!\n"); 00182 if( signal != 0 ) 00183 *signal = 1; 00184 } 00185 } 00186 if (sum == 0) { 00187 fprintf(stderr,"Zero-sum vector!\n"); 00188 if( signal != 0 ) 00189 *signal = 1; 00190 } 00191 } 00192 free(hPtr); 00193 } 00194 00195 void PrintfDeviceInt(GPUPtr dPtr, 00196 int length); 00197 00198 void DestroyKernelMap(); 00199 00200 KernelResource* kernelResource; 00201 00202 protected: 00203 void InitializeKernelMap(); 00204 00205 std::map<int, int>* resourceMap; 00206 00207 bool supportDoublePrecision; 00208 }; 00209 00210 #endif // __GPUInterface__