HMSBEAGLE  1.0.0
libhmsbeagle/GPU/GPUInterface.h
00001 /*
00002  *
00003  * Copyright 2009 Phylogenetic Likelihood Working Group
00004  *
00005  * This file is part of BEAGLE.
00006  *
00007  * BEAGLE is free software: you can redistribute it and/or modify
00008  * it under the terms of the GNU Lesser General Public License as
00009  * published by the Free Software Foundation, either version 3 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * BEAGLE is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with BEAGLE.  If not, see
00019  * <http://www.gnu.org/licenses/>.
00020  *
00021  * @author Marc Suchard
00022  * @author Dat Huynh
00023  * @author Daniel Ayres
00024  */
00025 
00026 #ifndef __GPUInterface__
00027 #define __GPUInterface__
00028 
00029 #include <cstdio>
00030 
00031 #ifdef HAVE_CONFIG_H
00032 #include "libhmsbeagle/config.h"
00033 #endif
00034 
00035 #include <map>
00036 
00037 #include "libhmsbeagle/GPU/GPUImplDefs.h"
00038 #include "libhmsbeagle/GPU/KernelResource.h"
00039 
00040 #ifdef CUDA // Backend selection: CUDA is tested first, so it wins if both CUDA and OPENCL are defined.
00041     #include <cuda.h>
00042 #   ifdef BEAGLE_XCODE // Xcode builds use a separate kernels header.
00043         #include "libhmsbeagle/GPU/kernels/BeagleCUDA_kernels_xcode.h"
00044 #   else
00045         #include "libhmsbeagle/GPU/kernels/BeagleCUDA_kernels.h"
00046 #   endif
00047     typedef CUdeviceptr GPUPtr; // Device-memory handle type used throughout GPUInterface (CUDA flavor).
00048     typedef CUfunction GPUFunction; // Kernel handle type returned by GPUInterface::GetFunction (CUDA flavor).
00049 #else
00050 #ifdef OPENCL // Only consulted when CUDA is not defined.
00051     #include <OpenCL/opencl.h> // NOTE(review): this is the Apple framework path; other platforms usually ship <CL/cl.h> -- confirm target platforms.
00052     #include "libhmsbeagle/GPU/BeagleOpenCL_kernels.h"
00053     typedef cl_mem GPUPtr; // Device-memory handle type used throughout GPUInterface (OpenCL flavor).
00054     typedef cl_kernel GPUFunction; // Kernel handle type returned by GPUInterface::GetFunction (OpenCL flavor).
00055 #endif // OPENCL
00056 #endif // CUDA -- if neither macro is defined, GPUPtr and GPUFunction stay undeclared and the class below cannot compile.
00057 
00058 class GPUInterface { // Compile-time-selected (CUDA or OpenCL) wrapper for device setup, kernel launch, memory transfer and debug printing.
00059 private:
00060 #ifdef CUDA
00061     CUdevice cudaDevice; // CUDA device handle.
00062     CUcontext cudaContext; // CUDA context handle.
00063     CUmodule cudaModule; // CUDA module handle; NOTE(review): presumably where GetFunction resolves kernel names -- confirm.
00064     const char* GetCUDAErrorDescription(int errorCode); // NOTE(review): presumably maps a CUDA error code to text; string ownership is not visible here.
00065 #else
00066 #ifdef OPENCL
00067     cl_device_id openClDeviceId;             // compute device id 
00068     cl_context openClContext;                // compute context
00069     cl_command_queue openClCommandQueue;     // compute command queue
00070     cl_program openClProgram;                // compute program
00071     cl_uint openClNumDevices; // NOTE(review): presumably the number of devices found at initialization -- confirm.
00072     const char* GetCLErrorDescription(int errorCode); // NOTE(review): presumably maps an OpenCL error code to text; string ownership is not visible here.
00073 #endif // OPENCL
00074 #endif // CUDA
00075 public:
00076     GPUInterface();
00077     
00078     ~GPUInterface(); // NOTE(review): destructor is user-declared but no copy operations are; a copy would duplicate the raw handles/pointers -- confirm instances are never copied.
00079     
00080     int Initialize(); // Returns an int; NOTE(review): meaning of the value (status vs. flag) is not visible in this header.
00081 
00082     int GetDeviceCount();
00083 
00084     void SetDevice(int deviceNumber, // NOTE(review): presumably an index below GetDeviceCount() -- confirm.
00085                    int paddedStateCount, 
00086                    int categoryCount, 
00087                    int patternCount,
00088                    long flags); // NOTE(review): flag semantics are defined elsewhere; not visible here.
00089     
00090     void Synchronize();
00091     
00092     GPUFunction GetFunction(const char* functionName); // Looks a kernel up by name and returns the backend kernel handle type.
00093     
00094     void LaunchKernel(GPUFunction deviceFunction,
00095                                Dim3Int block, // Dim3Int is declared outside this file.
00096                                Dim3Int grid,
00097                                int parameterCountV,
00098                                int totalParameterCount,
00099                                ...); // variadic kernel arguments; NOTE(review): how the two counts partition them is not visible here.
00100 
00101     void* MallocHost(size_t memSize);
00102     
00103     void* CallocHost(size_t size, size_t length);
00104     
00105     void* AllocatePinnedHostMemory(size_t memSize,
00106                                    bool writeCombined,
00107                                    bool mapped);
00108     
00109     GPUPtr AllocateMemory(size_t memSize);
00110     
00111     GPUPtr AllocateRealMemory(size_t length); // NOTE(review): `length` is presumably an element count (contrast `memSize` above) -- confirm.
00112 
00113     GPUPtr AllocateIntMemory(size_t length); // NOTE(review): `length` is presumably an element count -- confirm.
00114     
00115     void MemsetShort(GPUPtr dest,
00116                      unsigned short val,
00117                      size_t count); // NOTE(review): presumably a count of shorts rather than bytes -- confirm.
00118 
00119     void MemcpyHostToDevice(GPUPtr dest,
00120                             const void* src,
00121                             size_t memSize);
00122 
00123     void MemcpyDeviceToHost(void* dest,
00124                             const GPUPtr src,
00125                             size_t memSize);
00126     
00127     void MemcpyDeviceToDevice(GPUPtr dest,
00128                               GPUPtr src,
00129                               size_t memSize);
00130 
00131     void FreeHostMemory(void* hPtr);
00132     
00133     void FreePinnedHostMemory(void* hPtr);
00134     
00135     void FreeMemory(GPUPtr dPtr);
00136     
00137     GPUPtr GetDevicePointer(void* hPtr);
00138     
00139     unsigned int GetAvailableMemory(); // NOTE(review): units are not visible here; if bytes, unsigned int may cap the result near 4 GiB -- confirm.
00140     
00141     void GetDeviceName(int deviceNumber,
00142                        char* deviceName, // caller-supplied output buffer
00143                        int nameLength); // size of deviceName as passed by the caller
00144     
00145     void GetDeviceDescription(int deviceNumber,
00146                               char* deviceDescription); // NOTE(review): no buffer length is passed (unlike GetDeviceName) -- confirm the size callers must provide.
00147     
00148     bool GetSupportsDoublePrecision(int deviceNumber);
00149 
00150     template<typename Real>
00151     void PrintfDeviceVector(GPUPtr dPtr, int length, Real r) { // Print-only overload: dumps `length` Reals from device memory with no validation.
00152         PrintfDeviceVector(dPtr,length,-1, 0, r); // checkValue = -1 disables the checks in the 5-argument overload; signal = null.
00153     }
00154     
00155     template<typename Real>
00156     void PrintfDeviceVector(GPUPtr dPtr, // Overload without a signal pointer; only declared in this header.
00157                             int length, double checkValue, Real r);
00158     
00159     template<typename Real>
00160     void PrintfDeviceVector(GPUPtr dPtr, // Copies a device vector to the host, prints it, and optionally validates its values.
00161                             int length, // number of Real elements to copy and print
00162                             double checkValue, // upper bound for elements; -1 means "skip all checks"
00163                             int *signal, // optional; set to 1 when any check fails, never reset to 0 here
00164                             Real r) { // r is never read in the body; it only lets the compiler deduce Real
00165         Real* hPtr = (Real*) malloc(sizeof(Real) * length); // host staging buffer; NOTE(review): not null-checked, a negative length converts to a huge size_t, and <cstdlib> is not included directly by this header.
00166 
00167         MemcpyDeviceToHost(hPtr, dPtr, sizeof(Real) * length); // pull the whole vector back from the device
00168         printfVector(hPtr, length); // printfVector is not declared in this file; NOTE(review): presumably from GPUImplDefs.h -- confirm.
00169 
00170         if (checkValue != -1) { // validation runs only when a real bound was supplied
00171                 double sum = 0;
00172                 for(int i=0; i<length; i++) {
00173                         sum += hPtr[i];
00174                         if( (hPtr[i] > checkValue) && (hPtr[i]-checkValue > 1.0E-4)) { // one-sided test: only values above the bound by more than 1.0E-4 are reported
00175                                 fprintf(stderr,"Check value exception!  (%d) %2.5e > %2.5e (diff = %2.5e)\n",
00176                                                 i,hPtr[i],checkValue, (hPtr[i]-checkValue));
00177                                 if( signal != 0 ) // signal may be null (see the 3-argument overload)
00178                                         *signal = 1;
00179                         }
00180                         if (hPtr[i] != hPtr[i]) { // self-inequality holds only for NaN
00181                                 fprintf(stderr,"NaN found!\n");
00182                                 if( signal != 0 )
00183                                         *signal = 1;
00184                         }
00185                 }
00186                 if (sum == 0) { // exact comparison: reports a vector whose elements sum to exactly zero
00187                         fprintf(stderr,"Zero-sum vector!\n");
00188                         if( signal != 0 )
00189                                 *signal = 1;
00190                 }
00191         }
00192         free(hPtr); // release the staging buffer on every path (there are no early returns above)
00193     }
00194 
00195     void PrintfDeviceInt(GPUPtr dPtr, // NOTE(review): presumably the integer counterpart of PrintfDeviceVector -- confirm.
00196                    int length);
00197     
00198     void DestroyKernelMap();
00199     
00200     KernelResource* kernelResource; // public raw pointer; NOTE(review): who allocates and frees it is not visible in this header.
00201     
00202 protected:
00203         void InitializeKernelMap();
00204     
00205     std::map<int, int>* resourceMap; // raw pointer to an int-to-int map; NOTE(review): key/value meaning and ownership are not visible here.
00206 
00207     bool supportDoublePrecision; // NOTE(review): presumably cached for the device chosen in SetDevice -- confirm.
00208 };
00209 
00210 #endif // __GPUInterface__