HMSBEAGLE
1.0.0
|
/*
 *  BeagleCPU4StateSSEImpl.h
 *  BEAGLE
 *
 * Copyright 2009 Phylogenetic Likelihood Working Group
 *
 * This file is part of BEAGLE.
 *
 * BEAGLE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * BEAGLE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with BEAGLE.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * @author Marc Suchard
 */

#ifndef __SSEDefinitions__
#define __SSEDefinitions__

#ifdef HAVE_CONFIG_H
#include "libhmsbeagle/config.h"
#endif

/* SSE2 support is switched on unconditionally by this header. */
#define DLS_USE_SSE2

#if defined(DLS_USE_SSE2)
#	if !defined(DLS_MACOS)
#		include <emmintrin.h>
#	endif
#	include <xmmintrin.h>
#endif
typedef double VecEl_t;

/* Compiler-specific spelling of a 16-byte alignment request. */
#ifdef __GNUC__
#define ALIGN16 __attribute__((aligned(16)))
#else
#define ALIGN16 __declspec(align(16))
#endif

/*
 * Precision selection.  USE_DOUBLE_PREC is defined unconditionally just
 * below, so the double-precision branch is the one that is compiled.  The
 * single-precision branch defines only a subset of the VEC_* macros
 * (VEC_MULT, VEC_MADD, VEC_SPLAT, VEC_ADD).
 */
#define USE_DOUBLE_PREC
#if defined(USE_DOUBLE_PREC)
	typedef double RealType;
	typedef __m128d V_Real;
#	define REALS_PER_VEC	2	/* number of elements per vector */
#	define VEC_LOAD(a)		_mm_load_pd((a))
#	define VEC_LOAD_SCALAR(a)	_mm_load1_pd((a))
#	define VEC_STORE(a, b)		_mm_store_pd((a), (b))
#	define VEC_STORE_SCALAR(a, b)	_mm_store_sd((a), (b))
#	define VEC_MULT(a, b)		_mm_mul_pd((a), (b))
#	define VEC_DIV(a, b)		_mm_div_pd((a), (b))
	/* Multiply-add built from a separate multiply and add: (a * b) + c. */
#	define VEC_MADD(a, b, c)	_mm_add_pd(_mm_mul_pd((a), (b)), (c))
#	define VEC_SPLAT(a)		_mm_set1_pd((a))
#	define VEC_ADD(a, b)		_mm_add_pd((a), (b))
	/* Exchanges the two doubles of a vector. */
#	define VEC_SWAP(a)		_mm_shuffle_pd((a), (a), _MM_SHUFFLE2(0,1))
#	define VEC_SETZERO()		_mm_setzero_pd()
#	define VEC_SET1(a)		_mm_set_sd((a))
#	define VEC_SET(a, b)		_mm_set_pd((a), (b))
#	define VEC_MOVE(a, b)		_mm_move_sd((a), (b))
#else
	typedef float RealType;
	typedef __m128 V_Real;
#	define REALS_PER_VEC	4	/* number of elements per vector */
#	define VEC_MULT(a, b)		_mm_mul_ps((a), (b))
#	define VEC_MADD(a, b, c)	_mm_add_ps(_mm_mul_ps((a), (b)), (c))
#	define VEC_SPLAT(a)		_mm_set1_ps((a))
#	define VEC_ADD(a, b)		_mm_add_ps((a), (b))
#endif
typedef union 			/* for copying individual elements to and from vector floats */
{
	RealType x[REALS_PER_VEC];
	V_Real vx;
}
VecUnion;

/*
 * cpuid(func, ax, bx, cx, dx): executes the CPUID instruction with `func`
 * in EAX and writes the resulting EAX/EBX/ECX/EDX into the four lvalues.
 * GCC-compatible compilers only.
 *
 * NOTE: the expansion already ends in a semicolon (kept so existing call
 * sites are unaffected), so do not use it as the unbraced body of an
 * `if` that has an `else`.
 */
#ifdef __GNUC__
#define cpuid(func,ax,bx,cx,dx)\
	__asm__ __volatile__ ("cpuid":\
	"=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
#endif

#ifdef _WIN32

#endif

/*
 * Reports whether the CPU can run the SSE code paths.
 *
 * This is currently a stub: no CPUID query is performed and support is
 * always reported.
 *
 * Declared `static inline` because the definition lives in a header; with
 * external linkage every including translation unit would emit its own
 * copy of the symbol and the program would fail to link.
 *
 * @return always 1.
 */
static inline int CPUSupportsSSE(void) {
	return 1;
}

#endif // __SSEDefinitions__