HMSBEAGLE  1.0.0
libhmsbeagle/CPU/SSEDefinitions.h
00001 /*
00002  *  SSEDefinitions.h
00003  *  BEAGLE
00004  *
00005  * Copyright 2009 Phylogenetic Likelihood Working Group
00006  *
00007  * This file is part of BEAGLE.
00008  *
00009  * BEAGLE is free software: you can redistribute it and/or modify
00010  * it under the terms of the GNU Lesser General Public License as
00011  * published by the Free Software Foundation, either version 3 of
00012  * the License, or (at your option) any later version.
00013  *
00014  * BEAGLE is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with BEAGLE.  If not, see
00021  * <http://www.gnu.org/licenses/>.
00022  *
00023  * @author Marc Suchard
00024  */
00025 
00026 #ifndef __SSEDefinitions__
00027 #define __SSEDefinitions__
00028 
00029 #ifdef HAVE_CONFIG_H
00030 #include "libhmsbeagle/config.h"
00031 #endif
00032 
00033 #define DLS_USE_SSE2
00034 
00035 #if defined(DLS_USE_SSE2)
00036 #       if !defined(DLS_MACOS)
00037 #               include <emmintrin.h>
00038 #       endif
00039 #       include <xmmintrin.h>
00040 #endif
/* Scalar element type used alongside the vector helpers. */
typedef double VecEl_t;

/*
 * ALIGN16: declaration qualifier requesting 16-byte alignment.
 * GNU-compatible compilers get the attribute spelling; every other compiler
 * gets the __declspec spelling.
 */
#if defined(__GNUC__)
#   define ALIGN16 __attribute__((aligned(16)))
#else
#   define ALIGN16 __declspec(align(16))
#endif
00048 
/*
 * Precision selection for the SSE vector helpers.
 *
 * With USE_DOUBLE_PREC defined, RealType is double and V_Real is an __m128d
 * (REALS_PER_VEC == 2); otherwise RealType is float and V_Real is an __m128
 * (REALS_PER_VEC == 4).  The single-precision branch only provides VEC_MULT,
 * VEC_MADD, VEC_SPLAT and VEC_ADD.
 *
 * Every macro parameter is parenthesised in the expansion.  VEC_SWAP expands
 * its argument twice, so do not pass an expression with side effects.
 */
#ifndef USE_DOUBLE_PREC     /* tolerate -DUSE_DOUBLE_PREC on the command line */
#define USE_DOUBLE_PREC
#endif

#if defined(USE_DOUBLE_PREC)
    typedef double  RealType;
    typedef __m128d V_Real;
#   define REALS_PER_VEC            2   /* number of elements per vector */
    /* memory <-> register */
#   define VEC_LOAD(a)              _mm_load_pd((a))
#   define VEC_LOAD_SCALAR(a)       _mm_load1_pd((a))   /* one double into both lanes */
#   define VEC_STORE(a, b)          _mm_store_pd((a), (b))
#   define VEC_STORE_SCALAR(a, b)   _mm_store_sd((a), (b)) /* low lane only */
    /* arithmetic */
#   define VEC_MULT(a, b)           _mm_mul_pd((a), (b))
#   define VEC_DIV(a, b)            _mm_div_pd((a), (b))
#   define VEC_MADD(a, b, c)        _mm_add_pd(_mm_mul_pd((a), (b)), (c)) /* a*b + c */
#   define VEC_ADD(a, b)            _mm_add_pd((a), (b))
    /* construction and lane shuffling */
#   define VEC_SPLAT(a)             _mm_set1_pd((a))
#   define VEC_SWAP(a)              _mm_shuffle_pd((a), (a), _MM_SHUFFLE2(0, 1)) /* exchange the two lanes */
#   define VEC_SETZERO()            _mm_setzero_pd()
#   define VEC_SET1(a)              _mm_set_sd((a))
#   define VEC_SET(a, b)            _mm_set_pd((a), (b))
#   define VEC_MOVE(a, b)           _mm_move_sd((a), (b))
#else
    typedef float   RealType;
    typedef __m128  V_Real;
#   define REALS_PER_VEC            4   /* number of elements per vector */
#   define VEC_MULT(a, b)           _mm_mul_ps((a), (b))
#   define VEC_MADD(a, b, c)        _mm_add_ps(_mm_mul_ps((a), (b)), (c)) /* a*b + c */
#   define VEC_SPLAT(a)             _mm_set1_ps((a))
#   define VEC_ADD(a, b)            _mm_add_ps((a), (b))
#endif
00077 typedef union                   /* for copying individual elements to and from vector floats */
00078         {
00079         RealType        x[REALS_PER_VEC];
00080         V_Real          vx;
00081         }
00082         VecUnion;
00083 
#ifdef __GNUC__
    /*
     * cpuid(func, ax, bx, cx, dx)
     *
     * Executes the x86 CPUID instruction with EAX = func and ECX = 0, and
     * writes the resulting EAX/EBX/ECX/EDX into the lvalues ax/bx/cx/dx.
     *
     * ECX is zeroed on input so that leaves which take a sub-leaf index in
     * ECX give a deterministic result instead of depending on whatever the
     * register happened to hold.
     *
     * The expansion deliberately has no trailing semicolon: the caller
     * supplies it, so the macro is safe as the sole statement of an
     * if/else branch.
     */
    #define cpuid(func, ax, bx, cx, dx) \
            __asm__ __volatile__ ("cpuid" \
                : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
                : "a" (func), "c" (0))
#endif
00089 
00090 #ifdef _WIN32
00091 
00092 #endif
00093 
/**
 * Placeholder SSE capability check.
 *
 * No run-time detection is performed; the function unconditionally reports
 * support.
 *
 * Declared `static inline` because the definition lives in a header: with
 * external linkage, including this header from more than one translation
 * unit would produce duplicate-symbol errors at link time.
 *
 * @return always 1
 */
static inline int CPUSupportsSSE(void) {
    return 1;
}
00100 
00101 #endif // __SSEDefinitions__