00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #ifndef EIGEN_MEMORY_H
00035 #define EIGEN_MEMORY_H
00036
00037
00038
00039
00040
00041
00042
00043
// ---------------------------------------------------------------------------
// Compile-time selection of the allocation strategy. The macros defined here
// are consumed by aligned_malloc(), aligned_free() and aligned_realloc() below;
// each of them is always defined, to either 0 or 1.
// ---------------------------------------------------------------------------

// glibc 2.8 or newer (or any major version above 2) on an LP64 target.
#if defined(__GLIBC__) && defined(__LP64__) \
 && ((__GLIBC__ > 2) || (__GLIBC__ >= 2 && __GLIBC_MINOR__ >= 8))
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
#endif

// FreeBSD, except on ARM and MIPS.
#if defined(__FreeBSD__) && !(defined(__arm__) || defined(__mips__))
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif

// When set, the aligned_* functions simply forward to std::malloc/realloc/free.
#if EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED \
 || defined(__APPLE__) \
 || defined(_WIN64)
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif

// posix_memalign() is used only when the feature-test macros advertise it.
#if (defined(__QNXNTO__) || defined(_GNU_SOURCE) || (defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
 && defined(_POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
  #define EIGEN_HAS_POSIX_MEMALIGN 1
#else
  #define EIGEN_HAS_POSIX_MEMALIGN 0
#endif

// _mm_malloc()/_mm_free() are used only when SSE vectorization is enabled.
#if defined(EIGEN_VECTORIZE_SSE)
  #define EIGEN_HAS_MM_MALLOC 1
#else
  #define EIGEN_HAS_MM_MALLOC 0
#endif
00082
00083 namespace Eigen {
00084
00085 namespace internal {
00086
/**
  * Signals an allocation failure.
  *
  * When EIGEN_EXCEPTIONS is defined, std::bad_alloc is thrown directly.
  * Otherwise an array of size_t(-1) ints is requested from operator new, so
  * that the runtime's own out-of-memory handling reports the failure.
  */
inline void throw_std_bad_alloc()
{
#ifdef EIGEN_EXCEPTIONS
  throw std::bad_alloc();
#else
  // Kept in a variable (not a literal) so the size is evaluated at run time.
  std::size_t impossible_count = -1;
  new int[impossible_count];
#endif
}
00096
00097
00098
00099
00100
00101
00102
/**
  * Allocates \a size bytes starting at a multiple of 16, using only std::malloc.
  *
  * 16 extra bytes are requested. The returned address is the first 16-byte
  * boundary strictly after the start of the raw block, and the raw pointer is
  * stored in the void* slot immediately before it so that
  * handmade_aligned_free() and handmade_aligned_realloc() can recover it.
  *
  * \param size number of usable bytes requested
  * \returns the aligned pointer, or 0 if the request cannot be satisfied
  */
inline void* handmade_aligned_malloc(size_t size)
{
  const size_t padding = 16;
  // size+padding wraps around for sizes close to the maximum, which would
  // silently produce a far too small block; report failure instead.
  if (size > size_t(-1) - padding) return 0;

  void *original = std::malloc(size + padding);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}
00114
/**
  * Releases a block obtained from handmade_aligned_malloc().
  *
  * The pointer originally returned by std::malloc is read back from the
  * void* slot located just before \a ptr. A null \a ptr is ignored.
  */
inline void handmade_aligned_free(void *ptr)
{
  if (ptr == 0) return;
  void **raw_slot = reinterpret_cast<void**>(ptr) - 1;
  std::free(*raw_slot);
}
00120
00126 inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
00127 {
00128 if (ptr == 0) return handmade_aligned_malloc(size);
00129 void *original = *(reinterpret_cast<void**>(ptr) - 1);
00130 original = std::realloc(original,size+16);
00131 if (original == 0) return 0;
00132 void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
00133 *(reinterpret_cast<void**>(aligned) - 1) = original;
00134 return aligned;
00135 }
00136
00137
00138
00139
00140
// Defined further down in this file; declared here for generic_aligned_realloc().
void* aligned_malloc(size_t size);
void aligned_free(void *ptr);

/**
  * Reallocation built from aligned_malloc(), std::memcpy and aligned_free(),
  * for configurations where no native aligned realloc is used.
  *
  * \param ptr      block to resize; 0 makes the call equivalent to aligned_malloc(size)
  * \param size     new size in bytes; 0 (with a non-null \a ptr) frees the block and returns 0
  * \param old_size current size in bytes, used to bound the copy
  * \returns the new block, or 0 if allocation returned null (the old block is then kept)
  */
inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size)
{
  if (ptr == 0)
    return aligned_malloc(size);

  if (size == 0)
  {
    aligned_free(ptr);
    return 0;
  }

  void* const fresh = aligned_malloc(size);
  if (fresh == 0)
  {
#ifdef EIGEN_HAS_ERRNO
    errno = ENOMEM;
#endif
    return 0;
  }

  // ptr is known to be non-null here: copy what fits, then drop the old block.
  const size_t bytes_to_keep = (std::min)(size, old_size);
  std::memcpy(fresh, ptr, bytes_to_keep);
  aligned_free(ptr);
  return fresh;
}
00177
00178
00179
00180
00181
// check_that_malloc_is_allowed() is called before heap allocations in this file.
//  - EIGEN_NO_MALLOC:         every call asserts.
//  - EIGEN_RUNTIME_NO_MALLOC: calls assert while the run-time flag is false.
//  - otherwise:               no-op.
#if defined(EIGEN_NO_MALLOC)
inline void check_that_malloc_is_allowed()
{
  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
#elif defined(EIGEN_RUNTIME_NO_MALLOC)
/** Holds the run-time flag; when \a update is true the flag is first set to \a new_value. Returns the flag. */
inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
{
  static bool allowed = true;
  if (update)
    allowed = new_value;
  return allowed;
}
/** Returns the current value of the run-time flag. */
inline bool is_malloc_allowed()
{
  return is_malloc_allowed_impl(false);
}
/** Sets the run-time flag to \a new_value and returns it. */
inline bool set_is_malloc_allowed(bool new_value)
{
  return is_malloc_allowed_impl(true, new_value);
}
inline void check_that_malloc_is_allowed()
{
  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
#else
inline void check_that_malloc_is_allowed()
{
}
#endif
00205
00209 inline void* aligned_malloc(size_t size)
00210 {
00211 check_that_malloc_is_allowed();
00212
00213 void *result;
00214 #if !EIGEN_ALIGN
00215 result = std::malloc(size);
00216 #elif EIGEN_MALLOC_ALREADY_ALIGNED
00217 result = std::malloc(size);
00218 #elif EIGEN_HAS_POSIX_MEMALIGN
00219 if(posix_memalign(&result, 16, size)) result = 0;
00220 #elif EIGEN_HAS_MM_MALLOC
00221 result = _mm_malloc(size, 16);
00222 #elif (defined _MSC_VER)
00223 result = _aligned_malloc(size, 16);
00224 #else
00225 result = handmade_aligned_malloc(size);
00226 #endif
00227
00228 if(!result && size)
00229 throw_std_bad_alloc();
00230
00231 return result;
00232 }
00233
/**
  * Releases memory obtained from aligned_malloc(), using the deallocation
  * routine that matches the strategy selected at compile time.
  */
inline void aligned_free(void *ptr)
{
#if !EIGEN_ALIGN || EIGEN_MALLOC_ALREADY_ALIGNED || EIGEN_HAS_POSIX_MEMALIGN
  // std::malloc and posix_memalign blocks are both released with std::free.
  std::free(ptr);
#elif EIGEN_HAS_MM_MALLOC
  _mm_free(ptr);
#elif defined(_MSC_VER)
  _aligned_free(ptr);
#else
  handmade_aligned_free(ptr);
#endif
}
00251
00257 inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
00258 {
00259 EIGEN_UNUSED_VARIABLE(old_size);
00260
00261 void *result;
00262 #if !EIGEN_ALIGN
00263 result = std::realloc(ptr,new_size);
00264 #elif EIGEN_MALLOC_ALREADY_ALIGNED
00265 result = std::realloc(ptr,new_size);
00266 #elif EIGEN_HAS_POSIX_MEMALIGN
00267 result = generic_aligned_realloc(ptr,new_size,old_size);
00268 #elif EIGEN_HAS_MM_MALLOC
00269
00270
00271
00272 #if defined(_MSC_VER) && defined(_mm_free)
00273 result = _aligned_realloc(ptr,new_size,16);
00274 #else
00275 result = generic_aligned_realloc(ptr,new_size,old_size);
00276 #endif
00277 #elif defined(_MSC_VER)
00278 result = _aligned_realloc(ptr,new_size,16);
00279 #else
00280 result = handmade_aligned_realloc(ptr,new_size,old_size);
00281 #endif
00282
00283 if (!result && new_size)
00284 throw_std_bad_alloc();
00285
00286 return result;
00287 }
00288
00289
00290
00291
00292
00296 template<bool Align> inline void* conditional_aligned_malloc(size_t size)
00297 {
00298 return aligned_malloc(size);
00299 }
00300
00301 template<> inline void* conditional_aligned_malloc<false>(size_t size)
00302 {
00303 check_that_malloc_is_allowed();
00304
00305 void *result = std::malloc(size);
00306 if(!result && size)
00307 throw_std_bad_alloc();
00308 return result;
00309 }
00310
00312 template<bool Align> inline void conditional_aligned_free(void *ptr)
00313 {
00314 aligned_free(ptr);
00315 }
00316
00317 template<> inline void conditional_aligned_free<false>(void *ptr)
00318 {
00319 std::free(ptr);
00320 }
00321
00322 template<bool Align> inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size)
00323 {
00324 return aligned_realloc(ptr, new_size, old_size);
00325 }
00326
00327 template<> inline void* conditional_aligned_realloc<false>(void* ptr, size_t new_size, size_t)
00328 {
00329 return std::realloc(ptr, new_size);
00330 }
00331
00332
00333
00334
00335
/**
  * Default-constructs \a size objects of type T in place, in increasing
  * address order, starting at \a ptr.
  * \returns \a ptr
  */
template<typename T> inline T* construct_elements_of_array(T *ptr, size_t size)
{
  T* const past_the_end = ptr + size;
  for (T* slot = ptr; slot != past_the_end; ++slot)
    ::new (slot) T;
  return ptr;
}
00344
/**
  * Runs the destructor of the \a size objects starting at \a ptr, from the
  * last element down to the first. Does nothing when \a ptr is null.
  */
template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size)
{
  if (ptr == 0) return;
  for (size_t remaining = size; remaining > 0; --remaining)
    ptr[remaining - 1].~T();
}
00354
00355
00356
00357
00358
00359 template<typename T>
00360 EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
00361 {
00362 if(size > size_t(-1) / sizeof(T))
00363 throw_std_bad_alloc();
00364 }
00365
00370 template<typename T> inline T* aligned_new(size_t size)
00371 {
00372 check_size_for_overflow<T>(size);
00373 T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
00374 return construct_elements_of_array(result, size);
00375 }
00376
00377 template<typename T, bool Align> inline T* conditional_aligned_new(size_t size)
00378 {
00379 check_size_for_overflow<T>(size);
00380 T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
00381 return construct_elements_of_array(result, size);
00382 }
00383
00387 template<typename T> inline void aligned_delete(T *ptr, size_t size)
00388 {
00389 destruct_elements_of_array<T>(ptr, size);
00390 aligned_free(ptr);
00391 }
00392
00396 template<typename T, bool Align> inline void conditional_aligned_delete(T *ptr, size_t size)
00397 {
00398 destruct_elements_of_array<T>(ptr, size);
00399 conditional_aligned_free<Align>(ptr);
00400 }
00401
00402 template<typename T, bool Align> inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size)
00403 {
00404 check_size_for_overflow<T>(new_size);
00405 check_size_for_overflow<T>(old_size);
00406 if(new_size < old_size)
00407 destruct_elements_of_array(pts+new_size, old_size-new_size);
00408 T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
00409 if(new_size > old_size)
00410 construct_elements_of_array(result+old_size, new_size-old_size);
00411 return result;
00412 }
00413
00414
00415 template<typename T, bool Align> inline T* conditional_aligned_new_auto(size_t size)
00416 {
00417 check_size_for_overflow<T>(size);
00418 T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
00419 if(NumTraits<T>::RequireInitialization)
00420 construct_elements_of_array(result, size);
00421 return result;
00422 }
00423
00424 template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, size_t new_size, size_t old_size)
00425 {
00426 check_size_for_overflow<T>(new_size);
00427 check_size_for_overflow<T>(old_size);
00428 if(NumTraits<T>::RequireInitialization && (new_size < old_size))
00429 destruct_elements_of_array(pts+new_size, old_size-new_size);
00430 T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
00431 if(NumTraits<T>::RequireInitialization && (new_size > old_size))
00432 construct_elements_of_array(result+old_size, new_size-old_size);
00433 return result;
00434 }
00435
00436 template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *ptr, size_t size)
00437 {
00438 if(NumTraits<T>::RequireInitialization)
00439 destruct_elements_of_array<T>(ptr, size);
00440 conditional_aligned_free<Align>(ptr);
00441 }
00442
00443
00444
00461 template<typename Scalar, typename Index>
00462 static inline Index first_aligned(const Scalar* array, Index size)
00463 {
00464 typedef typename packet_traits<Scalar>::type Packet;
00465 enum { PacketSize = packet_traits<Scalar>::size,
00466 PacketAlignedMask = PacketSize-1
00467 };
00468
00469 if(PacketSize==1)
00470 {
00471
00472
00473 return 0;
00474 }
00475 else if(size_t(array) & (sizeof(Scalar)-1))
00476 {
00477
00478
00479 return size;
00480 }
00481 else
00482 {
00483 return std::min<Index>( (PacketSize - (Index((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
00484 & PacketAlignedMask, size);
00485 }
00486 }
00487
00488
00489
00490
00491 template<typename T, bool UseMemcpy> struct smart_copy_helper;
00492
00493 template<typename T> void smart_copy(const T* start, const T* end, T* target)
00494 {
00495 smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
00496 }
00497
00498 template<typename T> struct smart_copy_helper<T,true> {
00499 static inline void run(const T* start, const T* end, T* target)
00500 { memcpy(target, start, std::ptrdiff_t(end)-std::ptrdiff_t(start)); }
00501 };
00502
00503 template<typename T> struct smart_copy_helper<T,false> {
00504 static inline void run(const T* start, const T* end, T* target)
00505 { std::copy(start, end, target); }
00506 };
00507
00508
00509
00510
00511
00512
00513
00514
// EIGEN_ALLOCA names the stack allocation function. A definition supplied
// before this point is kept; otherwise one is chosen for Linux and MSVC, and
// the macro stays undefined everywhere else.
#if !defined(EIGEN_ALLOCA)
  #if defined(__linux__)
    #define EIGEN_ALLOCA alloca
  #elif defined(_MSC_VER)
    #define EIGEN_ALLOCA _alloca
  #endif
#endif
00522
00523
00524
00525 template<typename T> class aligned_stack_memory_handler
00526 {
00527 public:
00528
00529
00530
00531
00532
00533
00534 aligned_stack_memory_handler(T* ptr, size_t size, bool dealloc)
00535 : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
00536 {
00537 if(NumTraits<T>::RequireInitialization && m_ptr)
00538 Eigen::internal::construct_elements_of_array(m_ptr, size);
00539 }
00540 ~aligned_stack_memory_handler()
00541 {
00542 if(NumTraits<T>::RequireInitialization && m_ptr)
00543 Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
00544 if(m_deallocate)
00545 Eigen::internal::aligned_free(m_ptr);
00546 }
00547 protected:
00548 T* m_ptr;
00549 size_t m_size;
00550 bool m_deallocate;
00551 };
00552
00553 }
00554
// ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER)
//
// Declares `TYPE* NAME` pointing at SIZE objects of type TYPE, plus a guard
// object NAME_stack_memory_destructor (an aligned_stack_memory_handler<TYPE>)
// that cleans up at scope exit.
//  - If BUFFER is non-null, NAME is BUFFER and the guard manages nothing.
//  - Otherwise, when EIGEN_ALLOCA is available and the byte size is at most
//    EIGEN_STACK_ALLOCATION_LIMIT, the memory comes from the stack; in all
//    other cases it comes from aligned_malloc() and the guard frees it.
//
// SIZE and BUFFER are expanded several times, so they must be free of side
// effects. Every use is parenthesized so that expressions such as `n+1` yield
// the intended byte count.
#ifdef EIGEN_ALLOCA

  #ifdef __arm__
    // The raw alloca result is over-allocated by 16 bytes and rounded up to
    // the next multiple of 16.
    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA((SIZE)+16)) & ~(size_t(15))) + 16)
  #else
    #define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA
  #endif

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
               : reinterpret_cast<TYPE*>( \
                      (sizeof(TYPE)*(SIZE)<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*(SIZE)) \
                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*(SIZE)) );  \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,(SIZE),sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT)

#else

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*(SIZE)));    \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,(SIZE),true)

#endif
00594
00595
00596
00597
00598
00599
// EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) is meant to be expanded
// inside a class body. With EIGEN_ALIGN enabled it injects operator new/delete
// overloads (scalar, array, placement and nothrow forms) that obtain and
// release memory through conditional_aligned_malloc/free<NeedsToAlign>, plus
// the marker typedef eigen_aligned_operator_new_marker_type. With EIGEN_ALIGN
// disabled it expands to nothing.
#if EIGEN_ALIGN

  // The nothrow form of operator new. When exceptions are enabled, any
  // exception raised by the allocation is turned into a null return value.
  #ifdef EIGEN_EXCEPTIONS
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
        void* block = 0; \
        try { block = Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        catch (...) { block = 0; } \
        return block; \
      }
  #else
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      }
  #endif

  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
      void *operator new(size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void *operator new[](size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void operator delete(void * ptr) throw() { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      void operator delete[](void * ptr) throw() { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      static void *operator new(size_t size, void *ptr) { \
        return ::operator new(size,ptr); \
      } \
      void operator delete(void * memory, void *ptr) throw() { \
        return ::operator delete(memory,ptr); \
      } \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void operator delete(void *ptr, const std::nothrow_t&) throw() { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      typedef void eigen_aligned_operator_new_marker_type;

#else

  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)

#endif

// Unconditional variant.
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)

// Variant for fixed-size types: alignment is requested only when Size is not
// Eigen::Dynamic and the total byte size is a multiple of 16.
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0)))
00642
00643
00644
00661 template<class T>
00662 class aligned_allocator
00663 {
00664 public:
00665 typedef size_t size_type;
00666 typedef std::ptrdiff_t difference_type;
00667 typedef T* pointer;
00668 typedef const T* const_pointer;
00669 typedef T& reference;
00670 typedef const T& const_reference;
00671 typedef T value_type;
00672
00673 template<class U>
00674 struct rebind
00675 {
00676 typedef aligned_allocator<U> other;
00677 };
00678
00679 pointer address( reference value ) const
00680 {
00681 return &value;
00682 }
00683
00684 const_pointer address( const_reference value ) const
00685 {
00686 return &value;
00687 }
00688
00689 aligned_allocator()
00690 {
00691 }
00692
00693 aligned_allocator( const aligned_allocator& )
00694 {
00695 }
00696
00697 template<class U>
00698 aligned_allocator( const aligned_allocator<U>& )
00699 {
00700 }
00701
00702 ~aligned_allocator()
00703 {
00704 }
00705
00706 size_type max_size() const
00707 {
00708 return (std::numeric_limits<size_type>::max)();
00709 }
00710
00711 pointer allocate( size_type num, const void* hint = 0 )
00712 {
00713 EIGEN_UNUSED_VARIABLE(hint);
00714 internal::check_size_for_overflow<T>(num);
00715 return static_cast<pointer>( internal::aligned_malloc( num * sizeof(T) ) );
00716 }
00717
00718 void construct( pointer p, const T& value )
00719 {
00720 ::new( p ) T( value );
00721 }
00722
00723
00724 #if (__cplusplus >= 201103L)
00725 template<typename... Args>
00726 void construct(pointer p, Args&&... args)
00727 {
00728 ::new(p) T(std::forward<Args>(args)...);
00729 }
00730 #endif
00731
00732 void destroy( pointer p )
00733 {
00734 p->~T();
00735 }
00736
00737 void deallocate( pointer p, size_type )
00738 {
00739 internal::aligned_free( p );
00740 }
00741
00742 bool operator!=(const aligned_allocator<T>& ) const
00743 { return false; }
00744
00745 bool operator==(const aligned_allocator<T>& ) const
00746 { return true; }
00747 };
00748
00749
00750
// EIGEN_CPUID(abcd,func,id) runs the cpuid instruction with eax=func and
// ecx=id, and stores the resulting eax, ebx, ecx, edx into abcd[0..3].
// The macro is left undefined when EIGEN_NO_CPUID is set or when no
// implementation below matches the compiler/target.
#ifndef EIGEN_NO_CPUID
  #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    #if defined(__PIC__) && defined(__i386__)
      // 32-bit PIC build: ebx is swapped with esi around the instruction and
      // the ebx result is read from esi, so ebx itself is left unchanged.
      #define EIGEN_CPUID(abcd,func,id) \
        __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
    #else
      // All other GNU-compatible x86/x86-64 builds read ebx directly.
      #define EIGEN_CPUID(abcd,func,id) \
        __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
    #endif
  #elif defined(_MSC_VER) && (_MSC_VER > 1500)
    // MSVC newer than _MSC_VER 1500: use the __cpuidex intrinsic.
    #define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
  #endif
#endif
00768
00769 namespace internal {
00770
00771 #ifdef EIGEN_CPUID
00772
/**
  * Tells whether the registers returned by CPUID leaf 0 spell \a vendor.
  *
  * The vendor text is compared in three int-sized pieces against abcd[1],
  * abcd[3] and abcd[2], in that order.
  *
  * \param abcd   register values as filled in by EIGEN_CPUID
  * \param vendor vendor text; at least 3*sizeof(int) bytes must be readable
  *
  * The bytes are compared with std::memcmp rather than by reading the text
  * through an int pointer, which would break the aliasing rules and may
  * perform a misaligned load.
  */
inline bool cpuid_is_vendor(int abcd[4], const char* vendor)
{
  const std::size_t word = sizeof(int);
  return std::memcmp(&abcd[1], vendor,          word) == 0
      && std::memcmp(&abcd[3], vendor +   word, word) == 0
      && std::memcmp(&abcd[2], vendor + 2*word, word) == 0;
}
00777
00778 inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
00779 {
00780 int abcd[4];
00781 l1 = l2 = l3 = 0;
00782 int cache_id = 0;
00783 int cache_type = 0;
00784 do {
00785 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
00786 EIGEN_CPUID(abcd,0x4,cache_id);
00787 cache_type = (abcd[0] & 0x0F) >> 0;
00788 if(cache_type==1||cache_type==3)
00789 {
00790 int cache_level = (abcd[0] & 0xE0) >> 5;
00791 int ways = (abcd[1] & 0xFFC00000) >> 22;
00792 int partitions = (abcd[1] & 0x003FF000) >> 12;
00793 int line_size = (abcd[1] & 0x00000FFF) >> 0;
00794 int sets = (abcd[2]);
00795
00796 int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
00797
00798 switch(cache_level)
00799 {
00800 case 1: l1 = cache_size; break;
00801 case 2: l2 = cache_size; break;
00802 case 3: l3 = cache_size; break;
00803 default: break;
00804 }
00805 }
00806 cache_id++;
00807 } while(cache_type>0 && cache_id<16);
00808 }
00809
00810 inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
00811 {
00812 int abcd[4];
00813 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
00814 l1 = l2 = l3 = 0;
00815 EIGEN_CPUID(abcd,0x00000002,0);
00816 unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
00817 bool check_for_p2_core2 = false;
00818 for(int i=0; i<14; ++i)
00819 {
00820 switch(bytes[i])
00821 {
00822 case 0x0A: l1 = 8; break;
00823 case 0x0C: l1 = 16; break;
00824 case 0x0E: l1 = 24; break;
00825 case 0x10: l1 = 16; break;
00826 case 0x15: l1 = 16; break;
00827 case 0x2C: l1 = 32; break;
00828 case 0x30: l1 = 32; break;
00829 case 0x60: l1 = 16; break;
00830 case 0x66: l1 = 8; break;
00831 case 0x67: l1 = 16; break;
00832 case 0x68: l1 = 32; break;
00833 case 0x1A: l2 = 96; break;
00834 case 0x22: l3 = 512; break;
00835 case 0x23: l3 = 1024; break;
00836 case 0x25: l3 = 2048; break;
00837 case 0x29: l3 = 4096; break;
00838 case 0x39: l2 = 128; break;
00839 case 0x3A: l2 = 192; break;
00840 case 0x3B: l2 = 128; break;
00841 case 0x3C: l2 = 256; break;
00842 case 0x3D: l2 = 384; break;
00843 case 0x3E: l2 = 512; break;
00844 case 0x40: l2 = 0; break;
00845 case 0x41: l2 = 128; break;
00846 case 0x42: l2 = 256; break;
00847 case 0x43: l2 = 512; break;
00848 case 0x44: l2 = 1024; break;
00849 case 0x45: l2 = 2048; break;
00850 case 0x46: l3 = 4096; break;
00851 case 0x47: l3 = 8192; break;
00852 case 0x48: l2 = 3072; break;
00853 case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;
00854 case 0x4A: l3 = 6144; break;
00855 case 0x4B: l3 = 8192; break;
00856 case 0x4C: l3 = 12288; break;
00857 case 0x4D: l3 = 16384; break;
00858 case 0x4E: l2 = 6144; break;
00859 case 0x78: l2 = 1024; break;
00860 case 0x79: l2 = 128; break;
00861 case 0x7A: l2 = 256; break;
00862 case 0x7B: l2 = 512; break;
00863 case 0x7C: l2 = 1024; break;
00864 case 0x7D: l2 = 2048; break;
00865 case 0x7E: l2 = 256; break;
00866 case 0x7F: l2 = 512; break;
00867 case 0x80: l2 = 512; break;
00868 case 0x81: l2 = 128; break;
00869 case 0x82: l2 = 256; break;
00870 case 0x83: l2 = 512; break;
00871 case 0x84: l2 = 1024; break;
00872 case 0x85: l2 = 2048; break;
00873 case 0x86: l2 = 512; break;
00874 case 0x87: l2 = 1024; break;
00875 case 0x88: l3 = 2048; break;
00876 case 0x89: l3 = 4096; break;
00877 case 0x8A: l3 = 8192; break;
00878 case 0x8D: l3 = 3072; break;
00879
00880 default: break;
00881 }
00882 }
00883 if(check_for_p2_core2 && l2 == l3)
00884 l3 = 0;
00885 l1 *= 1024;
00886 l2 *= 1024;
00887 l3 *= 1024;
00888 }
00889
00890 inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
00891 {
00892 if(max_std_funcs>=4)
00893 queryCacheSizes_intel_direct(l1,l2,l3);
00894 else
00895 queryCacheSizes_intel_codes(l1,l2,l3);
00896 }
00897
00898 inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
00899 {
00900 int abcd[4];
00901 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
00902 EIGEN_CPUID(abcd,0x80000005,0);
00903 l1 = (abcd[2] >> 24) * 1024;
00904 abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
00905 EIGEN_CPUID(abcd,0x80000006,0);
00906 l2 = (abcd[2] >> 16) * 1024;
00907 l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024;
00908 }
00909 #endif
00910
/**
  * Queries the sizes, in bytes, of the L1, L2 and L3 caches.
  *
  * When EIGEN_CPUID is available the CPU vendor is identified with CPUID
  * leaf 0 and the matching query routine is used; vendors that are neither
  * Intel nor AMD fall back to the Intel routine. When EIGEN_CPUID is not
  * available all three outputs are set to -1.
  */
inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4];

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  // Leaf 0 returns the highest supported standard leaf in EAX (abcd[0]).
  // abcd[1], abcd[3] and abcd[2] hold the vendor text that cpuid_is_vendor()
  // compares, so they must not be used for this.
  const int max_std_funcs = abcd[0];
  if(cpuid_is_vendor(abcd,"GenuineIntel"))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,"AuthenticAMD") || cpuid_is_vendor(abcd,"AMDisbetter!"))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // unknown vendor: try the Intel approach
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  #else
  l1 = l2 = l3 = -1;
  #endif
}
00944
00947 inline int queryL1CacheSize()
00948 {
00949 int l1(-1), l2, l3;
00950 queryCacheSizes(l1,l2,l3);
00951 return l1;
00952 }
00953
00956 inline int queryTopLevelCacheSize()
00957 {
00958 int l1, l2(-1), l3(-1);
00959 queryCacheSizes(l1,l2,l3);
00960 return (std::max)(l2,l3);
00961 }
00962
00963 }
00964
00965 }
00966
00967 #endif // EIGEN_MEMORY_H