SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2010 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Subset support written (W) 2011 Heiko Strathmann 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 * Copyright (C) 2010 Berlin Institute of Technology 00012 */ 00013 00014 #ifndef _SPARSEFEATURES__H__ 00015 #define _SPARSEFEATURES__H__ 00016 00017 #include <shogun/lib/common.h> 00018 #include <shogun/lib/Cache.h> 00019 #include <shogun/io/File.h> 00020 00021 #include <shogun/features/Labels.h> 00022 #include <shogun/features/Features.h> 00023 #include <shogun/features/DotFeatures.h> 00024 #include <shogun/features/SimpleFeatures.h> 00025 00026 namespace shogun 00027 { 00028 00029 class CFile; 00030 class CLabels; 00031 class CFeatures; 00032 class CDotFeatures; 00033 template <class ST> class CSimpleFeatures; 00034 00052 template <class ST> class CSparseFeatures : public CDotFeatures 00053 { 00054 public: 00059 CSparseFeatures(int32_t size=0); 00060 00069 CSparseFeatures(SGSparseVector<ST>* src, 00070 int32_t num_feat, int32_t num_vec,bool copy=false); 00071 00077 CSparseFeatures(SGSparseMatrix<ST> sparse); 00078 00084 CSparseFeatures(SGMatrix<ST> dense); 00085 00087 CSparseFeatures(const CSparseFeatures & orig); 00088 00093 CSparseFeatures(CFile* loader); 00094 00096 virtual ~CSparseFeatures(); 00097 00102 void free_sparse_feature_matrix(); 00103 00108 void free_sparse_features(); 00109 00114 virtual CFeatures* duplicate() const; 00115 00125 ST get_feature(int32_t num, int32_t index); 00126 00135 ST* get_full_feature_vector(int32_t num, int32_t& len); 00136 00142 SGVector<ST> get_full_feature_vector(int32_t num); 00143 00149 virtual int32_t get_nnz_features_for_vector(int32_t num); 00150 00160 SGSparseVector<ST> get_sparse_feature_vector(int32_t num); 00161 00172 static ST sparse_dot(ST alpha, SGSparseVectorEntry<ST>* avec, int32_t alen, 00173 SGSparseVectorEntry<ST>* bvec, int32_t blen); 00174 00187 ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b); 00188 00200 void add_to_dense_vec(float64_t alpha, int32_t num, 00201 float64_t* vec, int32_t dim, bool abs_val=false); 00202 00210 void free_sparse_feature_vector(SGSparseVector<ST> vec, int32_t num); 00211 00221 SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec); 00222 00230 SGSparseMatrix<ST> get_sparse_feature_matrix(); 00231 00237 static void clean_tsparse(SGSparseVector<ST>* sfm, int32_t num_vec); 00238 00245 CSparseFeatures<ST>* get_transposed(); 00246 00258 SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec); 00259 00267 void set_sparse_feature_matrix(SGSparseMatrix<ST> sm); 00268 00275 SGMatrix<ST> get_full_feature_matrix(); 00276 00286 virtual bool set_full_feature_matrix(SGMatrix<ST> full); 00287 00295 virtual bool apply_preprocessor(bool force_preprocessing=false); 00296 00301 virtual int32_t get_size(); 00302 00310 bool obtain_from_simple(CSimpleFeatures<ST>* sf); 00311 00316 virtual int32_t get_num_vectors() const; 00317 00322 int32_t get_num_features(); 00323 00335 int32_t set_num_features(int32_t num); 00336 00341 virtual EFeatureClass get_feature_class(); 00342 00347 virtual EFeatureType get_feature_type(); 00348 00356 void free_feature_vector(SGSparseVector<ST> vec, int32_t num); 00357 00362 int64_t get_num_nonzero_entries(); 00363 00371 float64_t* compute_squared(float64_t* sq); 00372 00387 float64_t compute_squared_norm(CSparseFeatures<float64_t>* lhs, 00388 float64_t* sq_lhs, int32_t idx_a, 00389 CSparseFeatures<float64_t>* rhs, float64_t* sq_rhs, 00390 int32_t idx_b); 00391 00398 void load(CFile* loader); 00399 00406 void save(CFile* writer); 00407 00417 CLabels* load_svmlight_file(char* fname, bool do_sort_features=true); 00418 00424 void sort_features(); 00425 00434 bool write_svmlight_file(char* fname, CLabels* label); 00435 00443 virtual int32_t get_dim_feature_space() const; 00444 00454 virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2); 00455 00464 virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len); 00465 00466 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00467 00468 struct sparse_feature_iterator 00469 { 00471 SGSparseVector<ST> sv; 00472 00474 int32_t index; 00475 00477 void print_info() 00478 { 00479 SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n", 00480 sv.features, sv.vec_index, sv.num_feat_entries, index); 00481 } 00482 }; 00483 #endif 00484 00496 virtual void* get_feature_iterator(int32_t vector_index); 00497 00508 virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator); 00509 00515 virtual void free_feature_iterator(void* iterator); 00516 00523 virtual CFeatures* copy_subset(SGVector<index_t> indices); 00524 00526 inline virtual const char* get_name() const { return "SparseFeatures"; } 00527 00528 protected: 00539 virtual SGSparseVectorEntry<ST>* compute_sparse_feature_vector(int32_t num, 00540 int32_t& len, SGSparseVectorEntry<ST>* target=NULL); 00541 00542 private: 00543 void init(); 00544 00545 protected: 00546 00548 int32_t num_vectors; 00549 00551 int32_t num_features; 00552 00554 SGSparseVector<ST>* sparse_feature_matrix; 00555 00557 CCache< SGSparseVectorEntry<ST> >* feature_cache; 00558 }; 00559 } 00560 #endif /* _SPARSEFEATURES__H__ */