SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Shashwat Lal Das 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 */ 00010 #ifndef _STREAMING_SPARSEFEATURES__H__ 00011 #define _STREAMING_SPARSEFEATURES__H__ 00012 00013 #include <shogun/lib/common.h> 00014 #include <shogun/mathematics/Math.h> 00015 #include <shogun/features/StreamingDotFeatures.h> 00016 #include <shogun/lib/DataType.h> 00017 #include <shogun/io/InputParser.h> 00018 00019 namespace shogun 00020 { 00043 template <class T> class CStreamingSparseFeatures : public CStreamingDotFeatures 00044 { 00045 public: 00046 00054 CStreamingSparseFeatures(); 00055 00064 CStreamingSparseFeatures(CStreamingFile* file, 00065 bool is_labelled, 00066 int32_t size); 00067 00073 ~CStreamingSparseFeatures(); 00074 00084 virtual void set_vector_reader(); 00085 00095 virtual void set_vector_and_label_reader(); 00096 00102 virtual void start_parser(); 00103 00109 virtual void end_parser(); 00110 00119 virtual bool get_next_example(); 00120 00127 T get_feature(int32_t index); 00128 00134 SGSparseVector<T> get_vector(); 00135 00143 virtual float64_t get_label(); 00144 00151 virtual void release_example(); 00152 00157 virtual void reset_stream(); 00158 00170 int32_t set_num_features(int32_t num); 00171 00179 virtual int32_t get_dim_feature_space() const; 00180 00189 virtual void expand_if_required(float32_t*& vec, int32_t &len); 00190 00199 virtual void expand_if_required(float64_t*& vec, int32_t &len); 00200 00211 virtual float32_t dot(CStreamingDotFeatures *df); 00212 00223 static T sparse_dot(T alpha, SGSparseVectorEntry<T>* avec, int32_t alen, SGSparseVectorEntry<T>* bvec, int32_t blen); 00224 00234 T dense_dot(T alpha, T* vec, int32_t dim, T b); 00235 00244 virtual float64_t dense_dot(const float64_t* vec2, int32_t vec2_len); 00245 00254 virtual float32_t dense_dot(const float32_t* vec2, int32_t vec2_len); 00255 00265 virtual void add_to_dense_vec(float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val=false); 00266 00276 virtual void add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val=false); 00277 00283 int64_t get_num_nonzero_entries(); 00284 00290 float32_t compute_squared(); 00291 00297 void sort_features(); 00298 00304 virtual int32_t get_num_features(); 00305 00311 virtual int32_t get_nnz_features_for_vector(); 00312 00318 virtual EFeatureType get_feature_type(); 00319 00325 virtual EFeatureClass get_feature_class(); 00326 00332 virtual CFeatures* duplicate() const; 00333 00339 inline virtual const char* get_name() const { return "StreamingSparseFeatures"; } 00340 00346 virtual int32_t get_num_vectors() const; 00347 00353 virtual int32_t get_size(); 00354 00355 private: 00360 virtual void init(); 00361 00369 virtual void init(CStreamingFile *file, bool is_labelled, int32_t size); 00370 00371 protected: 00373 CInputParser< SGSparseVectorEntry<T> > parser; 00374 00376 CStreamingFile* working_file; 00377 00379 SGSparseVector<T> current_sgvector; 00380 00382 SGSparseVectorEntry<T>* current_vector; 00383 00385 index_t current_vec_index; 00386 00388 float64_t current_label; 00389 00391 int32_t current_length; 00392 00394 int32_t current_num_features; 00395 }; 00396 00397 } 00398 #endif // _STREAMING_SPARSEFEATURES__H__