SHOGUN
v1.1.0
|
00001 /* 00002 * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights 00003 * embodied in the content of this file are licensed under the BSD 00004 * (revised) open source license. 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * Written (W) 2011 Shashwat Lal Das 00012 * Adaptation of Vowpal Wabbit v5.1. 00013 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society. 00014 */ 00015 00016 #ifndef _STREAMING_VWFEATURES__H__ 00017 #define _STREAMING_VWFEATURES__H__ 00018 00019 #include <shogun/lib/common.h> 00020 #include <shogun/lib/DataType.h> 00021 #include <shogun/mathematics/Math.h> 00022 00023 #include <shogun/io/InputParser.h> 00024 #include <shogun/io/StreamingVwFile.h> 00025 #include <shogun/io/StreamingVwCacheFile.h> 00026 #include <shogun/features/StreamingDotFeatures.h> 00027 #include <shogun/classifier/vw/vw_common.h> 00028 #include <shogun/classifier/vw/vw_math.h> 00029 00030 namespace shogun 00031 { 00039 class CStreamingVwFeatures : public CStreamingDotFeatures 00040 { 00041 public: 00042 00050 CStreamingVwFeatures(); 00051 00060 CStreamingVwFeatures(CStreamingVwFile* file, 00061 bool is_labelled, int32_t size); 00062 00071 CStreamingVwFeatures(CStreamingVwCacheFile* file, 00072 bool is_labelled, int32_t size); 00073 00079 ~CStreamingVwFeatures(); 00080 00086 CFeatures* duplicate() const; 00087 00097 virtual void set_vector_reader(); 00098 00108 virtual void set_vector_and_label_reader(); 00109 00115 virtual void start_parser(); 00116 00122 virtual void end_parser(); 00123 00128 virtual void reset_stream(); 00129 00134 virtual CVwEnvironment* get_env(); 00135 00141 virtual void set_env(CVwEnvironment* vw_env); 00142 00151 virtual bool get_next_example(); 00152 00158 virtual VwExample* get_example(); 00159 00167 virtual float64_t get_label(); 00168 00175 virtual void release_example(); 00176 00185 virtual void expand_if_required(float32_t*& vec, int32_t& len); 00186 00195 virtual void expand_if_required(float64_t*& vec, int32_t& len); 00196 00204 virtual int32_t get_dim_feature_space() const; 00205 00214 virtual float32_t real_weight(float32_t w, float32_t gravity); 00215 00226 virtual float32_t dot(CStreamingDotFeatures *df); 00227 00236 virtual float32_t dense_dot(VwExample* &ex, const float32_t* vec2); 00237 00247 virtual float32_t dense_dot(const float32_t* vec2, int32_t vec2_len); 00248 00258 virtual float32_t dense_dot(SGSparseVector<float32_t>* vec1, const float32_t* vec2); 00259 00270 virtual float32_t dense_dot_truncated(const float32_t* vec2, VwExample* &ex, float32_t gravity); 00271 00282 virtual void add_to_dense_vec(float32_t alpha, VwExample* &ex, 00283 float32_t* vec2, int32_t vec2_len, bool abs_val = false); 00284 00294 virtual void add_to_dense_vec(float32_t alpha, 00295 float32_t* vec2, int32_t vec2_len, bool abs_val = false); 00296 00301 virtual int32_t get_nnz_features_for_vector(); 00302 00308 virtual int32_t get_num_features(); 00309 00315 virtual inline EFeatureType get_feature_type(); 00316 00322 virtual EFeatureClass get_feature_class(); 00323 00329 inline virtual const char* get_name() const { return "StreamingVwFeatures"; } 00330 00336 inline virtual int32_t get_num_vectors() const; 00337 00343 virtual int32_t get_size(); 00344 00345 private: 00350 virtual void init(); 00351 00359 virtual void init(CStreamingVwFile *file, bool is_labelled, int32_t size); 00360 00368 virtual void init(CStreamingVwCacheFile *file, bool is_labelled, int32_t size); 00369 00376 virtual void setup_example(VwExample* ae); 00377 00378 protected: 00379 00381 CInputParser<VwExample> parser; 00382 00384 vw_size_t example_count; 00385 00387 float64_t current_label; 00388 00390 int32_t current_length; 00391 00393 CVwEnvironment* env; 00394 00396 VwExample* current_example; 00397 }; 00398 } 00399 #endif // _STREAMING_VWFEATURES__H__