SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Soeren Sonnenburg 00008 * Copyright (C) 2010 Berlin Institute of Technology 00009 */ 00010 00011 #ifndef _HASHEDWDFEATURES_H___ 00012 #define _HASHEDWDFEATURES_H___ 00013 00014 #include <shogun/lib/common.h> 00015 #include <shogun/features/DotFeatures.h> 00016 #include <shogun/features/StringFeatures.h> 00017 #include <shogun/lib/Hash.h> 00018 00019 namespace shogun 00020 { 00021 template<class ST> class CStringFeatures; 00022 00028 class CHashedWDFeatures: public CDotFeatures 00029 { 00030 public: 00032 CHashedWDFeatures(); 00033 00042 CHashedWDFeatures(CStringFeatures<uint8_t>* str, int32_t start_order, 00043 int32_t order, int32_t from_order, int32_t hash_bits=12); 00044 00046 CHashedWDFeatures(const CHashedWDFeatures & orig); 00047 00049 virtual ~CHashedWDFeatures(); 00050 00058 inline virtual int32_t get_dim_feature_space() const 00059 { 00060 return w_dim; 00061 } 00062 00070 virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2); 00071 00078 virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, 00079 int32_t vec2_len); 00080 00089 virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, 00090 float64_t* vec2, int32_t vec2_len, bool abs_val=false); 00091 00097 virtual int32_t get_nnz_features_for_vector(int32_t num); 00098 00099 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00100 00101 struct hashed_wd_feature_iterator 00102 { 00104 uint16_t* vec; 00106 int32_t vidx; 00108 int32_t vlen; 00110 bool vfree; 00111 00113 int32_t index; 00114 00115 }; 00116 #endif 00117 00127 virtual void* get_feature_iterator(int32_t vector_index); 00128 00139 virtual bool get_next_feature(int32_t& index, float64_t& value, 00140 void* iterator); 00141 00147 virtual void free_feature_iterator(void* iterator); 00148 00153 virtual CFeatures* duplicate() const; 00154 00159 inline virtual EFeatureType get_feature_type() 00160 { 00161 return F_UNKNOWN; 00162 } 00163 00168 inline virtual EFeatureClass get_feature_class() 00169 { 00170 return C_WD; 00171 } 00172 00173 inline virtual int32_t get_num_vectors() const 00174 { 00175 return num_strings; 00176 } 00177 00178 inline virtual int32_t get_size() 00179 { 00180 return sizeof(float64_t); 00181 } 00182 00185 void set_normalization_const(float64_t n=0); 00186 00188 inline float64_t get_normalization_const() 00189 { 00190 return normalization_const; 00191 } 00192 00194 inline virtual const char* get_name() const 00195 { 00196 return "HashedWDFeatures"; 00197 } 00198 00199 protected: 00200 00202 void set_wd_weights(); 00203 00204 protected: 00206 CStringFeatures<uint8_t>* strings; 00207 00209 int32_t degree; 00211 int32_t start_degree; 00213 int32_t from_degree; 00215 int32_t string_length; 00217 int32_t num_strings; 00219 int32_t alphabet_size; 00221 int32_t w_dim; 00223 int32_t partial_w_dim; 00225 float64_t* wd_weights; 00227 uint32_t mask; 00229 int32_t m_hash_bits; 00230 00232 float64_t normalization_const; 00233 }; 00234 } 00235 #endif // _HASHEDWDFEATURES_H___