SHOGUN
v1.1.0
|
00001 #include <shogun/mathematics/Math.h> 00002 #include <shogun/features/StreamingSimpleFeatures.h> 00003 #include <shogun/io/StreamingFileFromSimpleFeatures.h> 00004 00005 namespace shogun 00006 { 00007 template <class T> CStreamingSimpleFeatures<T>::CStreamingSimpleFeatures() : CStreamingDotFeatures() 00008 { 00009 set_read_functions(); 00010 init(); 00011 parser.set_free_vector_after_release(false); 00012 } 00013 00014 template <class T> CStreamingSimpleFeatures<T>::CStreamingSimpleFeatures(CStreamingFile* file, 00015 bool is_labelled, 00016 int32_t size) 00017 : CStreamingDotFeatures() 00018 { 00019 init(file, is_labelled, size); 00020 set_read_functions(); 00021 parser.set_free_vector_after_release(false); 00022 } 00023 00024 template <class T> CStreamingSimpleFeatures<T>::CStreamingSimpleFeatures(CSimpleFeatures<T>* simple_features, 00025 float64_t* lab) 00026 : CStreamingDotFeatures() 00027 { 00028 CStreamingFileFromSimpleFeatures<T>* file; 00029 bool is_labelled; 00030 int32_t size = 1024; 00031 00032 if (lab) 00033 { 00034 is_labelled = true; 00035 file = new CStreamingFileFromSimpleFeatures<T>(simple_features, lab); 00036 } 00037 else 00038 { 00039 is_labelled = false; 00040 file = new CStreamingFileFromSimpleFeatures<T>(simple_features); 00041 } 00042 00043 SG_REF(file); 00044 00045 init(file, is_labelled, size); 00046 set_read_functions(); 00047 parser.set_free_vector_after_release(false); 00048 parser.set_free_vectors_on_destruct(false); 00049 seekable=true; 00050 } 00051 00052 template <class T> CStreamingSimpleFeatures<T>::~CStreamingSimpleFeatures() 00053 { 00054 parser.end_parser(); 00055 } 00056 00057 template <class T> void CStreamingSimpleFeatures<T>::reset_stream() 00058 { 00059 if (seekable) 00060 { 00061 ((CStreamingFileFromSimpleFeatures<T>*) working_file)->reset_stream(); 00062 parser.exit_parser(); 00063 parser.init(working_file, has_labels, 1); 00064 parser.set_free_vector_after_release(false); 00065 parser.start_parser(); 00066 } 00067 } 00068 00069 template <class T> float32_t CStreamingSimpleFeatures<T>::dense_dot(const float32_t* vec2, int32_t vec2_len) 00070 { 00071 ASSERT(vec2_len==current_length); 00072 float32_t result=0; 00073 00074 for (int32_t i=0; i<current_length; i++) 00075 result+=current_vector[i]*vec2[i]; 00076 00077 return result; 00078 } 00079 00080 template <class T> float64_t CStreamingSimpleFeatures<T>::dense_dot(const float64_t* vec2, int32_t vec2_len) 00081 { 00082 ASSERT(vec2_len==current_length); 00083 float64_t result=0; 00084 00085 for (int32_t i=0; i<current_length; i++) 00086 result+=current_vector[i]*vec2[i]; 00087 00088 return result; 00089 } 00090 00091 template <class T> void CStreamingSimpleFeatures<T>::add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len , bool abs_val) 00092 { 00093 ASSERT(vec2_len==current_length); 00094 00095 if (abs_val) 00096 { 00097 for (int32_t i=0; i<current_length; i++) 00098 vec2[i]+=alpha*CMath::abs(current_vector[i]); 00099 } 00100 else 00101 { 00102 for (int32_t i=0; i<current_length; i++) 00103 vec2[i]+=alpha*current_vector[i]; 00104 } 00105 } 00106 00107 template <class T> void CStreamingSimpleFeatures<T>::add_to_dense_vec(float64_t alpha, float64_t* vec2, int32_t vec2_len , bool abs_val) 00108 { 00109 ASSERT(vec2_len==current_length); 00110 00111 if (abs_val) 00112 { 00113 for (int32_t i=0; i<current_length; i++) 00114 vec2[i]+=alpha*CMath::abs(current_vector[i]); 00115 } 00116 else 00117 { 00118 for (int32_t i=0; i<current_length; i++) 00119 vec2[i]+=alpha*current_vector[i]; 00120 } 00121 } 00122 00123 template <class T> int32_t CStreamingSimpleFeatures<T>::get_nnz_features_for_vector() 00124 { 00125 return current_length; 00126 } 00127 00128 template <class T> CFeatures* CStreamingSimpleFeatures<T>::duplicate() const 00129 { 00130 return new CStreamingSimpleFeatures<T>(*this); 00131 } 00132 00133 template <class T> int32_t CStreamingSimpleFeatures<T>::get_num_vectors() const 00134 { 00135 if (current_vector) 00136 return 1; 00137 return 0; 00138 } 00139 00140 template <class T> int32_t CStreamingSimpleFeatures<T>::get_size() 00141 { 00142 return sizeof(T); 00143 } 00144 00145 template <class T> 00146 void CStreamingSimpleFeatures<T>::set_vector_reader() 00147 { 00148 parser.set_read_vector(&CStreamingFile::get_vector); 00149 } 00150 00151 template <class T> 00152 void CStreamingSimpleFeatures<T>::set_vector_and_label_reader() 00153 { 00154 parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label); 00155 } 00156 00157 #define GET_FEATURE_TYPE(f_type, sg_type) \ 00158 template<> EFeatureType CStreamingSimpleFeatures<sg_type>::get_feature_type() \ 00159 { \ 00160 return f_type; \ 00161 } 00162 00163 GET_FEATURE_TYPE(F_BOOL, bool) 00164 GET_FEATURE_TYPE(F_CHAR, char) 00165 GET_FEATURE_TYPE(F_BYTE, uint8_t) 00166 GET_FEATURE_TYPE(F_BYTE, int8_t) 00167 GET_FEATURE_TYPE(F_SHORT, int16_t) 00168 GET_FEATURE_TYPE(F_WORD, uint16_t) 00169 GET_FEATURE_TYPE(F_INT, int32_t) 00170 GET_FEATURE_TYPE(F_UINT, uint32_t) 00171 GET_FEATURE_TYPE(F_LONG, int64_t) 00172 GET_FEATURE_TYPE(F_ULONG, uint64_t) 00173 GET_FEATURE_TYPE(F_SHORTREAL, float32_t) 00174 GET_FEATURE_TYPE(F_DREAL, float64_t) 00175 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t) 00176 #undef GET_FEATURE_TYPE 00177 00178 00179 template <class T> 00180 void CStreamingSimpleFeatures<T>::init() 00181 { 00182 working_file=NULL; 00183 current_vector=NULL; 00184 seekable=false; 00185 current_length=-1; 00186 } 00187 00188 template <class T> 00189 void CStreamingSimpleFeatures<T>::init(CStreamingFile* file, 00190 bool is_labelled, 00191 int32_t size) 00192 { 00193 init(); 00194 has_labels = is_labelled; 00195 working_file = file; 00196 parser.init(file, is_labelled, size); 00197 seekable=false; 00198 } 00199 00200 template <class T> 00201 void CStreamingSimpleFeatures<T>::start_parser() 00202 { 00203 if (!parser.is_running()) 00204 parser.start_parser(); 00205 } 00206 00207 template <class T> 00208 void CStreamingSimpleFeatures<T>::end_parser() 00209 { 00210 parser.end_parser(); 00211 } 00212 00213 template <class T> 00214 bool CStreamingSimpleFeatures<T>::get_next_example() 00215 { 00216 bool ret_value; 00217 ret_value = (bool) parser.get_next_example(current_vector, 00218 current_length, 00219 current_label); 00220 00221 return ret_value; 00222 } 00223 00224 template <class T> 00225 SGVector<T> CStreamingSimpleFeatures<T>::get_vector() 00226 { 00227 current_sgvector.vector=current_vector; 00228 current_sgvector.vlen=current_length; 00229 00230 return current_sgvector; 00231 } 00232 00233 template <class T> 00234 float64_t CStreamingSimpleFeatures<T>::get_label() 00235 { 00236 ASSERT(has_labels); 00237 00238 return current_label; 00239 } 00240 00241 template <class T> 00242 void CStreamingSimpleFeatures<T>::release_example() 00243 { 00244 parser.finalize_example(); 00245 } 00246 00247 template <class T> 00248 int32_t CStreamingSimpleFeatures<T>::get_dim_feature_space() const 00249 { 00250 return current_length; 00251 } 00252 00253 template <class T> 00254 float32_t CStreamingSimpleFeatures<T>::dot(CStreamingDotFeatures* df) 00255 { 00256 ASSERT(df); 00257 ASSERT(df->get_feature_type() == get_feature_type()); 00258 ASSERT(df->get_feature_class() == get_feature_class()); 00259 CStreamingSimpleFeatures<T>* sf = (CStreamingSimpleFeatures<T>*) df; 00260 00261 SGVector<T> other_vector=sf->get_vector(); 00262 00263 float32_t result = CMath::dot(current_vector, other_vector.vector, current_length); 00264 00265 return result; 00266 } 00267 00268 template <class T> 00269 float32_t CStreamingSimpleFeatures<T>::dot(SGVector<T> sgvec1) 00270 { 00271 int32_t len1; 00272 len1=sgvec1.vlen; 00273 00274 if (len1 != current_length) 00275 SG_ERROR("Lengths %d and %d not equal while computing dot product!\n", len1, current_length); 00276 00277 float32_t result=CMath::dot(current_vector, sgvec1.vector, len1); 00278 return result; 00279 } 00280 00281 template <class T> 00282 int32_t CStreamingSimpleFeatures<T>::get_num_features() 00283 { 00284 return current_length; 00285 } 00286 00287 template <class T> 00288 EFeatureClass CStreamingSimpleFeatures<T>::get_feature_class() 00289 { 00290 return C_STREAMING_SIMPLE; 00291 } 00292 00293 template class CStreamingSimpleFeatures<bool>; 00294 template class CStreamingSimpleFeatures<char>; 00295 template class CStreamingSimpleFeatures<int8_t>; 00296 template class CStreamingSimpleFeatures<uint8_t>; 00297 template class CStreamingSimpleFeatures<int16_t>; 00298 template class CStreamingSimpleFeatures<uint16_t>; 00299 template class CStreamingSimpleFeatures<int32_t>; 00300 template class CStreamingSimpleFeatures<uint32_t>; 00301 template class CStreamingSimpleFeatures<int64_t>; 00302 template class CStreamingSimpleFeatures<uint64_t>; 00303 template class CStreamingSimpleFeatures<float32_t>; 00304 template class CStreamingSimpleFeatures<float64_t>; 00305 template class CStreamingSimpleFeatures<floatmax_t>; 00306 }