SHOGUN
v1.1.0
|
00001 /* 00002 * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights 00003 * embodied in the content of this file are licensed under the BSD 00004 * (revised) open source license. 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * Written (W) 2011 Shashwat Lal Das 00012 * Adaptation of Vowpal Wabbit v5.1. 00013 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society. 00014 */ 00015 00016 #include <shogun/classifier/vw/VowpalWabbit.h> 00017 00018 using namespace shogun; 00019 00020 CVowpalWabbit::CVowpalWabbit() 00021 : COnlineLinearMachine() 00022 { 00023 reg=NULL; 00024 learner=NULL; 00025 init(NULL); 00026 } 00027 00028 CVowpalWabbit::CVowpalWabbit(CStreamingVwFeatures* feat) 00029 : COnlineLinearMachine() 00030 { 00031 reg=NULL; 00032 learner=NULL; 00033 init(feat); 00034 } 00035 00036 CVowpalWabbit::~CVowpalWabbit() 00037 { 00038 SG_UNREF(env); 00039 SG_UNREF(reg); 00040 SG_UNREF(learner); 00041 } 00042 00043 void CVowpalWabbit::reinitialize_weights() 00044 { 00045 if (reg->weight_vectors) 00046 { 00047 if (reg->weight_vectors[0]) 00048 SG_FREE(reg->weight_vectors[0]); 00049 SG_FREE(reg->weight_vectors); 00050 } 00051 00052 reg->init(env); 00053 w = reg->weight_vectors[0]; 00054 } 00055 00056 void CVowpalWabbit::set_adaptive(bool adaptive_learning) 00057 { 00058 if (adaptive_learning) 00059 { 00060 env->adaptive = true; 00061 env->set_stride(2); 00062 env->power_t = 0.; 00063 reinitialize_weights(); 00064 } 00065 else 00066 env->adaptive = false; 00067 } 00068 00069 void CVowpalWabbit::set_exact_adaptive_norm(bool exact_adaptive) 00070 { 00071 if (exact_adaptive) 00072 { 00073 set_adaptive(true); 00074 env->exact_adaptive_norm = true; 00075 } 00076 else 00077 env->exact_adaptive_norm = false; 00078 } 00079 00080 void CVowpalWabbit::load_regressor(char* file_name) 00081 { 00082 reg->load_regressor(file_name); 00083 w = reg->weight_vectors[0]; 00084 w_dim = 1 << env->num_bits; 00085 } 00086 00087 void CVowpalWabbit::set_regressor_out(char* file_name, bool is_text) 00088 { 00089 reg_name = file_name; 00090 reg_dump_text = is_text; 00091 } 00092 00093 void CVowpalWabbit::set_prediction_out(char* file_name) 00094 { 00095 save_predictions = true; 00096 prediction_fd = open(file_name, O_CREAT|O_TRUNC|O_WRONLY, 0666); 00097 if (prediction_fd < 0) 00098 SG_SERROR("Unable to open prediction file %s for writing!\n", file_name); 00099 } 00100 00101 void CVowpalWabbit::add_quadratic_pair(char* pair) 00102 { 00103 env->pairs.push_back(pair); 00104 } 00105 00106 bool CVowpalWabbit::train_machine(CFeatures* feat) 00107 { 00108 ASSERT(features || feat); 00109 if (feat && (features != (CStreamingVwFeatures*) feat)) 00110 { 00111 SG_UNREF(features); 00112 init((CStreamingVwFeatures*) feat); 00113 } 00114 00115 set_learner(); 00116 00117 VwExample* example = NULL; 00118 vw_size_t current_pass = 0; 00119 00120 const char* header_fmt = "%-10s %-10s %8s %8s %10s %8s %8s\n"; 00121 00122 if (!quiet) 00123 { 00124 SG_SPRINT(header_fmt, 00125 "average", "since", "example", "example", 00126 "current", "current", "current"); 00127 SG_SPRINT(header_fmt, 00128 "loss", "last", "counter", "weight", "label", "predict", "features"); 00129 } 00130 00131 features->start_parser(); 00132 while (env->passes_complete < env->num_passes) 00133 { 00134 while (features->get_next_example()) 00135 { 00136 example = features->get_example(); 00137 00138 // Check if we shouldn't train (generally used for cache creation) 00139 if (!no_training) 00140 { 00141 if (example->pass != current_pass) 00142 { 00143 env->eta *= env->eta_decay_rate; 00144 current_pass = example->pass; 00145 } 00146 00147 predict_and_finalize(example); 00148 00149 learner->train(example, example->eta_round); 00150 example->eta_round = 0.; 00151 00152 output_example(example); 00153 } 00154 00155 features->release_example(); 00156 } 00157 env->passes_complete++; 00158 if (env->passes_complete < env->num_passes) 00159 features->reset_stream(); 00160 } 00161 features->end_parser(); 00162 00163 if (env->l1_regularization > 0.) 00164 { 00165 uint32_t length = 1 << env->num_bits; 00166 vw_size_t stride = env->stride; 00167 float32_t gravity = env->l1_regularization * env->update_sum; 00168 for (uint32_t i = 0; i < length; i++) 00169 reg->weight_vectors[0][stride*i] = real_weight(reg->weight_vectors[0][stride*i], gravity); 00170 } 00171 00172 if (reg_name != NULL) 00173 reg->dump_regressor(reg_name, reg_dump_text); 00174 00175 return true; 00176 } 00177 00178 float32_t CVowpalWabbit::predict_and_finalize(VwExample* ex) 00179 { 00180 float32_t prediction; 00181 if (env->l1_regularization != 0.) 00182 prediction = inline_l1_predict(ex); 00183 else 00184 prediction = inline_predict(ex); 00185 00186 ex->final_prediction = 0; 00187 ex->final_prediction += prediction; 00188 ex->final_prediction = finalize_prediction(ex->final_prediction); 00189 float32_t t = ex->example_t; 00190 00191 if (ex->ld->label != FLT_MAX) 00192 { 00193 ex->loss = reg->get_loss(ex->final_prediction, ex->ld->label) * ex->ld->weight; 00194 float64_t update = 0.; 00195 if (env->adaptive && env->exact_adaptive_norm) 00196 { 00197 float32_t sum_abs_x = 0.; 00198 float32_t exact_norm = compute_exact_norm(ex, sum_abs_x); 00199 update = (env->eta * exact_norm)/sum_abs_x; 00200 env->update_sum += update; 00201 ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, exact_norm); 00202 } 00203 else 00204 { 00205 update = (env->eta)/pow(t, env->power_t) * ex->ld->weight; 00206 ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, ex->total_sum_feat_sq); 00207 } 00208 env->update_sum += update; 00209 } 00210 00211 return prediction; 00212 } 00213 00214 void CVowpalWabbit::init(CStreamingVwFeatures* feat) 00215 { 00216 features = feat; 00217 env = feat->get_env(); 00218 reg = new CVwRegressor(env); 00219 SG_REF(env); 00220 SG_REF(reg); 00221 00222 quiet = true; 00223 no_training = false; 00224 dump_interval = exp(1.); 00225 sum_loss_since_last_dump = 0.; 00226 reg_name = NULL; 00227 reg_dump_text = true; 00228 save_predictions = false; 00229 prediction_fd = -1; 00230 00231 w = reg->weight_vectors[0]; 00232 w_dim = 1 << env->num_bits; 00233 bias = 0.; 00234 } 00235 00236 void CVowpalWabbit::set_learner() 00237 { 00238 if (env->adaptive) 00239 learner = new CVwAdaptiveLearner(reg, env); 00240 else 00241 learner = new CVwNonAdaptiveLearner(reg, env); 00242 SG_REF(learner); 00243 } 00244 00245 float32_t CVowpalWabbit::inline_l1_predict(VwExample* &ex) 00246 { 00247 vw_size_t thread_num = 0; 00248 00249 float32_t prediction = ex->ld->get_initial(); 00250 00251 float32_t* weights = reg->weight_vectors[thread_num]; 00252 vw_size_t thread_mask = env->thread_mask; 00253 00254 prediction += features->dense_dot_truncated(weights, ex, env->l1_regularization * env->update_sum); 00255 00256 for (int32_t k = 0; k < env->pairs.get_num_elements(); k++) 00257 { 00258 char* i = env->pairs.get_element(k); 00259 00260 v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])]; 00261 temp.begin = ex->atomics[(int32_t)(i[0])].begin; 00262 temp.end = ex->atomics[(int32_t)(i[0])].end; 00263 for (; temp.begin != temp.end; temp.begin++) 00264 prediction += one_pf_quad_predict_trunc(weights, *temp.begin, 00265 ex->atomics[(int32_t)(i[1])], thread_mask, 00266 env->l1_regularization * env->update_sum); 00267 } 00268 00269 return prediction; 00270 } 00271 00272 float32_t CVowpalWabbit::inline_predict(VwExample* &ex) 00273 { 00274 vw_size_t thread_num = 0; 00275 float32_t prediction = ex->ld->initial; 00276 00277 float32_t* weights = reg->weight_vectors[thread_num]; 00278 vw_size_t thread_mask = env->thread_mask; 00279 prediction += features->dense_dot(weights, 0); 00280 00281 for (int32_t k = 0; k < env->pairs.get_num_elements(); k++) 00282 { 00283 char* i = env->pairs.get_element(k); 00284 00285 v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])]; 00286 temp.begin = ex->atomics[(int32_t)(i[0])].begin; 00287 temp.end = ex->atomics[(int32_t)(i[0])].end; 00288 for (; temp.begin != temp.end; temp.begin++) 00289 prediction += one_pf_quad_predict(weights, *temp.begin, 00290 ex->atomics[(int32_t)(i[1])], 00291 thread_mask); 00292 } 00293 00294 return prediction; 00295 } 00296 00297 float32_t CVowpalWabbit::finalize_prediction(float32_t ret) 00298 { 00299 if (isnan(ret)) 00300 return 0.5; 00301 if (ret > env->max_label) 00302 return env->max_label; 00303 if (ret < env->min_label) 00304 return env->min_label; 00305 00306 return ret; 00307 } 00308 00309 void CVowpalWabbit::output_example(VwExample* &example) 00310 { 00311 if (!quiet) 00312 { 00313 sum_loss_since_last_dump += example->loss; 00314 if (env->weighted_examples + example->ld->weight > dump_interval) 00315 { 00316 print_update(example); 00317 dump_interval *= 2; 00318 } 00319 } 00320 00321 if (save_predictions) 00322 { 00323 float32_t wt = 0.; 00324 if (reg->weight_vectors) 00325 wt = reg->weight_vectors[0][0]; 00326 00327 output_prediction(prediction_fd, example->final_prediction, wt * example->global_weight, example->tag); 00328 } 00329 } 00330 00331 void CVowpalWabbit::print_update(VwExample* &ex) 00332 { 00333 SG_SPRINT("%-10.6f %-10.6f %8lld %8.1f %8.4f %8.4f %8lu\n", 00334 (env->sum_loss + ex->loss)/(env->weighted_examples + ex->ld->weight), 00335 sum_loss_since_last_dump/(env->weighted_examples + ex->ld->weight - old_weighted_examples), 00336 env->example_number + 1, 00337 env->weighted_examples + ex->ld->weight, 00338 ex->ld->label, 00339 ex->final_prediction, 00340 (long unsigned int)ex->num_features); 00341 sum_loss_since_last_dump = 0.0; 00342 old_weighted_examples = env->weighted_examples + ex->ld->weight; 00343 } 00344 00345 00346 void CVowpalWabbit::output_prediction(int32_t f, float32_t res, float32_t weight, v_array<char> tag) 00347 { 00348 if (f >= 0) 00349 { 00350 char temp[30]; 00351 int32_t num = sprintf(temp, "%f", res); 00352 ssize_t t; 00353 t = write(f, temp, num); 00354 if (t != num) 00355 SG_SERROR("Write error!\n"); 00356 00357 if (tag.begin != tag.end) 00358 { 00359 temp[0] = ' '; 00360 t = write(f, temp, 1); 00361 if (t != 1) 00362 SG_SERROR("Write error!\n"); 00363 00364 t = write(f, tag.begin, sizeof(char)*tag.index()); 00365 if (t != (ssize_t) (sizeof(char)*tag.index())) 00366 SG_SERROR("Write error!\n"); 00367 } 00368 00369 temp[0] = '\n'; 00370 t = write(f, temp, 1); 00371 if (t != 1) 00372 SG_SERROR("Write error!\n"); 00373 } 00374 } 00375 00376 void CVowpalWabbit::set_verbose(bool verbose) 00377 { 00378 quiet=verbose==false; 00379 } 00380 00381 00382 float32_t CVowpalWabbit::compute_exact_norm(VwExample* &ex, float32_t& sum_abs_x) 00383 { 00384 // We must traverse the features in _precisely_ the same order as during training. 00385 vw_size_t thread_mask = env->thread_mask; 00386 vw_size_t thread_num = 0; 00387 00388 float32_t g = reg->loss->get_square_grad(ex->final_prediction, ex->ld->label) * ex->ld->weight; 00389 if (g == 0) return 0.; 00390 00391 float32_t xGx = 0.; 00392 00393 float32_t* weights = reg->weight_vectors[thread_num]; 00394 for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++) 00395 { 00396 for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++) 00397 { 00398 float32_t* w_vec = &weights[f->weight_index & thread_mask]; 00399 float32_t t = f->x * CMath::invsqrt(w_vec[1] + g * f->x * f->x); 00400 xGx += t * f->x; 00401 sum_abs_x += fabsf(f->x); 00402 } 00403 } 00404 00405 for (int32_t k = 0; k < env->pairs.get_num_elements(); k++) 00406 { 00407 char* i = env->pairs.get_element(k); 00408 00409 v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])]; 00410 temp.begin = ex->atomics[(int32_t)(i[0])].begin; 00411 temp.end = ex->atomics[(int32_t)(i[0])].end; 00412 for (; temp.begin != temp.end; temp.begin++) 00413 xGx += compute_exact_norm_quad(weights, *temp.begin, ex->atomics[(int32_t)(i[1])], thread_mask, g, sum_abs_x); 00414 } 00415 00416 return xGx; 00417 } 00418 00419 float32_t CVowpalWabbit::compute_exact_norm_quad(float32_t* weights, VwFeature& page_feature, v_array<VwFeature> &offer_features, 00420 vw_size_t mask, float32_t g, float32_t& sum_abs_x) 00421 { 00422 vw_size_t halfhash = quadratic_constant * page_feature.weight_index; 00423 float32_t xGx = 0.; 00424 float32_t update2 = g * page_feature.x * page_feature.x; 00425 for (VwFeature* elem = offer_features.begin; elem != offer_features.end; elem++) 00426 { 00427 float32_t* w_vec = &weights[(halfhash + elem->weight_index) & mask]; 00428 float32_t t = elem->x * CMath::invsqrt(w_vec[1] + update2 * elem->x * elem->x); 00429 xGx += t * elem->x; 00430 sum_abs_x += fabsf(elem->x); 00431 } 00432 return xGx; 00433 }