SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Soeren Sonnenburg 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/features/DotFeatures.h> 00012 #include <shogun/io/SGIO.h> 00013 #include <shogun/lib/Signal.h> 00014 #include <shogun/lib/Time.h> 00015 #include <shogun/mathematics/Math.h> 00016 #include <shogun/base/Parallel.h> 00017 #include <shogun/base/Parameter.h> 00018 00019 #ifdef HAVE_PTHREAD 00020 #include <pthread.h> 00021 #endif 00022 00023 using namespace shogun; 00024 00025 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00026 struct DF_THREAD_PARAM 00027 { 00028 CDotFeatures* df; 00029 int32_t* sub_index; 00030 float64_t* output; 00031 int32_t start; 00032 int32_t stop; 00033 float64_t* alphas; 00034 float64_t* vec; 00035 int32_t dim; 00036 float64_t bias; 00037 bool progress; 00038 }; 00039 #endif // DOXYGEN_SHOULD_SKIP_THIS 00040 00041 00042 CDotFeatures::CDotFeatures(int32_t size) 00043 :CFeatures(size), combined_weight(1.0) 00044 { 00045 init(); 00046 } 00047 00048 00049 CDotFeatures::CDotFeatures(const CDotFeatures & orig) 00050 :CFeatures(orig), combined_weight(orig.combined_weight) 00051 { 00052 init(); 00053 } 00054 00055 00056 CDotFeatures::CDotFeatures(CFile* loader) 00057 :CFeatures(loader) 00058 { 00059 init(); 00060 } 00061 00062 void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) 00063 { 00064 ASSERT(output); 00065 // write access is internally between output[start..stop] so the following 00066 // line is necessary to write to output[0...(stop-start-1)] 00067 output-=start; 00068 ASSERT(start>=0); 00069 ASSERT(start<stop); 00070 ASSERT(stop<=get_num_vectors()); 00071 00072 int32_t num_vectors=stop-start; 00073 ASSERT(num_vectors>0); 00074 00075 int32_t num_threads=parallel->get_num_threads(); 00076 ASSERT(num_threads>0); 00077 00078 CSignal::clear_cancel(); 00079 00080 #ifdef HAVE_PTHREAD 00081 if (num_threads < 2) 00082 { 00083 #endif 00084 DF_THREAD_PARAM params; 00085 params.df=this; 00086 params.sub_index=NULL; 00087 params.output=output; 00088 params.start=start; 00089 params.stop=stop; 00090 params.alphas=alphas; 00091 params.vec=vec; 00092 params.dim=dim; 00093 params.bias=b; 00094 params.progress=false; //true; 00095 dense_dot_range_helper((void*) ¶ms); 00096 #ifdef HAVE_PTHREAD 00097 } 00098 else 00099 { 00100 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); 00101 DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads); 00102 int32_t step= num_vectors/num_threads; 00103 00104 int32_t t; 00105 00106 for (t=0; t<num_threads-1; t++) 00107 { 00108 params[t].df = this; 00109 params[t].sub_index=NULL; 00110 params[t].output = output; 00111 params[t].start = start+t*step; 00112 params[t].stop = start+(t+1)*step; 00113 params[t].alphas=alphas; 00114 params[t].vec=vec; 00115 params[t].dim=dim; 00116 params[t].bias=b; 00117 params[t].progress = false; 00118 pthread_create(&threads[t], NULL, 00119 CDotFeatures::dense_dot_range_helper, (void*)¶ms[t]); 00120 } 00121 00122 params[t].df = this; 00123 params[t].output = output; 00124 params[t].sub_index=NULL; 00125 params[t].start = start+t*step; 00126 params[t].stop = stop; 00127 params[t].alphas=alphas; 00128 params[t].vec=vec; 00129 params[t].dim=dim; 00130 params[t].bias=b; 00131 params[t].progress = false; //true; 00132 dense_dot_range_helper((void*) ¶ms[t]); 00133 00134 for (t=0; t<num_threads-1; t++) 00135 pthread_join(threads[t], NULL); 00136 00137 SG_FREE(params); 00138 SG_FREE(threads); 00139 } 00140 #endif 00141 00142 #ifndef WIN32 00143 if ( CSignal::cancel_computations() ) 00144 SG_INFO( "prematurely stopped. \n"); 00145 #endif 00146 } 00147 00148 void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) 00149 { 00150 ASSERT(sub_index); 00151 ASSERT(output); 00152 00153 int32_t num_threads=parallel->get_num_threads(); 00154 ASSERT(num_threads>0); 00155 00156 CSignal::clear_cancel(); 00157 00158 #ifdef HAVE_PTHREAD 00159 if (num_threads < 2) 00160 { 00161 #endif 00162 DF_THREAD_PARAM params; 00163 params.df=this; 00164 params.sub_index=sub_index; 00165 params.output=output; 00166 params.start=0; 00167 params.stop=num; 00168 params.alphas=alphas; 00169 params.vec=vec; 00170 params.dim=dim; 00171 params.bias=b; 00172 params.progress=false; //true; 00173 dense_dot_range_helper((void*) ¶ms); 00174 #ifdef HAVE_PTHREAD 00175 } 00176 else 00177 { 00178 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); 00179 DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads); 00180 int32_t step= num/num_threads; 00181 00182 int32_t t; 00183 00184 for (t=0; t<num_threads-1; t++) 00185 { 00186 params[t].df = this; 00187 params[t].sub_index=sub_index; 00188 params[t].output = output; 00189 params[t].start = t*step; 00190 params[t].stop = (t+1)*step; 00191 params[t].alphas=alphas; 00192 params[t].vec=vec; 00193 params[t].dim=dim; 00194 params[t].bias=b; 00195 params[t].progress = false; 00196 pthread_create(&threads[t], NULL, 00197 CDotFeatures::dense_dot_range_helper, (void*)¶ms[t]); 00198 } 00199 00200 params[t].df = this; 00201 params[t].sub_index=sub_index; 00202 params[t].output = output; 00203 params[t].start = t*step; 00204 params[t].stop = num; 00205 params[t].alphas=alphas; 00206 params[t].vec=vec; 00207 params[t].dim=dim; 00208 params[t].bias=b; 00209 params[t].progress = false; //true; 00210 dense_dot_range_helper((void*) ¶ms[t]); 00211 00212 for (t=0; t<num_threads-1; t++) 00213 pthread_join(threads[t], NULL); 00214 00215 SG_FREE(params); 00216 SG_FREE(threads); 00217 } 00218 #endif 00219 00220 #ifndef WIN32 00221 if ( CSignal::cancel_computations() ) 00222 SG_INFO( "prematurely stopped. \n"); 00223 #endif 00224 } 00225 00226 void* CDotFeatures::dense_dot_range_helper(void* p) 00227 { 00228 DF_THREAD_PARAM* par=(DF_THREAD_PARAM*) p; 00229 CDotFeatures* df=par->df; 00230 int32_t* sub_index=par->sub_index; 00231 float64_t* output=par->output; 00232 int32_t start=par->start; 00233 int32_t stop=par->stop; 00234 float64_t* alphas=par->alphas; 00235 float64_t* vec=par->vec; 00236 int32_t dim=par->dim; 00237 float64_t bias=par->bias; 00238 bool progress=par->progress; 00239 00240 if (sub_index) 00241 { 00242 #ifdef WIN32 00243 for (int32_t i=start; i<stop i++) 00244 #else 00245 for (int32_t i=start; i<stop && 00246 !CSignal::cancel_computations(); i++) 00247 #endif 00248 { 00249 if (alphas) 00250 output[i]=alphas[sub_index[i]]*df->dense_dot(sub_index[i], vec, dim)+bias; 00251 else 00252 output[i]=df->dense_dot(sub_index[i], vec, dim)+bias; 00253 if (progress) 00254 df->display_progress(start, stop, i); 00255 } 00256 00257 } 00258 else 00259 { 00260 #ifdef WIN32 00261 for (int32_t i=start; i<stop i++) 00262 #else 00263 for (int32_t i=start; i<stop && 00264 !CSignal::cancel_computations(); i++) 00265 #endif 00266 { 00267 if (alphas) 00268 output[i]=alphas[i]*df->dense_dot(i, vec, dim)+bias; 00269 else 00270 output[i]=df->dense_dot(i, vec, dim)+bias; 00271 if (progress) 00272 df->display_progress(start, stop, i); 00273 } 00274 } 00275 00276 return NULL; 00277 } 00278 00279 SGMatrix<float64_t> CDotFeatures::get_computed_dot_feature_matrix() 00280 { 00281 SGMatrix<float64_t> m; 00282 00283 int64_t offs=0; 00284 int32_t num=get_num_vectors(); 00285 int32_t dim=get_dim_feature_space(); 00286 ASSERT(num>0); 00287 ASSERT(dim>0); 00288 00289 int64_t sz=((uint64_t) num)* dim; 00290 00291 m.do_free=true; 00292 m.num_cols=dim; 00293 m.num_rows=num; 00294 m.matrix=SG_MALLOC(float64_t, sz); 00295 memset(m.matrix, 0, sz*sizeof(float64_t)); 00296 00297 for (int32_t i=0; i<num; i++) 00298 { 00299 add_to_dense_vec(1.0, i, &(m.matrix[offs]), dim); 00300 offs+=dim; 00301 } 00302 00303 return m; 00304 } 00305 00306 SGVector<float64_t> CDotFeatures::get_computed_dot_feature_vector(int32_t num) 00307 { 00308 SGVector<float64_t> v; 00309 00310 int32_t dim=get_dim_feature_space(); 00311 ASSERT(num>=0 && num<=get_num_vectors()); 00312 ASSERT(dim>0); 00313 00314 v.do_free=true; 00315 v.vlen=dim; 00316 v.vector=SG_MALLOC(float64_t, dim); 00317 memset(v.vector, 0, dim*sizeof(float64_t)); 00318 00319 add_to_dense_vec(1.0, num, v.vector, dim); 00320 return v; 00321 } 00322 00323 void CDotFeatures::benchmark_add_to_dense_vector(int32_t repeats) 00324 { 00325 int32_t num=get_num_vectors(); 00326 int32_t d=get_dim_feature_space(); 00327 float64_t* w= SG_MALLOC(float64_t, d); 00328 CMath::fill_vector(w, d, 0.0); 00329 00330 CTime t; 00331 float64_t start_cpu=t.get_runtime(); 00332 float64_t start_wall=t.get_curtime(); 00333 for (int32_t r=0; r<repeats; r++) 00334 { 00335 for (int32_t i=0; i<num; i++) 00336 add_to_dense_vec(1.172343*(r+1), i, w, d); 00337 } 00338 00339 SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n", 00340 repeats, num, (t.get_runtime()-start_cpu)/repeats, 00341 (t.get_curtime()-start_wall)/repeats); 00342 00343 SG_FREE(w); 00344 } 00345 00346 void CDotFeatures::benchmark_dense_dot_range(int32_t repeats) 00347 { 00348 int32_t num=get_num_vectors(); 00349 int32_t d=get_dim_feature_space(); 00350 float64_t* w= SG_MALLOC(float64_t, d); 00351 float64_t* out= SG_MALLOC(float64_t, num); 00352 float64_t* alphas= SG_MALLOC(float64_t, num); 00353 CMath::range_fill_vector(w, d, 17.0); 00354 CMath::range_fill_vector(alphas, num, 1.2345); 00355 //CMath::fill_vector(w, d, 17.0); 00356 //CMath::fill_vector(alphas, num, 1.2345); 00357 00358 CTime t; 00359 float64_t start_cpu=t.get_runtime(); 00360 float64_t start_wall=t.get_curtime(); 00361 00362 for (int32_t r=0; r<repeats; r++) 00363 dense_dot_range(out, 0, num, alphas, w, d, 23); 00364 00365 #ifdef DEBUG_DOTFEATURES 00366 CMath::display_vector(out, 40, "dense_dot_range"); 00367 float64_t* out2= SG_MALLOC(float64_t, num); 00368 00369 for (int32_t r=0; r<repeats; r++) 00370 { 00371 CMath::fill_vector(out2, num, 0.0); 00372 for (int32_t i=0; i<num; i++) 00373 out2[i]+=dense_dot(i, w, d)*alphas[i]+23; 00374 } 00375 CMath::display_vector(out2, 40, "dense_dot"); 00376 for (int32_t i=0; i<num; i++) 00377 out2[i]-=out[i]; 00378 CMath::display_vector(out2, 40, "diff"); 00379 #endif 00380 SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n", 00381 repeats, num, (t.get_runtime()-start_cpu)/repeats, 00382 (t.get_curtime()-start_wall)/repeats); 00383 00384 SG_FREE(alphas); 00385 SG_FREE(out); 00386 SG_FREE(w); 00387 } 00388 00389 SGVector<float64_t> CDotFeatures::get_mean() 00390 { 00391 int32_t num=get_num_vectors(); 00392 int32_t dim=get_dim_feature_space(); 00393 ASSERT(num>0); 00394 ASSERT(dim>0); 00395 00396 SGVector<float64_t> mean(dim); 00397 memset(mean.vector, 0, sizeof(float64_t)*dim); 00398 00399 for (int i = 0; i < num; i++) 00400 add_to_dense_vec(1, i, mean.vector, dim); 00401 for (int j = 0; j < dim; j++) 00402 mean.vector[j] /= num; 00403 00404 return mean; 00405 } 00406 00407 SGMatrix<float64_t> CDotFeatures::get_cov() 00408 { 00409 int32_t num=get_num_vectors(); 00410 int32_t dim=get_dim_feature_space(); 00411 ASSERT(num>0); 00412 ASSERT(dim>0); 00413 00414 SGMatrix<float64_t> cov(dim, dim); 00415 00416 memset(cov.matrix, 0, sizeof(float64_t)*dim*dim); 00417 00418 SGVector<float64_t> mean = get_mean(); 00419 00420 for (int i = 0; i < num; i++) 00421 { 00422 SGVector<float64_t> v = get_computed_dot_feature_vector(i); 00423 CMath::add<float64_t>(v.vector, 1, v.vector, -1, mean.vector, v.vlen); 00424 for (int m = 0; m < v.vlen; m++) 00425 { 00426 for (int n = 0; n <= m ; n++) 00427 { 00428 (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n]; 00429 } 00430 } 00431 v.free_vector(); 00432 } 00433 for (int m = 0; m < dim; m++) 00434 { 00435 for (int n = 0; n <= m ; n++) 00436 { 00437 (cov.matrix)[m*dim+n] /= num; 00438 } 00439 } 00440 for (int m = 0; m < dim-1; m++) 00441 { 00442 for (int n = m+1; n < dim; n++) 00443 { 00444 (cov.matrix)[m*dim+n] = (cov.matrix)[n*dim+m]; 00445 } 00446 } 00447 mean.destroy_vector(); 00448 return cov; 00449 } 00450 00451 void CDotFeatures::display_progress(int32_t start, int32_t stop, int32_t v) 00452 { 00453 int32_t num_vectors=stop-start; 00454 int32_t i=v-start; 00455 00456 if ( (i% (num_vectors/100+1))== 0) 00457 SG_PROGRESS(v, 0.0, num_vectors-1); 00458 } 00459 00460 void CDotFeatures::init() 00461 { 00462 set_property(FP_DOT); 00463 m_parameters->add(&combined_weight, "combined_weight", 00464 "Feature weighting in combined dot features."); 00465 }