SHOGUN v1.1.0
KernelMachine.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 1999-2009 Soeren Sonnenburg
 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/machine/KernelMachine.h>
#include <shogun/lib/Signal.h>
#include <shogun/base/Parameter.h>

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct S_THREAD_PARAM
{
    CKernelMachine* kernel_machine;
    CLabels* result;
    int32_t start;
    int32_t end;
    bool verbose;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS

CKernelMachine::CKernelMachine()
: CMachine(), kernel(NULL), use_batch_computation(true), use_linadd(true), use_bias(true)
{
    SG_ADD((CSGObject**) &kernel, "kernel", "", MS_AVAILABLE);
    SG_ADD(&use_batch_computation, "use_batch_computation",
            "Batch computation is enabled.", MS_NOT_AVAILABLE);
    SG_ADD(&use_linadd, "use_linadd", "Linadd is enabled.", MS_NOT_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Bias shall be used.", MS_NOT_AVAILABLE);
    SG_ADD(&m_bias, "m_bias", "Bias term.", MS_NOT_AVAILABLE);
    SG_ADD(&m_alpha, "m_alpha", "Array of coefficients alpha.",
            MS_NOT_AVAILABLE);
    SG_ADD(&m_svs, "m_svs", "Number of ``support vectors''.", MS_NOT_AVAILABLE);

    m_bias=0.0;
}

CKernelMachine::~CKernelMachine()
{
    SG_UNREF(kernel);

    SG_FREE(m_alpha.vector);
    SG_FREE(m_svs.vector);
}

/* hand the support vectors and their alpha coefficients to the kernel so it
 * can precompute its linadd optimization */
bool CKernelMachine::init_kernel_optimization()
{
    int32_t num_sv=get_num_support_vectors();

    if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
    {
        int32_t* sv_idx = SG_MALLOC(int32_t, num_sv);
        float64_t* sv_weight = SG_MALLOC(float64_t, num_sv);

        for (int32_t i=0; i<num_sv; i++)
        {
            sv_idx[i] = get_support_vector(i);
            sv_weight[i] = get_alpha(i);
        }

        bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight);

        SG_FREE(sv_idx);
        SG_FREE(sv_weight);

        if (!ret)
            SG_ERROR("initialization of kernel optimization failed\n");

        return ret;
    }
    else
        SG_ERROR("initialization of kernel optimization failed\n");

    return false;
}

/* compute outputs for all vectors currently on the kernel's rhs */
CLabels* CKernelMachine::apply()
{
    CLabels* lab=NULL;

    if (!kernel)
        SG_ERROR("Kernelmachine can not proceed without kernel!\n");

    if ( kernel && kernel->get_num_vec_rhs()>0 )
    {
        int32_t num_vectors=kernel->get_num_vec_rhs();

        lab=new CLabels(num_vectors);
        SG_DEBUG("computing output on %d test examples\n", num_vectors);

        CSignal::clear_cancel();

        if (io->get_show_progress())
            io->enable_progress();
        else
            io->disable_progress();

        /* fast path: the kernel evaluates all test vectors against all
         * support vectors in one batch call */
        if (kernel->has_property(KP_BATCHEVALUATION) &&
                get_batch_computation_enabled())
        {
            float64_t* output=SG_MALLOC(float64_t, num_vectors);
            memset(output, 0, sizeof(float64_t)*num_vectors);

            if (get_num_support_vectors()>0)
            {
                int32_t* sv_idx=SG_MALLOC(int32_t, get_num_support_vectors());
                float64_t* sv_weight=SG_MALLOC(float64_t, get_num_support_vectors());
                int32_t* idx=SG_MALLOC(int32_t, num_vectors);

                //compute output for all vectors v[0]...v[num_vectors-1]
                for (int32_t i=0; i<num_vectors; i++)
                    idx[i]=i;

                for (int32_t i=0; i<get_num_support_vectors(); i++)
                {
                    sv_idx[i] = get_support_vector(i);
                    sv_weight[i] = get_alpha(i);
                }

                kernel->compute_batch(num_vectors, idx,
                        output, get_num_support_vectors(), sv_idx, sv_weight);
                SG_FREE(sv_idx);
                SG_FREE(sv_weight);
                SG_FREE(idx);
            }

            for (int32_t i=0; i<num_vectors; i++)
                lab->set_label(i, get_bias()+output[i]);

            SG_FREE(output);
        }
        /* generic path: split the test vectors across threads, each thread
         * labelling its range via apply_helper() */
        else
        {
            int32_t num_threads=parallel->get_num_threads();
            ASSERT(num_threads>0);

            if (num_threads < 2)
            {
                S_THREAD_PARAM params;
                params.kernel_machine=this;
                params.result=lab;
                params.start=0;
                params.end=num_vectors;
                params.verbose=true;
                apply_helper((void*) &params);
            }
#ifdef HAVE_PTHREAD
            else
            {
                pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
                S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, num_threads);
                int32_t step = num_vectors/num_threads;

                int32_t t;

                for (t=0; t<num_threads-1; t++)
                {
                    params[t].kernel_machine = this;
                    params[t].result = lab;
                    params[t].start = t*step;
                    params[t].end = (t+1)*step;
                    params[t].verbose = false;
                    pthread_create(&threads[t], NULL,
                            CKernelMachine::apply_helper, (void*) &params[t]);
                }

                /* the calling thread handles the last chunk and reports progress */
                params[t].kernel_machine = this;
                params[t].result = lab;
                params[t].start = t*step;
                params[t].end = num_vectors;
                params[t].verbose = true;
                apply_helper((void*) &params[t]);

                for (t=0; t<num_threads-1; t++)
                    pthread_join(threads[t], NULL);

                SG_FREE(params);
                SG_FREE(threads);
            }
#endif
        }

#ifndef WIN32
        if ( CSignal::cancel_computations() )
            SG_INFO("prematurely stopped.           \n");
        else
#endif
            SG_DONE();
    }
    else
        return NULL;

    return lab;
}

/* output for a single vector: either the kernel's precomputed (linadd)
 * optimization or an explicit sum over all support vectors */
float64_t CKernelMachine::apply(int32_t num)
{
    ASSERT(kernel);

    if (kernel->has_property(KP_LINADD) && (kernel->get_is_initialized()))
    {
        float64_t score = kernel->compute_optimized(num);
        return score+get_bias();
    }
    else
    {
        float64_t score=0;
        for (int32_t i=0; i<get_num_support_vectors(); i++)
            score+=kernel->kernel(get_support_vector(i), num)*get_alpha(i);

        return score+get_bias();
    }
}

/* initialize the kernel with the training vectors on the lhs and the given
 * data on the rhs, then classify all of data */
CLabels* CKernelMachine::apply(CFeatures* data)
{
    if (!kernel)
        SG_ERROR("No kernel assigned!\n");

    CFeatures* lhs=kernel->get_lhs();
    if (!lhs || !lhs->get_num_vectors())
    {
        SG_UNREF(lhs);
        SG_ERROR("No vectors on left hand side\n");
    }
    kernel->init(lhs, data);
    SG_UNREF(lhs);

    return apply();
}

/* thread entry point: label the [start,end) range described in p */
void* CKernelMachine::apply_helper(void* p)
{
    S_THREAD_PARAM* params= (S_THREAD_PARAM*) p;
    CLabels* result=params->result;
    CKernelMachine* kernel_machine=params->kernel_machine;

#ifdef WIN32
    for (int32_t vec=params->start; vec<params->end; vec++)
#else
    for (int32_t vec=params->start; vec<params->end &&
            !CSignal::cancel_computations(); vec++)
#endif
    {
        if (params->verbose)
        {
            int32_t num_vectors=params->end - params->start;
            int32_t v=vec-params->start;
            if ( (v % (num_vectors/100+1)) == 0)
                SG_SPROGRESS(v, 0.0, num_vectors-1);
        }

        result->set_label(vec, kernel_machine->apply(vec));
    }

    return NULL;
}

void CKernelMachine::store_model_features()
{
    if (!kernel)
        SG_ERROR("kernel is needed to store SV features.\n");

    CFeatures* lhs=kernel->get_lhs();
    CFeatures* rhs=kernel->get_rhs();

    if (!lhs)
        SG_ERROR("kernel lhs is needed to store SV features.\n");

    /* copy sv feature data */
    CFeatures* sv_features=lhs->copy_subset(m_svs);
    SG_UNREF(lhs);

    /* now sv indices are just the identity */
    CMath::range_fill_vector(m_svs.vector, m_svs.vlen, 0);

    /* set new lhs to kernel */
    kernel->init(sv_features, rhs);

    SG_UNREF(rhs);
}