SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/lib/config.h> 00013 #include <shogun/lib/common.h> 00014 #include <shogun/io/SGIO.h> 00015 #include <shogun/io/File.h> 00016 #include <shogun/lib/Time.h> 00017 #include <shogun/lib/Signal.h> 00018 00019 #include <shogun/base/Parallel.h> 00020 00021 #include <shogun/kernel/Kernel.h> 00022 #include <shogun/kernel/IdentityKernelNormalizer.h> 00023 #include <shogun/features/Features.h> 00024 #include <shogun/base/Parameter.h> 00025 00026 #include <shogun/classifier/svm/SVM.h> 00027 00028 #include <string.h> 00029 #include <unistd.h> 00030 #include <math.h> 00031 00032 #ifdef HAVE_PTHREAD 00033 #include <pthread.h> 00034 #endif 00035 00036 using namespace shogun; 00037 00038 CKernel::CKernel() : CSGObject() 00039 { 00040 init(); 00041 register_params(); 00042 } 00043 00044 CKernel::CKernel(int32_t size) : CSGObject() 00045 { 00046 init(); 00047 00048 if (size<10) 00049 size=10; 00050 00051 cache_size=size; 00052 register_params(); 00053 } 00054 00055 00056 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject() 00057 { 00058 init(); 00059 00060 if (size<10) 00061 size=10; 00062 00063 cache_size=size; 00064 00065 set_normalizer(new CIdentityKernelNormalizer()); 00066 init(p_lhs, p_rhs); 00067 register_params(); 00068 } 00069 00070 CKernel::~CKernel() 00071 { 00072 if (get_is_initialized()) 00073 SG_ERROR("Kernel still initialized on destruction.\n"); 00074 00075 remove_lhs_and_rhs(); 00076 SG_UNREF(normalizer); 00077 00078 SG_INFO("Kernel deleted (%p).\n", this); 00079 } 00080 00081 00082 00083 bool CKernel::init(CFeatures* l, CFeatures* r) 00084 { 00085 //make sure features were indeed supplied 00086 ASSERT(l); 00087 ASSERT(r); 00088 00089 //make sure features are compatible 00090 ASSERT(l->get_feature_class()==r->get_feature_class()); 00091 ASSERT(l->get_feature_type()==r->get_feature_type()); 00092 00093 //remove references to previous features 00094 remove_lhs_and_rhs(); 00095 00096 //increase reference counts 00097 SG_REF(l); 00098 if (l==r) 00099 lhs_equals_rhs=true; 00100 else // l!=r 00101 SG_REF(r); 00102 00103 lhs=l; 00104 rhs=r; 00105 00106 ASSERT(!num_lhs || num_lhs==l->get_num_vectors()); 00107 ASSERT(!num_rhs || num_rhs==l->get_num_vectors()); 00108 00109 num_lhs=l->get_num_vectors(); 00110 num_rhs=r->get_num_vectors(); 00111 00112 return true; 00113 } 00114 00115 bool CKernel::set_normalizer(CKernelNormalizer* n) 00116 { 00117 SG_REF(n); 00118 if (lhs && rhs) 00119 n->init(this); 00120 00121 SG_UNREF(normalizer); 00122 normalizer=n; 00123 00124 return (normalizer!=NULL); 00125 } 00126 00127 CKernelNormalizer* CKernel::get_normalizer() 00128 { 00129 SG_REF(normalizer) 00130 return normalizer; 00131 } 00132 00133 bool CKernel::init_normalizer() 00134 { 00135 return normalizer->init(this); 00136 } 00137 00138 void CKernel::cleanup() 00139 { 00140 remove_lhs_and_rhs(); 00141 } 00142 00143 00144 00145 void CKernel::load(CFile* loader) 00146 { 00147 SG_SET_LOCALE_C; 00148 SG_RESET_LOCALE; 00149 } 00150 00151 void CKernel::save(CFile* writer) 00152 { 00153 SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>(); 00154 SG_SET_LOCALE_C; 00155 writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols); 00156 SG_FREE(k_matrix.matrix); 00157 SG_RESET_LOCALE; 00158 } 00159 00160 void CKernel::remove_lhs_and_rhs() 00161 { 00162 if (rhs!=lhs) 00163 SG_UNREF(rhs); 00164 rhs = NULL; 00165 num_rhs=0; 00166 00167 SG_UNREF(lhs); 00168 lhs = NULL; 00169 num_lhs=0; 00170 lhs_equals_rhs=false; 00171 00172 00173 } 00174 00175 void CKernel::remove_lhs() 00176 { 00177 if (rhs==lhs) 00178 rhs=NULL; 00179 SG_UNREF(lhs); 00180 lhs = NULL; 00181 num_lhs=0; 00182 lhs_equals_rhs=false; 00183 00184 } 00185 00187 void CKernel::remove_rhs() 00188 { 00189 if (rhs!=lhs) 00190 SG_UNREF(rhs); 00191 rhs = NULL; 00192 num_rhs=0; 00193 lhs_equals_rhs=false; 00194 00195 00196 } 00197 00198 #define ENUM_CASE(n) case n: SG_INFO(#n " "); break; 00199 00200 void CKernel::list_kernel() 00201 { 00202 SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(), 00203 get_combined_kernel_weight(), 00204 get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : 00205 "SLOWBUTMEMEFFICIENT"); 00206 00207 switch (get_kernel_type()) 00208 { 00209 ENUM_CASE(K_UNKNOWN) 00210 ENUM_CASE(K_LINEAR) 00211 ENUM_CASE(K_POLY) 00212 ENUM_CASE(K_GAUSSIAN) 00213 ENUM_CASE(K_GAUSSIANSHIFT) 00214 ENUM_CASE(K_GAUSSIANMATCH) 00215 ENUM_CASE(K_HISTOGRAM) 00216 ENUM_CASE(K_SALZBERG) 00217 ENUM_CASE(K_LOCALITYIMPROVED) 00218 ENUM_CASE(K_SIMPLELOCALITYIMPROVED) 00219 ENUM_CASE(K_FIXEDDEGREE) 00220 ENUM_CASE(K_WEIGHTEDDEGREE) 00221 ENUM_CASE(K_WEIGHTEDDEGREEPOS) 00222 ENUM_CASE(K_WEIGHTEDDEGREERBF) 00223 ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING) 00224 ENUM_CASE(K_POLYMATCH) 00225 ENUM_CASE(K_ALIGNMENT) 00226 ENUM_CASE(K_COMMWORDSTRING) 00227 ENUM_CASE(K_COMMULONGSTRING) 00228 ENUM_CASE(K_SPECTRUMRBF) 00229 ENUM_CASE(K_COMBINED) 00230 ENUM_CASE(K_AUC) 00231 ENUM_CASE(K_CUSTOM) 00232 ENUM_CASE(K_SIGMOID) 00233 ENUM_CASE(K_CHI2) 00234 ENUM_CASE(K_DIAG) 00235 ENUM_CASE(K_CONST) 00236 ENUM_CASE(K_DISTANCE) 00237 ENUM_CASE(K_LOCALALIGNMENT) 00238 ENUM_CASE(K_PYRAMIDCHI2) 00239 ENUM_CASE(K_OLIGO) 00240 ENUM_CASE(K_MATCHWORD) 00241 ENUM_CASE(K_TPPK) 00242 ENUM_CASE(K_REGULATORYMODULES) 00243 ENUM_CASE(K_SPARSESPATIALSAMPLE) 00244 ENUM_CASE(K_HISTOGRAMINTERSECTION) 00245 ENUM_CASE(K_WAVELET) 00246 ENUM_CASE(K_WAVE) 00247 ENUM_CASE(K_CAUCHY) 00248 ENUM_CASE(K_TSTUDENT) 00249 ENUM_CASE(K_MULTIQUADRIC) 00250 ENUM_CASE(K_EXPONENTIAL) 00251 ENUM_CASE(K_RATIONAL_QUADRATIC) 00252 ENUM_CASE(K_POWER) 00253 ENUM_CASE(K_SPHERICAL) 00254 ENUM_CASE(K_LOG) 00255 ENUM_CASE(K_SPLINE) 00256 ENUM_CASE(K_ANOVA) 00257 ENUM_CASE(K_CIRCULAR) 00258 ENUM_CASE(K_INVERSEMULTIQUADRIC) 00259 ENUM_CASE(K_SPECTRUMMISMATCHRBF) 00260 ENUM_CASE(K_DISTANTSEGMENTS) 00261 ENUM_CASE(K_BESSEL) 00262 } 00263 00264 switch (get_feature_class()) 00265 { 00266 ENUM_CASE(C_UNKNOWN) 00267 ENUM_CASE(C_SIMPLE) 00268 ENUM_CASE(C_SPARSE) 00269 ENUM_CASE(C_STRING) 00270 ENUM_CASE(C_STREAMING_SIMPLE) 00271 ENUM_CASE(C_STREAMING_SPARSE) 00272 ENUM_CASE(C_STREAMING_STRING) 00273 ENUM_CASE(C_STREAMING_VW) 00274 ENUM_CASE(C_COMBINED) 00275 ENUM_CASE(C_COMBINED_DOT) 00276 ENUM_CASE(C_WD) 00277 ENUM_CASE(C_SPEC) 00278 ENUM_CASE(C_WEIGHTEDSPEC) 00279 ENUM_CASE(C_POLY) 00280 ENUM_CASE(C_ANY) 00281 } 00282 00283 switch (get_feature_type()) 00284 { 00285 ENUM_CASE(F_UNKNOWN) 00286 ENUM_CASE(F_BOOL) 00287 ENUM_CASE(F_CHAR) 00288 ENUM_CASE(F_BYTE) 00289 ENUM_CASE(F_SHORT) 00290 ENUM_CASE(F_WORD) 00291 ENUM_CASE(F_INT) 00292 ENUM_CASE(F_UINT) 00293 ENUM_CASE(F_LONG) 00294 ENUM_CASE(F_ULONG) 00295 ENUM_CASE(F_SHORTREAL) 00296 ENUM_CASE(F_DREAL) 00297 ENUM_CASE(F_LONGREAL) 00298 ENUM_CASE(F_ANY) 00299 } 00300 SG_INFO( "\n"); 00301 } 00302 #undef ENUM_CASE 00303 00304 bool CKernel::init_optimization( 00305 int32_t count, int32_t *IDX, float64_t * weights) 00306 { 00307 SG_ERROR( "kernel does not support linadd optimization\n"); 00308 return false ; 00309 } 00310 00311 bool CKernel::delete_optimization() 00312 { 00313 SG_ERROR( "kernel does not support linadd optimization\n"); 00314 return false; 00315 } 00316 00317 float64_t CKernel::compute_optimized(int32_t vector_idx) 00318 { 00319 SG_ERROR( "kernel does not support linadd optimization\n"); 00320 return 0; 00321 } 00322 00323 void CKernel::compute_batch( 00324 int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec, 00325 int32_t* IDX, float64_t* weights, float64_t factor) 00326 { 00327 SG_ERROR( "kernel does not support batch computation\n"); 00328 } 00329 00330 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight) 00331 { 00332 SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n"); 00333 } 00334 00335 void CKernel::clear_normal() 00336 { 00337 SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n"); 00338 } 00339 00340 int32_t CKernel::get_num_subkernels() 00341 { 00342 return 1; 00343 } 00344 00345 void CKernel::compute_by_subkernel( 00346 int32_t vector_idx, float64_t * subkernel_contrib) 00347 { 00348 SG_ERROR( "kernel compute_by_subkernel not implemented\n"); 00349 } 00350 00351 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights) 00352 { 00353 num_weights=1 ; 00354 return &combined_kernel_weight ; 00355 } 00356 00357 void CKernel::set_subkernel_weights(SGVector<float64_t> weights) 00358 { 00359 ASSERT(weights.vector); 00360 if (weights.vlen!=1) 00361 SG_ERROR( "number of subkernel weights should be one ...\n"); 00362 00363 combined_kernel_weight = weights.vector[0] ; 00364 } 00365 00366 bool CKernel::init_optimization_svm(CSVM * svm) 00367 { 00368 int32_t num_suppvec=svm->get_num_support_vectors(); 00369 int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec); 00370 float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec); 00371 00372 for (int32_t i=0; i<num_suppvec; i++) 00373 { 00374 sv_idx[i] = svm->get_support_vector(i); 00375 sv_weight[i] = svm->get_alpha(i); 00376 } 00377 bool ret = init_optimization(num_suppvec, sv_idx, sv_weight); 00378 00379 SG_FREE(sv_idx); 00380 SG_FREE(sv_weight); 00381 return ret; 00382 } 00383 00384 void CKernel::load_serializable_post() throw (ShogunException) 00385 { 00386 CSGObject::load_serializable_post(); 00387 if (lhs_equals_rhs) 00388 rhs=lhs; 00389 } 00390 00391 void CKernel::save_serializable_pre() throw (ShogunException) 00392 { 00393 CSGObject::save_serializable_pre(); 00394 00395 if (lhs_equals_rhs) 00396 rhs=NULL; 00397 } 00398 00399 void CKernel::save_serializable_post() throw (ShogunException) 00400 { 00401 CSGObject::save_serializable_post(); 00402 00403 if (lhs_equals_rhs) 00404 rhs=lhs; 00405 } 00406 00407 void CKernel::register_params() { 00408 m_parameters->add(&cache_size, "cache_size", 00409 "Cache size in MB."); 00410 m_parameters->add((CSGObject**) &lhs, "lhs", 00411 "Feature vectors to occur on left hand side."); 00412 m_parameters->add((CSGObject**) &rhs, "rhs", 00413 "Feature vectors to occur on right hand side."); 00414 m_parameters->add(&lhs_equals_rhs, "lhs_equals_rhs", 00415 "If features on lhs are the same as on rhs."); 00416 m_parameters->add(&num_lhs, "num_lhs", 00417 "Number of feature vectors on left hand side."); 00418 m_parameters->add(&num_rhs, "num_rhs", 00419 "Number of feature vectors on right hand side."); 00420 m_parameters->add(&combined_kernel_weight, "combined_kernel_weight", 00421 "Combined kernel weight."); 00422 m_parameters->add(&optimization_initialized, 00423 "optimization_initialized", 00424 "Optimization is initialized."); 00425 m_parameters->add((machine_int_t*) &opt_type, "opt_type", 00426 "Optimization type."); 00427 m_parameters->add(&properties, "properties", 00428 "Kernel properties."); 00429 m_parameters->add((CSGObject**) &normalizer, "normalizer", 00430 "Normalize the kernel."); 00431 } 00432 00433 00434 void CKernel::init() 00435 { 00436 cache_size=10; 00437 kernel_matrix=NULL; 00438 lhs=NULL; 00439 rhs=NULL; 00440 num_lhs=0; 00441 num_rhs=0; 00442 combined_kernel_weight=1; 00443 optimization_initialized=false; 00444 opt_type=FASTBUTMEMHUNGRY; 00445 properties=KP_NONE; 00446 normalizer=NULL; 00447 00448 00449 00450 set_normalizer(new CIdentityKernelNormalizer()); 00451 }