SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/ui/SGInterface.h> 00013 #include <shogun/ui/GUIKernel.h> 00014 #include <shogun/ui/GUIPluginEstimate.h> 00015 00016 #include <shogun/lib/config.h> 00017 #include <shogun/io/SGIO.h> 00018 #include <shogun/io/AsciiFile.h> 00019 #include <shogun/kernel/Kernel.h> 00020 #include <shogun/kernel/CombinedKernel.h> 00021 #include <shogun/kernel/Chi2Kernel.h> 00022 #include <shogun/kernel/LinearKernel.h> 00023 #include <shogun/kernel/LinearStringKernel.h> 00024 #include <shogun/kernel/WeightedDegreeStringKernel.h> 00025 #include <shogun/kernel/WeightedDegreeRBFKernel.h> 00026 #include <shogun/kernel/SpectrumMismatchRBFKernel.h> 00027 #include <shogun/kernel/WeightedDegreePositionStringKernel.h> 00028 #include <shogun/kernel/FixedDegreeStringKernel.h> 00029 #include <shogun/kernel/LocalityImprovedStringKernel.h> 00030 #include <shogun/kernel/SimpleLocalityImprovedStringKernel.h> 00031 #include <shogun/kernel/PolyKernel.h> 00032 #include <shogun/kernel/CustomKernel.h> 00033 #include <shogun/kernel/ConstKernel.h> 00034 #include <shogun/kernel/PolyMatchWordStringKernel.h> 00035 #include <shogun/kernel/PolyMatchStringKernel.h> 00036 #include <shogun/kernel/LocalAlignmentStringKernel.h> 00037 #include <shogun/kernel/MatchWordStringKernel.h> 00038 #include <shogun/kernel/CommWordStringKernel.h> 00039 #include <shogun/kernel/WeightedCommWordStringKernel.h> 00040 #include <shogun/kernel/CommUlongStringKernel.h> 00041 #include <shogun/kernel/HistogramWordStringKernel.h> 00042 #include <shogun/kernel/SalzbergWordStringKernel.h> 00043 #include <shogun/kernel/GaussianKernel.h> 00044 #include <shogun/kernel/GaussianShiftKernel.h> 00045 #include <shogun/kernel/SigmoidKernel.h> 00046 #include <shogun/kernel/DiagKernel.h> 00047 #include <shogun/kernel/OligoStringKernel.h> 00048 #include <shogun/kernel/DistanceKernel.h> 00049 #include <shogun/kernel/TensorProductPairKernel.h> 00050 #include <shogun/kernel/AvgDiagKernelNormalizer.h> 00051 #include <shogun/kernel/RidgeKernelNormalizer.h> 00052 #include <shogun/kernel/FirstElementKernelNormalizer.h> 00053 #include <shogun/kernel/IdentityKernelNormalizer.h> 00054 #include <shogun/kernel/SqrtDiagKernelNormalizer.h> 00055 #include <shogun/kernel/VarianceKernelNormalizer.h> 00056 #include <shogun/kernel/ScatterKernelNormalizer.h> 00057 #include <shogun/classifier/svm/SVM.h> 00058 #include <shogun/kernel/ZeroMeanCenterKernelNormalizer.h> 00059 #include <shogun/kernel/WaveletKernel.h> 00060 00061 #include <string.h> 00062 00063 using namespace shogun; 00064 00065 CGUIKernel::CGUIKernel(CSGInterface* ui_) 00066 : CSGObject(), ui(ui_) 00067 { 00068 kernel=NULL; 00069 } 00070 00071 CGUIKernel::~CGUIKernel() 00072 { 00073 SG_UNREF(kernel); 00074 } 00075 00076 CKernel* CGUIKernel::get_kernel() 00077 { 00078 return kernel; 00079 } 00080 00081 CKernel* CGUIKernel::create_oligo(int32_t size, int32_t k, float64_t width) 00082 { 00083 CKernel* kern=new COligoStringKernel(size, k, width); 00084 SG_DEBUG("created OligoStringKernel (%p) with size %d, k %d, width %f.\n", kern, size, k, width); 00085 00086 return kern; 00087 } 00088 00089 CKernel* CGUIKernel::create_diag(int32_t size, float64_t diag) 00090 { 00091 CKernel* kern=new CDiagKernel(size, diag); 00092 if (!kern) 00093 SG_ERROR("Couldn't create DiagKernel with size %d, diag %f.\n", size, diag); 00094 else 00095 SG_DEBUG("created DiagKernel (%p) with size %d, diag %f.\n", kern, size, diag); 00096 00097 return kern; 00098 } 00099 00100 CKernel* CGUIKernel::create_const(int32_t size, float64_t c) 00101 { 00102 CKernel* kern=new CConstKernel(c); 00103 if (!kern) 00104 SG_ERROR("Couldn't create ConstKernel with c %f.\n", c); 00105 else 00106 SG_DEBUG("created ConstKernel (%p) with c %f.\n", kern, c); 00107 00108 kern->set_cache_size(size); 00109 00110 return kern; 00111 } 00112 00113 CKernel* CGUIKernel::create_custom(float64_t* kmatrix, int32_t num_feat, int32_t num_vec, bool source_is_diag, bool dest_is_diag) 00114 { 00115 CCustomKernel* kern=new CCustomKernel(); 00116 SG_DEBUG("created CustomKernel (%p).\n", kern); 00117 00118 SGMatrix<float64_t> km=SGMatrix<float64_t>(kmatrix, num_feat, num_vec); 00119 00120 if (source_is_diag && dest_is_diag && num_feat==1) 00121 { 00122 kern->set_triangle_kernel_matrix_from_triangle( 00123 SGVector<float64_t>(kmatrix, num_vec)); 00124 } 00125 else if (!source_is_diag && dest_is_diag && num_vec==num_feat) 00126 kern->set_triangle_kernel_matrix_from_full(km); 00127 else 00128 kern->set_full_kernel_matrix_from_full(km); 00129 00130 SG_FREE(kmatrix); 00131 return kern; 00132 } 00133 00134 00135 CKernel* CGUIKernel::create_gaussianshift( 00136 int32_t size, float64_t width, int32_t max_shift, int32_t shift_step) 00137 { 00138 CKernel* kern=new CGaussianShiftKernel(size, width, max_shift, shift_step); 00139 if (!kern) 00140 SG_ERROR("Couldn't create GaussianShiftKernel with size %d, width %f, max_shift %d, shift_step %d.\n", size, width, max_shift, shift_step); 00141 else 00142 SG_DEBUG("created GaussianShiftKernel (%p) with size %d, width %f, max_shift %d, shift_step %d.\n", kern, size, width, max_shift, shift_step); 00143 00144 return kern; 00145 } 00146 00147 CKernel* CGUIKernel::create_sparsegaussian(int32_t size, float64_t width) 00148 { 00149 CKernel* kern=new CGaussianKernel(size, width); 00150 if (!kern) 00151 SG_ERROR("Couldn't create GaussianKernel with size %d, width %f.\n", size, width); 00152 else 00153 SG_DEBUG("created GaussianKernel (%p) with size %d, width %f.\n", kern, size, width); 00154 00155 return kern; 00156 } 00157 00158 CKernel* CGUIKernel::create_gaussian(int32_t size, float64_t width) 00159 { 00160 CKernel* kern=new CGaussianKernel(size, width); 00161 if (!kern) 00162 SG_ERROR("Couldn't create GaussianKernel with size %d, width %f.\n", size, width); 00163 else 00164 SG_DEBUG("created GaussianKernel (%p) with size %d, width %f.\n", kern, size, width); 00165 00166 return kern; 00167 } 00168 00169 CKernel* CGUIKernel::create_sigmoid( 00170 int32_t size, float64_t gamma, float64_t coef0) 00171 { 00172 CKernel* kern=new CSigmoidKernel(size, gamma, coef0); 00173 if (!kern) 00174 SG_ERROR("Couldn't create SigmoidKernel with size %d, gamma %f, coef0 %f.\n", size, gamma, coef0); 00175 else 00176 SG_DEBUG("created SigmoidKernel (%p) with size %d, gamma %f, coef0 %f.\n", kern, size, gamma, coef0); 00177 00178 return kern; 00179 } 00180 CKernel* CGUIKernel::create_wavelet( 00181 int32_t size, float64_t Wdilation, float64_t Wtranslation) 00182 { 00183 CKernel* kern=new CWaveletKernel(size, Wdilation, Wtranslation); 00184 if (!kern) 00185 SG_ERROR("Couldn't create WaveletKernel with size %d, Wdilation %f, Wtranslation %f.\n", size, Wdilation, Wtranslation); 00186 else 00187 SG_DEBUG("created WaveletKernel (%p) with size %d, Wdilation %f, Wtranslation %f.\n", kern, size, Wdilation, Wtranslation); 00188 00189 return kern; 00190 } 00191 CKernel* CGUIKernel::create_sparsepoly( 00192 int32_t size, int32_t degree, bool inhomogene, bool normalize) 00193 { 00194 CKernel* kern=new CPolyKernel(size, degree, inhomogene); 00195 if (!normalize) 00196 kern->set_normalizer(new CIdentityKernelNormalizer()); 00197 SG_DEBUG("created PolyKernel with size %d, degree %d, inhomogene %d normalize %d.\n", kern, size, degree, inhomogene, normalize); 00198 00199 return kern; 00200 } 00201 00202 CKernel* CGUIKernel::create_poly( 00203 int32_t size, int32_t degree, bool inhomogene, bool normalize) 00204 { 00205 CKernel* kern=new CPolyKernel(size, degree, inhomogene); 00206 if (!normalize) 00207 kern->set_normalizer(new CIdentityKernelNormalizer()); 00208 SG_DEBUG("created PolyKernel (%p) with size %d, degree %d, inhomogene %d, normalize %d.\n", kern, size, degree, inhomogene, normalize); 00209 00210 return kern; 00211 } 00212 00213 CKernel* CGUIKernel::create_localityimprovedstring( 00214 int32_t size, int32_t length, int32_t inner_degree, int32_t outer_degree, 00215 EKernelType ktype) 00216 { 00217 CKernel* kern=NULL; 00218 00219 if (ktype==K_SIMPLELOCALITYIMPROVED) 00220 { 00221 kern=new CSimpleLocalityImprovedStringKernel( 00222 size, length, inner_degree, outer_degree); 00223 } 00224 else if (ktype==K_LOCALITYIMPROVED) 00225 { 00226 kern=new CLocalityImprovedStringKernel( 00227 size, length, inner_degree, outer_degree); 00228 } 00229 00230 if (!kern) 00231 SG_ERROR("Couldn't create (Simple)LocalityImprovedStringKernel with size %d, length %d, inner_degree %d, outer_degree %d.\n", size, length, inner_degree, outer_degree); 00232 else 00233 SG_DEBUG("created (Simple)LocalityImprovedStringKernel with size %d, length %d, inner_degree %d, outer_degree %d.\n", kern, size, length, inner_degree, outer_degree); 00234 00235 return kern; 00236 } 00237 00238 CKernel* CGUIKernel::create_weighteddegreestring( 00239 int32_t size, int32_t order, int32_t max_mismatch, bool use_normalization, 00240 int32_t mkl_stepsize, bool block_computation, int32_t single_degree) 00241 { 00242 float64_t* weights=get_weights(order, max_mismatch); 00243 00244 int32_t i=0; 00245 if (single_degree>=0) 00246 { 00247 ASSERT(single_degree<order); 00248 for (i=0; i<order; i++) 00249 { 00250 if (i!=single_degree) 00251 weights[i]=0; 00252 else 00253 weights[i]=1; 00254 } 00255 } 00256 00257 CKernel* kern=new CWeightedDegreeStringKernel(weights, order); 00258 00259 SG_DEBUG("created WeightedDegreeStringKernel (%p) with size %d, order %d, " 00260 "max_mismatch %d, use_normalization %d, mkl_stepsize %d, " 00261 "block_computation %d, single_degree %d.\n", 00262 kern, size, order, max_mismatch, (int) use_normalization, mkl_stepsize, 00263 block_computation, single_degree); 00264 00265 if (!use_normalization) 00266 kern->set_normalizer(new CIdentityKernelNormalizer()); 00267 00268 ((CWeightedDegreeStringKernel*) kern)-> 00269 set_use_block_computation(block_computation); 00270 ((CWeightedDegreeStringKernel*) kern)->set_max_mismatch(max_mismatch); 00271 ((CWeightedDegreeStringKernel*) kern)->set_mkl_stepsize(mkl_stepsize); 00272 ((CWeightedDegreeStringKernel*) kern)->set_which_degree(single_degree); 00273 00274 SG_FREE(weights); 00275 return kern; 00276 } 00277 00278 CKernel* CGUIKernel::create_weighteddegreepositionstring( 00279 int32_t size, int32_t order, int32_t max_mismatch, int32_t length, 00280 int32_t center, float64_t step) 00281 { 00282 int32_t i=0; 00283 int32_t* shifts=SG_MALLOC(int32_t, length); 00284 00285 for (i=center; i<length; i++) 00286 shifts[i]=(int32_t) floor(((float64_t) (i-center))/step); 00287 00288 for (i=center-1; i>=0; i--) 00289 shifts[i]=(int32_t) floor(((float64_t) (center-i))/step); 00290 00291 for (i=0; i<length; i++) 00292 { 00293 if (shifts[i]>length) 00294 shifts[i]=length; 00295 } 00296 00297 for (i=0; i<length; i++) 00298 SG_INFO( "shift[%i]=%i\n", i, shifts[i]); 00299 00300 float64_t* weights=get_weights(order, max_mismatch); 00301 00302 CKernel* kern=new CWeightedDegreePositionStringKernel(size, weights, order, max_mismatch, shifts, length); 00303 if (!kern) 00304 SG_ERROR("Couldn't create WeightedDegreePositionStringKernel with size %d, order %d, max_mismatch %d, length %d, center %d, step %f.\n", size, order, max_mismatch, length, center, step); 00305 else 00306 SG_DEBUG("created WeightedDegreePositionStringKernel with size %d, order %d, max_mismatch %d, length %d, center %d, step %f.\n", kern, size, order, max_mismatch, length, center, step); 00307 00308 SG_FREE(weights); 00309 SG_FREE(shifts); 00310 return kern; 00311 } 00312 00313 CKernel* CGUIKernel::create_weighteddegreepositionstring3( 00314 int32_t size, int32_t order, int32_t max_mismatch, int32_t* shifts, 00315 int32_t length, int32_t mkl_stepsize, float64_t* position_weights) 00316 { 00317 float64_t* weights=get_weights(order, max_mismatch); 00318 00319 CKernel* kern=new CWeightedDegreePositionStringKernel(size, weights, order, max_mismatch, shifts, length, mkl_stepsize); 00320 kern->set_normalizer(new CIdentityKernelNormalizer()); 00321 00322 SG_DEBUG("created WeightedDegreePositionStringKernel (%p) with size %d, order %d, max_mismatch %d, length %d and position_weights (MKL stepsize: %d).\n", kern, size, order, max_mismatch, length, mkl_stepsize); 00323 00324 if (!position_weights) 00325 { 00326 position_weights=SG_MALLOC(float64_t, length); 00327 for (int32_t i=0; i<length; i++) 00328 position_weights[i]=1.0/length; 00329 } 00330 ((CWeightedDegreePositionStringKernel*) kern)-> 00331 set_position_weights(SGVector<float64_t>(position_weights, length)); 00332 00333 SG_FREE(weights); 00334 return kern; 00335 } 00336 00337 CKernel* CGUIKernel::create_weighteddegreepositionstring2( 00338 int32_t size, int32_t order, int32_t max_mismatch, int32_t* shifts, 00339 int32_t length, bool use_normalization) 00340 { 00341 float64_t* weights=get_weights(order, max_mismatch); 00342 00343 CKernel* kern=new CWeightedDegreePositionStringKernel(size, weights, order, max_mismatch, shifts, length); 00344 if (!use_normalization) 00345 kern->set_normalizer(new CIdentityKernelNormalizer()); 00346 00347 00348 SG_DEBUG("created WeightedDegreePositionStringKernel (%p) with size %d, order %d, max_mismatch %d, length %d, use_normalization %d.\n", kern, size, order, max_mismatch, length, use_normalization); 00349 00350 SG_FREE(weights); 00351 return kern; 00352 } 00353 00354 float64_t* CGUIKernel::get_weights(int32_t order, int32_t max_mismatch) 00355 { 00356 float64_t *weights=SG_MALLOC(float64_t, order*(1+max_mismatch)); 00357 float64_t sum=0; 00358 int32_t i=0; 00359 00360 for (i=0; i<order; i++) 00361 { 00362 weights[i]=order-i; 00363 sum+=weights[i]; 00364 } 00365 for (i=0; i<order; i++) 00366 weights[i]/=sum; 00367 00368 for (i=0; i<order; i++) 00369 { 00370 for (int32_t j=1; j<=max_mismatch; j++) 00371 { 00372 if (j<i+1) 00373 { 00374 int32_t nk=CMath::nchoosek(i+1, j); 00375 weights[i+j*order]=weights[i]/(nk*CMath::pow(3, j)); 00376 } 00377 else 00378 weights[i+j*order]=0; 00379 } 00380 } 00381 00382 return weights; 00383 } 00384 00385 CKernel* CGUIKernel::create_weighteddegreerbf(int32_t size, int32_t degree, int32_t nof_properties, float64_t width) 00386 { 00387 CKernel* kern=new CWeightedDegreeRBFKernel(size, width, degree, nof_properties); 00388 if (!kern) 00389 SG_ERROR("Couldn't create WeightedDegreeRBFKernel with size %d, width %f, degree %d, nof_properties %d.\n", size, width, degree, nof_properties); 00390 else 00391 SG_DEBUG("created WeightedDegreeRBFKernel (%p) with size %d, width %f, degree %d, nof_properties %d.\n", kern, size, width, degree, nof_properties); 00392 00393 return kern; 00394 } 00395 00396 CKernel* CGUIKernel::create_spectrummismatchrbf(int32_t size, float64_t* AA_matrix, int32_t nr, int32_t nc, int32_t max_mismatch, int32_t degree, float64_t width) 00397 { 00398 00399 CKernel* kern = new CSpectrumMismatchRBFKernel(size, AA_matrix, nr, nc, degree, max_mismatch, width); 00400 if (!kern) 00401 SG_ERROR("Couldn't create SpectrumMismatchRBFKernel with size %d, width %f, degree %d, max_mismatch %d.\n", size, width, degree, max_mismatch); 00402 else 00403 SG_DEBUG("created SpectrumMismatchRBFKernel (%p) with size %d, width %f, degree %d, max_mismatch %d.\n", kern, size, width, degree, max_mismatch); 00404 00405 return kern; 00406 00407 } 00408 00409 00410 CKernel* CGUIKernel::create_localalignmentstring(int32_t size) 00411 { 00412 CKernel* kern=new CLocalAlignmentStringKernel(size); 00413 if (!kern) 00414 SG_ERROR("Couldn't create LocalAlignmentStringKernel with size %d.\n", size); 00415 else 00416 SG_DEBUG("created LocalAlignmentStringKernel (%p) with size %d.\n", kern, size); 00417 00418 return kern; 00419 } 00420 00421 CKernel* CGUIKernel::create_fixeddegreestring(int32_t size, int32_t d) 00422 { 00423 CKernel* kern=new CFixedDegreeStringKernel(size, d); 00424 if (!kern) 00425 SG_ERROR("Couldn't create FixedDegreeStringKernel with size %d and d %d.\n", size, d); 00426 else 00427 SG_DEBUG("created FixedDegreeStringKernel (%p) with size %d and d %d.\n", kern, size, d); 00428 00429 return kern; 00430 } 00431 00432 CKernel* CGUIKernel::create_chi2(int32_t size, float64_t width) 00433 { 00434 CKernel* kern=new CChi2Kernel(size, width); 00435 if (!kern) 00436 SG_ERROR("Couldn't create Chi2Kernel with size %d and width %f.\n", size, width); 00437 else 00438 SG_DEBUG("created Chi2Kernel (%p) with size %d and width %f.\n", kern, size, width); 00439 00440 return kern; 00441 } 00442 00443 CKernel* CGUIKernel::create_commstring( 00444 int32_t size, bool use_sign, char* norm_str, EKernelType ktype) 00445 { 00446 CKernel* kern=NULL; 00447 00448 if (!norm_str) 00449 norm_str= (char*) "FULL"; 00450 00451 if (ktype==K_COMMULONGSTRING) 00452 kern=new CCommUlongStringKernel(size, use_sign); 00453 else if (ktype==K_COMMWORDSTRING) 00454 kern=new CCommWordStringKernel(size, use_sign); 00455 else if (ktype==K_WEIGHTEDCOMMWORDSTRING) 00456 kern=new CWeightedCommWordStringKernel(size, use_sign); 00457 00458 SG_DEBUG("created WeightedCommWord/CommWord/CommUlongStringKernel (%p) with size %d, use_sign %d norm_str %s.\n", kern, size, use_sign, norm_str); 00459 00460 00461 if (strncmp(norm_str, "NO", 2)==0) 00462 { 00463 kern->set_normalizer(new CIdentityKernelNormalizer()); 00464 } 00465 else if (strncmp(norm_str, "FULL", 4)==0) 00466 { 00467 //nop, as this one is default 00468 } 00469 else 00470 SG_ERROR("Unsupported Normalizer requested, supports only FULL and NO\n"); 00471 00472 return kern; 00473 } 00474 00475 CKernel* CGUIKernel::create_matchwordstring( 00476 int32_t size, int32_t d, bool normalize) 00477 { 00478 CKernel* kern=new CMatchWordStringKernel(size, d); 00479 SG_DEBUG("created MatchWordStringKernel (%p) with size %d and d %d.\n", kern, size, d); 00480 if (!normalize) 00481 kern->set_normalizer(new CIdentityKernelNormalizer()); 00482 00483 return kern; 00484 } 00485 00486 CKernel* CGUIKernel::create_polymatchstring( 00487 int32_t size, int32_t degree, bool inhomogene, bool normalize) 00488 { 00489 CKernel* kern=new CPolyMatchStringKernel(size, degree, inhomogene); 00490 SG_DEBUG("created PolyMatchStringKernel (%p) with size %d, degree %d, inhomogene %d normalize %d.\n", kern, size, degree, inhomogene, normalize); 00491 if (!normalize) 00492 kern->set_normalizer(new CIdentityKernelNormalizer()); 00493 00494 return kern; 00495 } 00496 00497 CKernel* CGUIKernel::create_polymatchwordstring( 00498 int32_t size, int32_t degree, bool inhomogene, bool normalize) 00499 { 00500 CKernel* kern=new CPolyMatchWordStringKernel(size, degree, inhomogene); 00501 SG_DEBUG("created PolyMatchWordStringKernel (%p) with size %d, degree %d, inhomogene %d, normalize %d.\n", kern, size, degree, inhomogene, normalize); 00502 if (!normalize) 00503 kern->set_normalizer(new CIdentityKernelNormalizer()); 00504 00505 return kern; 00506 } 00507 00508 CKernel* CGUIKernel::create_salzbergword(int32_t size) 00509 { 00510 SG_INFO("Getting estimator.\n"); 00511 CPluginEstimate* estimator=ui->ui_pluginestimate->get_estimator(); 00512 if (!estimator) 00513 SG_ERROR("No estimator set.\n"); 00514 00515 CKernel* kern=new CSalzbergWordStringKernel(size, estimator); 00516 if (!kern) 00517 SG_ERROR("Couldn't create SalzbergWordString with size %d.\n", size); 00518 else 00519 SG_DEBUG("created SalzbergWordString (%p) with size %d.\n", kern, size); 00520 00521 /* 00522 // prior stuff 00523 SG_INFO("Getting labels.\n"); 00524 CLabels* train_labels=ui->ui_labels->get_train_labels(); 00525 if (!train_labels) 00526 { 00527 SG_INFO("Assign train labels first!\n"); 00528 return NULL; 00529 } 00530 ((CSalzbergWordStringKernel *) kern)->set_prior_probs_from_labels(train_labels); 00531 */ 00532 00533 return kern; 00534 } 00535 00536 CKernel* CGUIKernel::create_histogramword(int32_t size) 00537 { 00538 SG_INFO("Getting estimator.\n"); 00539 CPluginEstimate* estimator=ui->ui_pluginestimate->get_estimator(); 00540 if (!estimator) 00541 SG_ERROR("No estimator set.\n"); 00542 00543 CKernel* kern=new CHistogramWordStringKernel(size, estimator); 00544 if (!kern) 00545 SG_ERROR("Couldn't create HistogramWordString with size %d.\n", size); 00546 else 00547 SG_DEBUG("created HistogramWordString (%p) with size %d.\n", kern, size); 00548 00549 return kern; 00550 } 00551 00552 CKernel* CGUIKernel::create_linearbyte(int32_t size, float64_t scale) 00553 { 00554 size=0; 00555 CKernel* kern=new CLinearKernel(); 00556 kern->set_normalizer(new CAvgDiagKernelNormalizer(scale)); 00557 SG_DEBUG("created LinearByteKernel (%p) with size %d and scale %f.\n", kern, size, scale); 00558 00559 return kern; 00560 } 00561 00562 CKernel* CGUIKernel::create_linearword(int32_t size, float64_t scale) 00563 { 00564 size=0; 00565 CKernel* kern=new CLinearKernel(); 00566 kern->set_normalizer(new CAvgDiagKernelNormalizer(scale)); 00567 SG_DEBUG("created LinearWordKernel (%p) with size %d and scale %f.\n", kern, size, scale); 00568 00569 return kern; 00570 } 00571 00572 CKernel* CGUIKernel::create_linearstring(int32_t size, float64_t scale) 00573 { 00574 size=0; 00575 CKernel* kern=NULL; 00576 kern=new CLinearStringKernel(); 00577 kern->set_normalizer(new CAvgDiagKernelNormalizer(scale)); 00578 00579 SG_DEBUG("created LinearStringKernel (%p) with size %d and scale %f.\n", kern, size, scale); 00580 00581 return kern; 00582 } 00583 00584 CKernel* CGUIKernel::create_linear(int32_t size, float64_t scale) 00585 { 00586 size=0; 00587 CKernel* kern=new CLinearKernel(); 00588 kern->set_normalizer(new CAvgDiagKernelNormalizer(scale)); 00589 00590 SG_DEBUG("created LinearKernel (%p) with size %d and scale %f.\n", kern, size, scale); 00591 00592 return kern; 00593 } 00594 00595 CKernel* CGUIKernel::create_sparselinear(int32_t size, float64_t scale) 00596 { 00597 size=0; 00598 CKernel* kern=new CLinearKernel(); 00599 kern->set_normalizer(new CAvgDiagKernelNormalizer(scale)); 00600 00601 SG_DEBUG("created LinearKernel (%p) with size %d and scale %f.\n", kern, size, scale); 00602 00603 return kern; 00604 } 00605 00606 CKernel* CGUIKernel::create_tppk(int32_t size, float64_t* km, int32_t rows, int32_t cols) 00607 { 00608 CCustomKernel* k=new CCustomKernel(); 00609 k->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(km, rows, cols)); 00610 00611 CKernel* kern=new CTensorProductPairKernel(size, k); 00612 00613 SG_DEBUG("created TPPK (%p) with size %d and km %p, rows %d, cols %d.\n", kern, size, km, rows, cols); 00614 00615 return kern; 00616 } 00617 00618 CKernel* CGUIKernel::create_distance(int32_t size, float64_t width) 00619 { 00620 CDistance* dist=ui->ui_distance->get_distance(); 00621 if (!dist) 00622 SG_ERROR("No distance set for DistanceKernel.\n"); 00623 00624 CKernel* kern=new CDistanceKernel(size, width, dist); 00625 if (!kern) 00626 SG_ERROR("Couldn't create DistanceKernel with size %d and width %f.\n", size, width); 00627 else 00628 SG_DEBUG("created DistanceKernel (%p) with size %d and width %f.\n", kern, size, width); 00629 00630 return kern; 00631 } 00632 00633 CKernel* CGUIKernel::create_combined( 00634 int32_t size, bool append_subkernel_weights) 00635 { 00636 CKernel* kern=new CCombinedKernel(size, append_subkernel_weights); 00637 if (!kern) 00638 SG_ERROR("Couldn't create CombinedKernel with size %d and append_subkernel_weights %d.\n", size, append_subkernel_weights); 00639 else 00640 SG_DEBUG("created CombinedKernel (%p) with size %d and append_subkernel_weights %d.\n", kern, size, append_subkernel_weights); 00641 00642 return kern; 00643 } 00644 00645 bool CGUIKernel::set_normalization(char* normalization, float64_t c, float64_t r) 00646 { 00647 CKernel* k=kernel; 00648 00649 if (k && k->get_kernel_type()==K_COMBINED) 00650 k=((CCombinedKernel*) kernel)->get_last_kernel(); 00651 00652 if (!k) 00653 SG_ERROR("No kernel available.\n"); 00654 00655 if (strncmp(normalization, "IDENTITY", 8)==0) 00656 { 00657 SG_INFO("Identity Normalization (==NO NORMALIZATION) selected\n"); 00658 return k->set_normalizer(new CIdentityKernelNormalizer()); 00659 } 00660 else if (strncmp(normalization,"AVGDIAG", 7)==0) 00661 { 00662 SG_INFO("Average Kernel Diagonal Normalization selected\n"); 00663 return k->set_normalizer(new CAvgDiagKernelNormalizer(c)); 00664 } 00665 else if (strncmp(normalization,"RIDGE", 5)==0) 00666 { 00667 SG_INFO("Ridge Kernel Normalization selected\n"); 00668 return k->set_normalizer(new CRidgeKernelNormalizer(r, c)); 00669 } 00670 else if (strncmp(normalization,"SQRTDIAG", 8)==0) 00671 { 00672 SG_INFO("Sqrt Diagonal Normalization selected\n"); 00673 return k->set_normalizer(new CSqrtDiagKernelNormalizer()); 00674 } 00675 else if (strncmp(normalization,"FIRSTELEMENT", 12)==0) 00676 { 00677 SG_INFO("First Element Normalization selected\n"); 00678 return k->set_normalizer(new CFirstElementKernelNormalizer()); 00679 } 00680 else if (strncmp(normalization,"VARIANCE", 8)==0) 00681 { 00682 SG_INFO("Variance Normalization selected\n"); 00683 return k->set_normalizer(new CVarianceKernelNormalizer()); 00684 } 00685 else if (strncmp(normalization,"SCATTER", 7)==0) 00686 { 00687 SG_INFO("Scatter Normalization selected\n"); 00688 CLabels* train_labels=ui->ui_labels->get_train_labels(); 00689 ASSERT(train_labels); 00690 return k->set_normalizer(new CScatterKernelNormalizer(c,r, train_labels)); 00691 } 00692 else if (strncmp(normalization,"ZEROMEANCENTER", 13)==0) 00693 { 00694 SG_INFO("Zero Mean Center Normalization selected\n"); 00695 return k->set_normalizer(new CZeroMeanCenterKernelNormalizer()); 00696 } 00697 else 00698 SG_ERROR("Wrong kernel normalizer name.\n"); 00699 00700 SG_UNREF(k); 00701 00702 return false; 00703 } 00704 00705 bool CGUIKernel::set_kernel(CKernel* kern) 00706 { 00707 if (kern) 00708 { 00709 SG_DEBUG("deleting old kernel (%p).\n", kernel); 00710 SG_UNREF(kernel); 00711 SG_REF(kern); 00712 kernel=kern; 00713 SG_DEBUG("set new kernel (%p).\n", kern); 00714 00715 return true; 00716 } 00717 else 00718 return false; 00719 } 00720 00721 bool CGUIKernel::init_kernel_optimization() 00722 { 00723 CSVM* svm=(CSVM*) ui->ui_classifier->get_classifier(); 00724 if (svm) 00725 { 00726 if (kernel->has_property(KP_LINADD)) 00727 { 00728 int32_t num_sv=svm->get_num_support_vectors(); 00729 int32_t* sv_idx=SG_MALLOC(int32_t, num_sv); 00730 float64_t* sv_weight=SG_MALLOC(float64_t, num_sv); 00731 00732 for (int32_t i=0; i<num_sv; i++) 00733 { 00734 sv_idx[i]=svm->get_support_vector(i); 00735 sv_weight[i]=svm->get_alpha(i); 00736 } 00737 00738 bool ret=kernel->init_optimization(num_sv, sv_idx, sv_weight); 00739 00740 SG_FREE(sv_idx); 00741 SG_FREE(sv_weight); 00742 00743 if (!ret) 00744 SG_ERROR("Initialization of kernel optimization failed\n"); 00745 return ret; 00746 } 00747 } 00748 else 00749 SG_ERROR("Create SVM first!\n"); 00750 00751 return true; 00752 } 00753 00754 bool CGUIKernel::delete_kernel_optimization() 00755 { 00756 if (kernel && kernel->has_property(KP_LINADD) && kernel->get_is_initialized()) 00757 kernel->delete_optimization(); 00758 00759 return true; 00760 } 00761 00762 00763 bool CGUIKernel::init_kernel(const char* target) 00764 { 00765 if (!kernel) 00766 SG_ERROR("No kernel available.\n"); 00767 00768 // no need to init custom kernel 00769 if (kernel->get_kernel_type() == K_CUSTOM || !target) 00770 { 00771 initialized=true; 00772 return true; 00773 } 00774 00775 EFeatureClass k_fclass=kernel->get_feature_class(); 00776 EFeatureType k_ftype=kernel->get_feature_type(); 00777 00778 if (!strncmp(target, "TRAIN", 5)) 00779 { 00780 CFeatures* train=ui->ui_features->get_train_features(); 00781 00782 if (train) 00783 { 00784 EFeatureClass fclass=train->get_feature_class(); 00785 EFeatureType ftype=train->get_feature_type(); 00786 if ((k_fclass==fclass || k_fclass==C_ANY || fclass==C_ANY) && 00787 (k_ftype==ftype || k_ftype==F_ANY || ftype==F_ANY)) 00788 00789 { 00790 SG_INFO("Initialising kernel with TRAIN DATA, train: %p\n", train); 00791 kernel->init(train, train); 00792 initialized=true; 00793 } 00794 else 00795 SG_ERROR("Kernel can not process this train feature type: %d %d.\n", fclass, ftype); 00796 } 00797 else 00798 SG_DEBUG("Not initing kernel - no train features assigned.\n"); 00799 } 00800 else if (!strncmp(target, "TEST", 4)) 00801 { 00802 CFeatures* train=ui->ui_features->get_train_features(); 00803 CFeatures* test=ui->ui_features->get_test_features(); 00804 if (train && test) 00805 { 00806 EFeatureClass fclass=test->get_feature_class(); 00807 EFeatureType ftype=test->get_feature_type(); 00808 if ((k_fclass==fclass || k_fclass==C_ANY || fclass==C_ANY) && 00809 (k_ftype==ftype || k_ftype==F_ANY || ftype==F_ANY)) 00810 00811 { 00812 if (!initialized) 00813 { 00814 EFeatureClass tr_fclass=train->get_feature_class(); 00815 EFeatureType tr_ftype=train->get_feature_type(); 00816 if ((k_fclass==tr_fclass || k_fclass==C_ANY || tr_fclass==C_ANY) && 00817 (k_ftype==tr_ftype || k_ftype==F_ANY || tr_ftype==F_ANY)) 00818 { 00819 SG_INFO("Initialising kernel with TRAIN DATA, train: %p\n", train); 00820 kernel->init(train, train); 00821 initialized=true; 00822 } 00823 else 00824 SG_ERROR("Kernel can not process this train feature type: %d %d.\n", fclass, ftype); 00825 } 00826 00827 SG_INFO("Initialising kernel with TEST DATA, train: %p test %p\n", train, test); 00828 // lhs -> always train_features; rhs -> always test_features 00829 kernel->init(train, test); 00830 } 00831 else 00832 SG_ERROR("Kernel can not process this test feature type: %d %d.\n", fclass, ftype); 00833 } 00834 else 00835 SG_DEBUG("Not initing kernel - no train and test features assigned.\n"); 00836 } 00837 else 00838 SG_ERROR("Unknown target %s.\n", target); 00839 00840 return true; 00841 } 00842 00843 bool CGUIKernel::save_kernel(char* filename) 00844 { 00845 if (kernel && initialized) 00846 { 00847 CAsciiFile* file=new CAsciiFile(filename); 00848 try 00849 { 00850 kernel->save(file); 00851 } 00852 catch (...) 00853 { 00854 SG_ERROR("Writing to file %s failed!\n", filename); 00855 } 00856 00857 SG_UNREF(file); 00858 SG_INFO("Successfully written kernel to \"%s\" !\n", filename); 00859 return true; 00860 } 00861 else 00862 SG_ERROR("No kernel set / kernel not initialized!\n"); 00863 00864 return false; 00865 } 00866 00867 bool CGUIKernel::add_kernel(CKernel* kern, float64_t weight) 00868 { 00869 if (!kern) 00870 SG_ERROR("Given kernel to add is invalid.\n"); 00871 00872 if (!kernel) 00873 { 00874 kernel= new CCombinedKernel(20, false); 00875 SG_REF(kernel); 00876 } 00877 00878 if (kernel->get_kernel_type()!=K_COMBINED) 00879 { 00880 CKernel* first_elem=kernel; 00881 kernel= new CCombinedKernel(20, false); 00882 SG_REF(kernel); 00883 ((CCombinedKernel*) kernel)->append_kernel(first_elem); 00884 } 00885 00886 if (!kernel) 00887 SG_ERROR("Combined kernel object could not be created.\n"); 00888 00889 kern->set_combined_kernel_weight(weight); 00890 00891 bool success=((CCombinedKernel*) kernel)->append_kernel(kern); 00892 00893 initialized=true; 00894 if (success) 00895 ((CCombinedKernel*) kernel)->list_kernels(); 00896 else 00897 SG_ERROR("Adding of kernel failed.\n"); 00898 00899 return success; 00900 } 00901 00902 00903 bool CGUIKernel::del_last_kernel() 00904 { 00905 if (!kernel) 00906 SG_ERROR("No kernel available.\n"); 00907 00908 if (kernel->get_kernel_type()!=K_COMBINED) 00909 SG_ERROR("Need a combined kernel for deleting the last kernel in it.\n"); 00910 00911 CKernel* last=((CCombinedKernel*) kernel)->get_last_kernel(); 00912 if (last) 00913 return ((CCombinedKernel*) kernel)->delete_kernel(); 00914 else 00915 SG_ERROR("No kernel available to delete.\n"); 00916 00917 return false; 00918 } 00919 00920 bool CGUIKernel::clean_kernel() 00921 { 00922 SG_UNREF(kernel); 00923 kernel=NULL; 00924 return true; 00925 } 00926 00927 00928 00929 bool CGUIKernel::set_optimization_type(char* opt_type) 00930 { 00931 EOptimizationType opt=SLOWBUTMEMEFFICIENT; 00932 if (!kernel) 00933 SG_ERROR("No kernel available.\n"); 00934 00935 if (strncmp(opt_type, "FASTBUTMEMHUNGRY", 16)==0) 00936 { 00937 SG_INFO("FAST METHOD selected\n"); 00938 opt=FASTBUTMEMHUNGRY; 00939 kernel->set_optimization_type(opt); 00940 00941 return true; 00942 } 00943 else if (strncmp(opt_type,"SLOWBUTMEMEFFICIENT", 19)==0) 00944 { 00945 SG_INFO("MEMORY EFFICIENT METHOD selected\n"); 00946 opt=SLOWBUTMEMEFFICIENT; 00947 kernel->set_optimization_type(opt); 00948 00949 return true; 00950 } 00951 else 00952 SG_ERROR("Wrong kernel optimization type.\n"); 00953 00954 return false; 00955 } 00956 00957 bool CGUIKernel::precompute_subkernels() 00958 { 00959 if (!kernel) 00960 SG_ERROR("No kernel available.\n"); 00961 00962 if (kernel->get_kernel_type()!=K_COMBINED) 00963 SG_ERROR("Not a combined kernel.\n"); 00964 00965 return ((CCombinedKernel*) kernel)->precompute_subkernels(); 00966 }