SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Kernel.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/lib/config.h>
00013 #include <shogun/lib/common.h>
00014 #include <shogun/io/SGIO.h>
00015 #include <shogun/io/File.h>
00016 #include <shogun/lib/Time.h>
00017 #include <shogun/lib/Signal.h>
00018 
00019 #include <shogun/base/Parallel.h>
00020 
00021 #include <shogun/kernel/Kernel.h>
00022 #include <shogun/kernel/IdentityKernelNormalizer.h>
00023 #include <shogun/features/Features.h>
00024 #include <shogun/base/Parameter.h>
00025 
00026 #include <shogun/classifier/svm/SVM.h>
00027 
00028 #include <string.h>
00029 #include <unistd.h>
00030 #include <math.h>
00031 
00032 #ifdef HAVE_PTHREAD
00033 #include <pthread.h>
00034 #endif
00035 
00036 using namespace shogun;
00037 
00038 CKernel::CKernel() : CSGObject()
00039 {
00040     init();
00041     register_params();
00042 }
00043 
00044 CKernel::CKernel(int32_t size) : CSGObject()
00045 {
00046     init();
00047     
00048     if (size<10)
00049         size=10;
00050 
00051     cache_size=size;
00052     register_params();
00053 }
00054 
00055 
00056 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
00057 {
00058     init();
00059 
00060     if (size<10)
00061         size=10;
00062 
00063     cache_size=size;
00064 
00065     set_normalizer(new CIdentityKernelNormalizer());
00066     init(p_lhs, p_rhs);
00067     register_params();
00068 }
00069 
00070 CKernel::~CKernel()
00071 {
00072     if (get_is_initialized())
00073         SG_ERROR("Kernel still initialized on destruction.\n");
00074 
00075     remove_lhs_and_rhs();
00076     SG_UNREF(normalizer);
00077 
00078     SG_INFO("Kernel deleted (%p).\n", this);
00079 }
00080 
00081 
00082 
00083 bool CKernel::init(CFeatures* l, CFeatures* r)
00084 {
00085     //make sure features were indeed supplied
00086     ASSERT(l);
00087     ASSERT(r);
00088 
00089     //make sure features are compatible
00090     ASSERT(l->get_feature_class()==r->get_feature_class());
00091     ASSERT(l->get_feature_type()==r->get_feature_type());
00092 
00093     //remove references to previous features
00094     remove_lhs_and_rhs();
00095 
00096     //increase reference counts
00097     SG_REF(l);
00098     if (l==r)
00099         lhs_equals_rhs=true;
00100     else // l!=r
00101         SG_REF(r);
00102 
00103     lhs=l;
00104     rhs=r;
00105 
00106     ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00107     ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00108 
00109     num_lhs=l->get_num_vectors();
00110     num_rhs=r->get_num_vectors();
00111 
00112     return true;
00113 }
00114 
00115 bool CKernel::set_normalizer(CKernelNormalizer* n)
00116 {
00117     SG_REF(n);
00118     if (lhs && rhs)
00119         n->init(this);
00120 
00121     SG_UNREF(normalizer);
00122     normalizer=n;
00123 
00124     return (normalizer!=NULL);
00125 }
00126 
00127 CKernelNormalizer* CKernel::get_normalizer()
00128 {
00129     SG_REF(normalizer)
00130     return normalizer;
00131 }
00132 
00133 bool CKernel::init_normalizer()
00134 {
00135     return normalizer->init(this);
00136 }
00137 
00138 void CKernel::cleanup()
00139 {
00140     remove_lhs_and_rhs();
00141 }
00142 
00143 
00144 
00145 void CKernel::load(CFile* loader)
00146 {
00147     SG_SET_LOCALE_C;
00148     SG_RESET_LOCALE;
00149 }
00150 
00151 void CKernel::save(CFile* writer)
00152 {
00153     SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>();
00154     SG_SET_LOCALE_C;
00155     writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols);
00156     SG_FREE(k_matrix.matrix);
00157     SG_RESET_LOCALE;
00158 }
00159 
00160 void CKernel::remove_lhs_and_rhs()
00161 {
00162     if (rhs!=lhs)
00163         SG_UNREF(rhs);
00164     rhs = NULL;
00165     num_rhs=0;
00166 
00167     SG_UNREF(lhs);
00168     lhs = NULL;
00169     num_lhs=0;
00170     lhs_equals_rhs=false;
00171 
00172 
00173 }
00174 
00175 void CKernel::remove_lhs()
00176 {
00177     if (rhs==lhs)
00178         rhs=NULL;
00179     SG_UNREF(lhs);
00180     lhs = NULL;
00181     num_lhs=0;
00182     lhs_equals_rhs=false;
00183 
00184 }
00185 
00187 void CKernel::remove_rhs()
00188 {
00189     if (rhs!=lhs)
00190         SG_UNREF(rhs);
00191     rhs = NULL;
00192     num_rhs=0;
00193     lhs_equals_rhs=false;
00194 
00195 
00196 }
00197 
00198 #define ENUM_CASE(n) case n: SG_INFO(#n " "); break;
00199 
00200 void CKernel::list_kernel()
00201 {
00202     SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00203             get_combined_kernel_weight(),
00204             get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00205             "SLOWBUTMEMEFFICIENT");
00206 
00207     switch (get_kernel_type())
00208     {
00209         ENUM_CASE(K_UNKNOWN)
00210         ENUM_CASE(K_LINEAR)
00211         ENUM_CASE(K_POLY)
00212         ENUM_CASE(K_GAUSSIAN)
00213         ENUM_CASE(K_GAUSSIANSHIFT)
00214         ENUM_CASE(K_GAUSSIANMATCH)
00215         ENUM_CASE(K_HISTOGRAM)
00216         ENUM_CASE(K_SALZBERG)
00217         ENUM_CASE(K_LOCALITYIMPROVED)
00218         ENUM_CASE(K_SIMPLELOCALITYIMPROVED)
00219         ENUM_CASE(K_FIXEDDEGREE)
00220         ENUM_CASE(K_WEIGHTEDDEGREE)
00221         ENUM_CASE(K_WEIGHTEDDEGREEPOS)
00222         ENUM_CASE(K_WEIGHTEDDEGREERBF)
00223         ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING)
00224         ENUM_CASE(K_POLYMATCH)
00225         ENUM_CASE(K_ALIGNMENT)
00226         ENUM_CASE(K_COMMWORDSTRING)
00227         ENUM_CASE(K_COMMULONGSTRING)
00228         ENUM_CASE(K_SPECTRUMRBF)
00229         ENUM_CASE(K_COMBINED)
00230         ENUM_CASE(K_AUC)
00231         ENUM_CASE(K_CUSTOM)
00232         ENUM_CASE(K_SIGMOID)
00233         ENUM_CASE(K_CHI2)
00234         ENUM_CASE(K_DIAG)
00235         ENUM_CASE(K_CONST)
00236         ENUM_CASE(K_DISTANCE)
00237         ENUM_CASE(K_LOCALALIGNMENT)
00238         ENUM_CASE(K_PYRAMIDCHI2)
00239         ENUM_CASE(K_OLIGO)
00240         ENUM_CASE(K_MATCHWORD)
00241         ENUM_CASE(K_TPPK)
00242         ENUM_CASE(K_REGULATORYMODULES)
00243         ENUM_CASE(K_SPARSESPATIALSAMPLE)
00244         ENUM_CASE(K_HISTOGRAMINTERSECTION)
00245         ENUM_CASE(K_WAVELET)
00246         ENUM_CASE(K_WAVE)
00247         ENUM_CASE(K_CAUCHY)
00248         ENUM_CASE(K_TSTUDENT)
00249         ENUM_CASE(K_MULTIQUADRIC)
00250         ENUM_CASE(K_EXPONENTIAL)
00251         ENUM_CASE(K_RATIONAL_QUADRATIC)
00252         ENUM_CASE(K_POWER)
00253         ENUM_CASE(K_SPHERICAL)
00254         ENUM_CASE(K_LOG)
00255         ENUM_CASE(K_SPLINE)
00256         ENUM_CASE(K_ANOVA)
00257         ENUM_CASE(K_CIRCULAR)
00258         ENUM_CASE(K_INVERSEMULTIQUADRIC)
00259         ENUM_CASE(K_SPECTRUMMISMATCHRBF)
00260         ENUM_CASE(K_DISTANTSEGMENTS)
00261         ENUM_CASE(K_BESSEL)
00262     }
00263 
00264     switch (get_feature_class())
00265     {
00266         ENUM_CASE(C_UNKNOWN)
00267         ENUM_CASE(C_SIMPLE)
00268         ENUM_CASE(C_SPARSE)
00269         ENUM_CASE(C_STRING)
00270         ENUM_CASE(C_STREAMING_SIMPLE)
00271         ENUM_CASE(C_STREAMING_SPARSE)
00272         ENUM_CASE(C_STREAMING_STRING)
00273         ENUM_CASE(C_STREAMING_VW)
00274         ENUM_CASE(C_COMBINED)
00275         ENUM_CASE(C_COMBINED_DOT)
00276         ENUM_CASE(C_WD)
00277         ENUM_CASE(C_SPEC)
00278         ENUM_CASE(C_WEIGHTEDSPEC)
00279         ENUM_CASE(C_POLY)
00280         ENUM_CASE(C_ANY)
00281     }
00282 
00283     switch (get_feature_type())
00284     {
00285         ENUM_CASE(F_UNKNOWN)
00286         ENUM_CASE(F_BOOL)
00287         ENUM_CASE(F_CHAR)
00288         ENUM_CASE(F_BYTE)
00289         ENUM_CASE(F_SHORT)
00290         ENUM_CASE(F_WORD)
00291         ENUM_CASE(F_INT)
00292         ENUM_CASE(F_UINT)
00293         ENUM_CASE(F_LONG)
00294         ENUM_CASE(F_ULONG)
00295         ENUM_CASE(F_SHORTREAL)
00296         ENUM_CASE(F_DREAL)
00297         ENUM_CASE(F_LONGREAL)
00298         ENUM_CASE(F_ANY)
00299     }
00300     SG_INFO( "\n");
00301 }
00302 #undef ENUM_CASE
00303 
00304 bool CKernel::init_optimization(
00305     int32_t count, int32_t *IDX, float64_t * weights)
00306 {
00307    SG_ERROR( "kernel does not support linadd optimization\n");
00308     return false ;
00309 }
00310 
00311 bool CKernel::delete_optimization()
00312 {
00313    SG_ERROR( "kernel does not support linadd optimization\n");
00314     return false;
00315 }
00316 
00317 float64_t CKernel::compute_optimized(int32_t vector_idx)
00318 {
00319    SG_ERROR( "kernel does not support linadd optimization\n");
00320     return 0;
00321 }
00322 
00323 void CKernel::compute_batch(
00324     int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00325     int32_t* IDX, float64_t* weights, float64_t factor)
00326 {
00327    SG_ERROR( "kernel does not support batch computation\n");
00328 }
00329 
00330 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00331 {
00332    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00333 }
00334 
00335 void CKernel::clear_normal()
00336 {
00337    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00338 }
00339 
00340 int32_t CKernel::get_num_subkernels()
00341 {
00342     return 1;
00343 }
00344 
00345 void CKernel::compute_by_subkernel(
00346     int32_t vector_idx, float64_t * subkernel_contrib)
00347 {
00348    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00349 }
00350 
00351 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00352 {
00353     num_weights=1 ;
00354     return &combined_kernel_weight ;
00355 }
00356 
00357 void CKernel::set_subkernel_weights(SGVector<float64_t> weights)
00358 {
00359     ASSERT(weights.vector);
00360     if (weights.vlen!=1)
00361       SG_ERROR( "number of subkernel weights should be one ...\n");
00362 
00363     combined_kernel_weight = weights.vector[0] ;
00364 }
00365 
00366 bool CKernel::init_optimization_svm(CSVM * svm)
00367 {
00368     int32_t num_suppvec=svm->get_num_support_vectors();
00369     int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
00370     float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
00371 
00372     for (int32_t i=0; i<num_suppvec; i++)
00373     {
00374         sv_idx[i]    = svm->get_support_vector(i);
00375         sv_weight[i] = svm->get_alpha(i);
00376     }
00377     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00378 
00379     SG_FREE(sv_idx);
00380     SG_FREE(sv_weight);
00381     return ret;
00382 }
00383 
00384 void CKernel::load_serializable_post() throw (ShogunException)
00385 {
00386     CSGObject::load_serializable_post();
00387     if (lhs_equals_rhs)
00388         rhs=lhs;
00389 }
00390 
00391 void CKernel::save_serializable_pre() throw (ShogunException)
00392 {
00393     CSGObject::save_serializable_pre();
00394 
00395     if (lhs_equals_rhs)
00396         rhs=NULL;
00397 }
00398 
00399 void CKernel::save_serializable_post() throw (ShogunException)
00400 {
00401     CSGObject::save_serializable_post();
00402 
00403     if (lhs_equals_rhs)
00404         rhs=lhs;
00405 }
00406 
00407 void CKernel::register_params()   {
00408     m_parameters->add(&cache_size, "cache_size",
00409                       "Cache size in MB.");
00410     m_parameters->add((CSGObject**) &lhs, "lhs",
00411                       "Feature vectors to occur on left hand side.");
00412     m_parameters->add((CSGObject**) &rhs, "rhs",
00413                       "Feature vectors to occur on right hand side.");
00414     m_parameters->add(&lhs_equals_rhs, "lhs_equals_rhs",
00415                       "If features on lhs are the same as on rhs.");
00416     m_parameters->add(&num_lhs, "num_lhs",
00417                       "Number of feature vectors on left hand side.");
00418     m_parameters->add(&num_rhs, "num_rhs",
00419                       "Number of feature vectors on right hand side.");
00420     m_parameters->add(&combined_kernel_weight, "combined_kernel_weight",
00421                       "Combined kernel weight.");
00422     m_parameters->add(&optimization_initialized,
00423                       "optimization_initialized",
00424                       "Optimization is initialized.");
00425     m_parameters->add((machine_int_t*) &opt_type, "opt_type",
00426                       "Optimization type.");
00427     m_parameters->add(&properties, "properties",
00428                       "Kernel properties.");
00429     m_parameters->add((CSGObject**) &normalizer, "normalizer",
00430                       "Normalize the kernel.");
00431 }
00432 
00433 
00434 void CKernel::init()
00435 {
00436     cache_size=10;
00437     kernel_matrix=NULL;
00438     lhs=NULL;
00439     rhs=NULL;
00440     num_lhs=0;
00441     num_rhs=0;
00442     combined_kernel_weight=1;
00443     optimization_initialized=false;
00444     opt_type=FASTBUTMEMHUNGRY;
00445     properties=KP_NONE;
00446     normalizer=NULL;
00447 
00448 
00449 
00450     set_normalizer(new CIdentityKernelNormalizer());
00451 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation