SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Kernel.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _KERNEL_H___
00013 #define _KERNEL_H___
00014 
00015 #include <shogun/lib/common.h>
00016 #include <shogun/lib/Signal.h>
00017 #include <shogun/io/File.h>
00018 #include <shogun/mathematics/Math.h>
00019 #include <shogun/features/FeatureTypes.h>
00020 #include <shogun/base/SGObject.h>
00021 #include <shogun/features/Features.h>
00022 #include <shogun/kernel/KernelNormalizer.h>
00023 
00024 #include <vector>
00025 
00026 namespace shogun
00027 {
00028     class CFile;
00029     class CFeatures;
00030     class CKernelNormalizer;
00031 
00032 #ifdef USE_SHORTREAL_KERNELCACHE
00033 
00034     typedef float32_t KERNELCACHE_ELEM;
00035 #else
00036 
00037     typedef float64_t KERNELCACHE_ELEM;
00038 #endif
00039 
00041 typedef int64_t KERNELCACHE_IDX;
00042 
00043 
00045 enum EOptimizationType
00046 {
00047     FASTBUTMEMHUNGRY,
00048     SLOWBUTMEMEFFICIENT
00049 };
00050 
00052 enum EKernelType
00053 {
00054     K_UNKNOWN = 0,
00055     K_LINEAR = 10,
00056     K_POLY = 20,
00057     K_GAUSSIAN = 30,
00058     K_GAUSSIANSHIFT = 32,
00059     K_GAUSSIANMATCH = 33,
00060     K_HISTOGRAM = 40,
00061     K_SALZBERG = 41,
00062     K_LOCALITYIMPROVED = 50,
00063     K_SIMPLELOCALITYIMPROVED = 60,
00064     K_FIXEDDEGREE = 70,
00065     K_WEIGHTEDDEGREE =    80,
00066     K_WEIGHTEDDEGREEPOS = 81,
00067     K_WEIGHTEDDEGREERBF = 82,
00068     K_WEIGHTEDCOMMWORDSTRING = 90,
00069     K_POLYMATCH = 100,
00070     K_ALIGNMENT = 110,
00071     K_COMMWORDSTRING = 120,
00072     K_COMMULONGSTRING = 121,
00073     K_SPECTRUMRBF = 122,
00074     K_SPECTRUMMISMATCHRBF = 123,
00075     K_COMBINED = 140,
00076     K_AUC = 150,
00077     K_CUSTOM = 160,
00078     K_SIGMOID = 170,
00079     K_CHI2 = 180,
00080     K_DIAG = 190,
00081     K_CONST = 200,
00082     K_DISTANCE = 220,
00083     K_LOCALALIGNMENT = 230,
00084     K_PYRAMIDCHI2 = 240,
00085     K_OLIGO = 250,
00086     K_MATCHWORD = 260,
00087     K_TPPK = 270,
00088     K_REGULATORYMODULES = 280,
00089     K_SPARSESPATIALSAMPLE = 290,
00090     K_HISTOGRAMINTERSECTION = 300,
00091     K_WAVELET = 310,
00092     K_WAVE = 320,
00093     K_CAUCHY = 330,
00094     K_TSTUDENT = 340,
00095     K_RATIONAL_QUADRATIC = 350,
00096     K_MULTIQUADRIC = 360,
00097     K_EXPONENTIAL = 370,
00098     K_SPHERICAL = 380,
00099     K_SPLINE = 390,
00100     K_ANOVA = 400,
00101     K_POWER = 410,
00102     K_LOG = 420,
00103     K_CIRCULAR = 430,
00104     K_INVERSEMULTIQUADRIC = 440,
00105     K_DISTANTSEGMENTS = 450,
00106     K_BESSEL = 460,
00107 };
00108 
00110 enum EKernelProperty
00111 {
00112     KP_NONE = 0,
00113     KP_LINADD = 1,  // Kernels that can be optimized via doing normal updates w + dw
00114     KP_KERNCOMBINATION = 2, // Kernels that are infact a linear combination of subkernels K=\sum_i b_i*K_i
00115     KP_BATCHEVALUATION = 4  // Kernels that can on the fly generate normals in linadd and more quickly/memory efficient process batches instead of single examples
00116 };
00117 
00118 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00119 
00120 template <class T> struct K_THREAD_PARAM
00121 {
00123     CKernel* kernel;
00125     int32_t start;
00127     int32_t end;
00129     int32_t total_start;
00131     int32_t total_end;
00133     int32_t m;
00135     int32_t n;
00137     T* result;
00139     bool symmetric;
00141     bool verbose;
00142 };
00143 #endif
00144 
00145 class CSVM;
00146 
00172 class CKernel : public CSGObject
00173 {
00174     friend class CVarianceKernelNormalizer;
00175     friend class CSqrtDiagKernelNormalizer;
00176     friend class CAvgDiagKernelNormalizer;
00177     friend class CRidgeKernelNormalizer;
00178     friend class CFirstElementKernelNormalizer;
00179     friend class CMultitaskKernelNormalizer;
00180     friend class CMultitaskKernelMklNormalizer;
00181     friend class CMultitaskKernelMaskNormalizer;
00182     friend class CMultitaskKernelMaskPairNormalizer;
00183     friend class CTanimotoKernelNormalizer;
00184     friend class CDiceKernelNormalizer;
00185     friend class CZeroMeanCenterKernelNormalizer;
00186 
00187     public:
00188 
00192         CKernel();
00193 
00194 
00199         CKernel(int32_t size);
00200 
00207         CKernel(CFeatures* l, CFeatures* r, int32_t size);
00208 
00209         virtual ~CKernel();
00210 
00218         inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00219         {
00220             if (idx_a<0 || idx_b<0 || idx_a>=num_lhs || idx_b>=num_rhs)
00221             {
00222                 SG_ERROR("Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
00223                         idx_a,num_lhs, idx_b,num_rhs);
00224             }
00225 
00226             return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00227         }
00228 
00233         SGMatrix<float64_t> get_kernel_matrix()
00234         {
00235             return get_kernel_matrix<float64_t>();
00236         }
00237 
00243         virtual SGVector<float64_t> get_kernel_col(int32_t j)
00244         {
00245 
00246             SGVector<float64_t> col = SGVector<float64_t>(num_rhs);
00247 
00248             for (int32_t i=0; i!=num_rhs; i++)
00249                 col[i] = kernel(i,j);
00250 
00251             return col;
00252         }
00253 
00254 
00260         virtual SGVector<float64_t> get_kernel_row(int32_t i)
00261         {
00262             SGVector<float64_t> row = SGVector<float64_t>(num_lhs);
00263 
00264             for (int32_t j=0; j!=num_lhs; j++)
00265                 row[j] = kernel(i,j);
00266 
00267             return row;
00268         }
00269 
00274         template <class T>
00275         SGMatrix<T> get_kernel_matrix()
00276         {
00277             T* result = NULL;
00278 
00279             if (!has_features())
00280                 SG_ERROR( "no features assigned to kernel\n");
00281 
00282             int32_t m=get_num_vec_lhs();
00283             int32_t n=get_num_vec_rhs();
00284 
00285             int64_t total_num = int64_t(m)*n;
00286 
00287             // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
00288             bool symmetric= (lhs && lhs==rhs && m==n);
00289 
00290             SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
00291 
00292             result=SG_MALLOC(T, total_num);
00293 
00294             int32_t num_threads=parallel->get_num_threads();
00295             if (num_threads < 2)
00296             {
00297                 K_THREAD_PARAM<T> params;
00298                 params.kernel=this;
00299                 params.result=result;
00300                 params.start=0;
00301                 params.end=m;
00302                 params.total_start=0;
00303                 params.total_end=total_num;
00304                 params.n=n;
00305                 params.m=m;
00306                 params.symmetric=symmetric;
00307                 params.verbose=true;
00308                 get_kernel_matrix_helper<T>((void*) &params);
00309             }
00310             else
00311             {
00312                 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00313                 K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
00314                 int64_t step= total_num/num_threads;
00315 
00316                 int32_t t;
00317 
00318                 num_threads--;
00319                 for (t=0; t<num_threads; t++)
00320                 {
00321                     params[t].kernel = this;
00322                     params[t].result = result;
00323                     params[t].start = compute_row_start(t*step, n, symmetric);
00324                     params[t].end = compute_row_start((t+1)*step, n, symmetric);
00325                     params[t].total_start=t*step;
00326                     params[t].total_end=(t+1)*step;
00327                     params[t].n=n;
00328                     params[t].m=m;
00329                     params[t].symmetric=symmetric;
00330                     params[t].verbose=false;
00331 
00332                     int code=pthread_create(&threads[t], NULL,
00333                             CKernel::get_kernel_matrix_helper<T>, (void*)&params[t]);
00334 
00335                     if (code != 0)
00336                     {
00337                         SG_WARNING("Thread creation failed (thread %d of %d) "
00338                                 "with error:'%s'\n",t, num_threads, strerror(code));
00339                         num_threads=t;
00340                         break;
00341                     }
00342                 }
00343 
00344                 params[t].kernel = this;
00345                 params[t].result = result;
00346                 params[t].start = compute_row_start(t*step, n, symmetric);
00347                 params[t].end = m;
00348                 params[t].total_start=t*step;
00349                 params[t].total_end=total_num;
00350                 params[t].n=n;
00351                 params[t].m=m;
00352                 params[t].symmetric=symmetric;
00353                 params[t].verbose=true;
00354                 get_kernel_matrix_helper<T>(&params[t]);
00355 
00356                 for (t=0; t<num_threads; t++)
00357                 {
00358                     if (pthread_join(threads[t], NULL) != 0)
00359                         SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
00360                 }
00361 
00362                 SG_FREE(params);
00363                 SG_FREE(threads);
00364             }
00365 
00366             SG_DONE();
00367 
00368             return SGMatrix<T>(result,m,n,true);
00369         }
00370 
00371 
00382         virtual bool init(CFeatures* lhs, CFeatures* rhs);
00383 
00388         virtual bool set_normalizer(CKernelNormalizer* normalizer);
00389 
00394         virtual CKernelNormalizer* get_normalizer();
00395 
00399         virtual bool init_normalizer();
00400 
00407         virtual void cleanup();
00408 
00413         void load(CFile* loader);
00414 
00419         void save(CFile* writer);
00420 
00425         inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00426 
00431         inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00432 
00437         virtual inline int32_t get_num_vec_lhs()
00438         {
00439             return num_lhs;
00440         }
00441 
00446         virtual inline int32_t get_num_vec_rhs()
00447         {
00448             return num_rhs;
00449         }
00450 
00455         virtual inline bool has_features()
00456         {
00457             return lhs && rhs;
00458         }
00459 
00464         inline bool get_lhs_equals_rhs()
00465         {
00466             return lhs_equals_rhs;
00467         }
00468 
00470         virtual void remove_lhs_and_rhs();
00471 
00473         virtual void remove_lhs();
00474 
00476         virtual void remove_rhs();
00477 
00485         virtual EKernelType get_kernel_type()=0 ;
00486 
00493         virtual EFeatureType get_feature_type()=0;
00494 
00501         virtual EFeatureClass get_feature_class()=0;
00502 
00507         inline void set_cache_size(int32_t size)
00508         {
00509             cache_size = size;
00510 
00511         }
00512 
00517         inline int32_t get_cache_size() { return cache_size; }
00518 
00519 
00520 
00522         void list_kernel();
00523 
00529         inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00530 
00534         virtual void clear_normal();
00535 
00541         virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00542 
00547         inline EOptimizationType get_optimization_type() { return opt_type; }
00548 
00553         virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00554 
00559         inline bool get_is_initialized() { return optimization_initialized; }
00560 
00568         virtual bool init_optimization(
00569             int32_t count, int32_t *IDX, float64_t *weights);
00570 
00575         virtual bool delete_optimization();
00576 
00582         bool init_optimization_svm(CSVM * svm) ;
00583 
00589         virtual float64_t compute_optimized(int32_t vector_idx);
00590 
00599         virtual void compute_batch(
00600             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00601             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00602             float64_t factor=1.0);
00603 
00608         inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00609 
00614         inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00615 
00620         virtual int32_t get_num_subkernels();
00621 
00627         virtual void compute_by_subkernel(
00628             int32_t vector_idx, float64_t * subkernel_contrib);
00629 
00635         virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00636 
00641         virtual void set_subkernel_weights(SGVector<float64_t> weights);
00642 
00643     protected:
00648         inline void set_property(EKernelProperty p)
00649         {
00650             properties |= p;
00651         }
00652 
00657         inline void unset_property(EKernelProperty p)
00658         {
00659             properties &= (properties | p) ^ p;
00660         }
00661 
00666         inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00667 
00678         virtual float64_t compute(int32_t x, int32_t y)=0;
00679 
00686         int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
00687         {
00688             int32_t i_start;
00689 
00690             if (symmetric)
00691                 i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
00692             else
00693                 i_start=(int32_t) (offs/int64_t(n));
00694 
00695             return i_start;
00696         }
00697 
00702         template <class T>
00703         static void* get_kernel_matrix_helper(void* p)
00704         {
00705             K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
00706             int32_t i_start=params->start;
00707             int32_t i_end=params->end;
00708             CKernel* k=params->kernel;
00709             T* result=params->result;
00710             bool symmetric=params->symmetric;
00711             int32_t n=params->n;
00712             int32_t m=params->m;
00713             bool verbose=params->verbose;
00714             int64_t total_start=params->total_start;
00715             int64_t total_end=params->total_end;
00716             int64_t total=total_start;
00717 
00718             for (int32_t i=i_start; i<i_end; i++)
00719             {
00720                 int32_t j_start=0;
00721 
00722                 if (symmetric)
00723                     j_start=i;
00724 
00725                 for (int32_t j=j_start; j<n; j++)
00726                 {
00727                     float64_t v=k->kernel(i,j);
00728                     result[i+j*m]=v;
00729 
00730                     if (symmetric && i!=j)
00731                         result[j+i*m]=v;
00732 
00733                     if (verbose)
00734                     {
00735                         total++;
00736 
00737                         if (symmetric && i!=j)
00738                             total++;
00739 
00740                         if (total%100 == 0)
00741                             k->SG_PROGRESS(total, total_start, total_end);
00742 
00743                         if (CSignal::cancel_computations())
00744                             break;
00745                     }
00746                 }
00747 
00748             }
00749 
00750             return NULL;
00751         }
00752 
00761         virtual void load_serializable_post() throw (ShogunException);
00762 
00771         virtual void save_serializable_pre() throw (ShogunException);
00772 
00781         virtual void save_serializable_post() throw (ShogunException);
00786         virtual void register_params();
00787 
00788     private:
00791         void init();
00792 
00793 
00794 
00796 
00797     protected:
00799         int32_t cache_size;
00800 
00801 
00802 
00805         KERNELCACHE_ELEM* kernel_matrix;
00806 
00808         CFeatures* lhs;
00810         CFeatures* rhs;
00811 
00813         bool lhs_equals_rhs;
00814 
00816         int32_t num_lhs;
00818         int32_t num_rhs;
00819 
00821         float64_t combined_kernel_weight;
00822 
00824         bool optimization_initialized;
00828         EOptimizationType opt_type;
00829 
00831         uint64_t  properties;
00832 
00835         CKernelNormalizer* normalizer;
00836 };
00837 
00838 }
00839 #endif /* _KERNEL_H__ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation