SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Soeren Sonnenburg 00008 * Copyright (C) 2011 Berlin Institute of Technology 00009 */ 00010 00011 #include <shogun/preprocessor/KernelPCA.h> 00012 #ifdef HAVE_LAPACK 00013 #include <shogun/lib/config.h> 00014 #include <shogun/mathematics/Math.h> 00015 00016 #include <string.h> 00017 #include <stdlib.h> 00018 00019 #include <shogun/mathematics/lapack.h> 00020 #include <shogun/lib/common.h> 00021 #include <shogun/kernel/Kernel.h> 00022 #include <shogun/preprocessor/DimensionReductionPreprocessor.h> 00023 #include <shogun/features/Features.h> 00024 #include <shogun/features/SimpleFeatures.h> 00025 #include <shogun/io/SGIO.h> 00026 00027 using namespace shogun; 00028 00029 CKernelPCA::CKernelPCA() : CDimensionReductionPreprocessor() 00030 { 00031 init(); 00032 } 00033 00034 CKernelPCA::CKernelPCA(CKernel* k) : CDimensionReductionPreprocessor() 00035 { 00036 init(); 00037 set_kernel(k); 00038 } 00039 00040 void CKernelPCA::init() 00041 { 00042 m_initialized = false; 00043 m_init_features = NULL; 00044 m_transformation_matrix = SGMatrix<float64_t>(NULL, 0, 0, false); 00045 m_bias_vector = SGVector<float64_t>(NULL, 0, false); 00046 00047 m_parameters->add(&m_transformation_matrix, "transformation matrix", 00048 "matrix used to transform data"); 00049 m_parameters->add(&m_bias_vector, "bias vector", 00050 "bias vector used to transform data"); 00051 } 00052 00053 void CKernelPCA::cleanup() 00054 { 00055 m_transformation_matrix.destroy_matrix(); 00056 m_bias_vector.destroy_vector(); 00057 if (m_init_features) SG_UNREF(m_init_features); 00058 m_initialized = false; 00059 } 00060 00061 CKernelPCA::~CKernelPCA() 00062 { 00063 m_transformation_matrix.destroy_matrix(); 00064 m_bias_vector.destroy_vector(); 00065 if (m_init_features) SG_UNREF(m_init_features); 00066 } 00067 00068 bool CKernelPCA::init(CFeatures* features) 00069 { 00070 if (!m_initialized && m_kernel) 00071 { 00072 SG_REF(features); 00073 m_init_features = features; 00074 00075 m_kernel->init(features,features); 00076 SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix(); 00077 m_kernel->cleanup(); 00078 int32_t n = kernel_matrix.num_cols; 00079 int32_t m = kernel_matrix.num_rows; 00080 ASSERT(n==m); 00081 00082 float64_t* bias_tmp = CMath::get_column_sum(kernel_matrix.matrix, n,n); 00083 CMath::scale_vector(-1.0/n, bias_tmp, n); 00084 float64_t s = CMath::sum(bias_tmp, n)/n; 00085 CMath::add_scalar(-s, bias_tmp, n); 00086 00087 CMath::center_matrix(kernel_matrix.matrix, n, m); 00088 00089 float64_t* eigenvalues=CMath::compute_eigenvectors(kernel_matrix.matrix, n, n); 00090 00091 for (int32_t i=0; i<n; i++) 00092 { 00093 //normalize and trap divide by zero and negative eigenvalues 00094 for (int32_t j=0; j<n; j++) 00095 kernel_matrix.matrix[i*n+j]/=CMath::sqrt(CMath::max(1e-16,eigenvalues[i])); 00096 } 00097 00098 SG_FREE(eigenvalues); 00099 00100 m_transformation_matrix.destroy_matrix(); 00101 m_bias_vector.destroy_vector(); 00102 00103 m_transformation_matrix = SGMatrix<float64_t>(kernel_matrix.matrix,n,n); 00104 m_bias_vector = SGVector<float64_t>(n); 00105 CMath::fill_vector(m_bias_vector.vector, m_bias_vector.vlen, 0.0); 00106 00107 CMath::dgemv(1.0, m_transformation_matrix.matrix, n, n, 00108 CblasTrans, bias_tmp, 0.0, m_bias_vector.vector); 00109 00110 float64_t* rowsum = CMath::get_row_sum(m_transformation_matrix.matrix, n, n); 00111 CMath::scale_vector(1.0/n, rowsum, n); 00112 00113 for (int32_t i=0; i<n; i++) 00114 { 00115 for (int32_t j=0; j<n; j++) 00116 m_transformation_matrix.matrix[j+n*i] -= rowsum[i]; 00117 } 00118 SG_FREE(rowsum); 00119 SG_FREE(bias_tmp); 00120 00121 m_initialized=true; 00122 SG_INFO("Done\n"); 00123 return true; 00124 } 00125 return false; 00126 } 00127 00128 00129 SGMatrix<float64_t> CKernelPCA::apply_to_feature_matrix(CFeatures* features) 00130 { 00131 ASSERT(m_initialized); 00132 CSimpleFeatures<float64_t>* simple_features = (CSimpleFeatures<float64_t>*)features; 00133 00134 int32_t num_vectors = simple_features->get_num_vectors(); 00135 int32_t i,j,k; 00136 int32_t n = m_transformation_matrix.num_cols; 00137 00138 m_kernel->init(features,m_init_features); 00139 00140 float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors); 00141 00142 for (i=0; i<num_vectors; i++) 00143 { 00144 for (j=0; j<m_target_dim; j++) 00145 new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j]; 00146 00147 for (j=0; j<n; j++) 00148 { 00149 float64_t kij = m_kernel->kernel(i,j); 00150 00151 for (k=0; k<m_target_dim; k++) 00152 new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j]; 00153 } 00154 } 00155 00156 m_kernel->cleanup(); 00157 simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors)); 00158 return ((CSimpleFeatures<float64_t>*)features)->get_feature_matrix(); 00159 } 00160 00161 SGVector<float64_t> CKernelPCA::apply_to_feature_vector(SGVector<float64_t> vector) 00162 { 00163 ASSERT(m_initialized); 00164 SGVector<float64_t> result = SGVector<float64_t>(m_target_dim); 00165 m_kernel->init(new CSimpleFeatures<float64_t>(SGMatrix<float64_t>(vector.vector,vector.vlen,1)), 00166 m_init_features); 00167 00168 int32_t j,k; 00169 int32_t n = m_transformation_matrix.num_cols; 00170 00171 for (j=0; j<m_target_dim; j++) 00172 result.vector[j] = m_bias_vector.vector[j]; 00173 00174 for (j=0; j<n; j++) 00175 { 00176 float64_t kj = m_kernel->kernel(0,j); 00177 00178 for (k=0; k<m_target_dim; k++) 00179 result.vector[k] += kj*m_transformation_matrix.matrix[(n-k-1)*n+j]; 00180 } 00181 00182 m_kernel->cleanup(); 00183 return result; 00184 } 00185 00186 CSimpleFeatures<float64_t>* CKernelPCA::apply_to_string_features(CFeatures* features) 00187 { 00188 ASSERT(m_initialized); 00189 00190 int32_t num_vectors = features->get_num_vectors(); 00191 int32_t i,j,k; 00192 int32_t n = m_transformation_matrix.num_cols; 00193 00194 m_kernel->init(features,m_init_features); 00195 00196 float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors); 00197 00198 for (i=0; i<num_vectors; i++) 00199 { 00200 for (j=0; j<m_target_dim; j++) 00201 new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j]; 00202 00203 for (j=0; j<n; j++) 00204 { 00205 float64_t kij = m_kernel->kernel(i,j); 00206 00207 for (k=0; k<m_target_dim; k++) 00208 new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j]; 00209 } 00210 } 00211 00212 m_kernel->cleanup(); 00213 00214 return new CSimpleFeatures<float64_t>(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors)); 00215 } 00216 00217 #endif