SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Alesis Novik 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 */ 00010 #include <shogun/lib/config.h> 00011 00012 #ifdef HAVE_LAPACK 00013 00014 #include <shogun/distributions/Gaussian.h> 00015 #include <shogun/mathematics/Math.h> 00016 #include <shogun/base/Parameter.h> 00017 00018 using namespace shogun; 00019 00020 CGaussian::CGaussian() : CDistribution(), m_constant(0), m_d(), m_u(), m_mean(), m_cov_type(FULL) 00021 { 00022 register_params(); 00023 } 00024 00025 CGaussian::CGaussian(SGVector<float64_t> mean, SGMatrix<float64_t> cov, 00026 ECovType cov_type) : CDistribution(), m_d(), m_u(), m_cov_type(cov_type) 00027 { 00028 ASSERT(mean.vlen==cov.num_rows); 00029 ASSERT(cov.num_rows==cov.num_cols); 00030 00031 m_mean=mean; 00032 00033 if (cov.num_rows==1) 00034 m_cov_type=SPHERICAL; 00035 00036 decompose_cov(cov); 00037 init(); 00038 register_params(); 00039 00040 if (cov.do_free) 00041 cov.free_matrix(); 00042 } 00043 00044 void CGaussian::init() 00045 { 00046 m_constant=CMath::log(2*M_PI)*m_mean.vlen; 00047 switch (m_cov_type) 00048 { 00049 case FULL: 00050 case DIAG: 00051 for (int32_t i=0; i<m_d.vlen; i++) 00052 m_constant+=CMath::log(m_d.vector[i]); 00053 break; 00054 case SPHERICAL: 00055 m_constant+=m_mean.vlen*CMath::log(m_d.vector[0]); 00056 break; 00057 } 00058 } 00059 00060 CGaussian::~CGaussian() 00061 { 00062 m_d.destroy_vector(); 00063 m_u.destroy_matrix(); 00064 m_mean.free_vector(); 00065 } 00066 00067 bool CGaussian::train(CFeatures* data) 00068 { 00069 // init features with data if necessary and assure type is correct 00070 if (data) 00071 { 00072 if (!data->has_property(FP_DOT)) 00073 SG_ERROR("Specified features are not of type CDotFeatures\n"); 00074 set_features(data); 00075 } 00076 CDotFeatures* dotdata=(CDotFeatures *) data; 00077 00078 m_mean.destroy_vector(); 00079 00080 m_mean=dotdata->get_mean(); 00081 SGMatrix<float64_t> cov=dotdata->get_cov(); 00082 00083 decompose_cov(cov); 00084 cov.destroy_matrix(); 00085 00086 init(); 00087 00088 return true; 00089 } 00090 00091 int32_t CGaussian::get_num_model_parameters() 00092 { 00093 switch (m_cov_type) 00094 { 00095 case FULL: 00096 return m_u.num_rows*m_u.num_cols+m_d.vlen+m_mean.vlen; 00097 case DIAG: 00098 return m_d.vlen+m_mean.vlen; 00099 case SPHERICAL: 00100 return 1+m_mean.vlen; 00101 } 00102 return 0; 00103 } 00104 00105 float64_t CGaussian::get_log_model_parameter(int32_t num_param) 00106 { 00107 SG_NOTIMPLEMENTED; 00108 return 0; 00109 } 00110 00111 float64_t CGaussian::get_log_derivative(int32_t num_param, int32_t num_example) 00112 { 00113 SG_NOTIMPLEMENTED; 00114 return 0; 00115 } 00116 00117 float64_t CGaussian::get_log_likelihood_example(int32_t num_example) 00118 { 00119 ASSERT(features->has_property(FP_DOT)); 00120 SGVector<float64_t> v=((CDotFeatures *)features)->get_computed_dot_feature_vector(num_example); 00121 float64_t answer=compute_log_PDF(v); 00122 v.free_vector(); 00123 return answer; 00124 } 00125 00126 float64_t CGaussian::compute_log_PDF(SGVector<float64_t> point) 00127 { 00128 ASSERT(m_mean.vector && m_d.vector); 00129 ASSERT(point.vlen == m_mean.vlen); 00130 float64_t* difference=SG_MALLOC(float64_t, m_mean.vlen); 00131 memcpy(difference, point.vector, sizeof(float64_t)*m_mean.vlen); 00132 00133 for (int32_t i = 0; i < m_mean.vlen; i++) 00134 difference[i] -= m_mean.vector[i]; 00135 00136 float64_t answer=m_constant; 00137 00138 if (m_cov_type==FULL) 00139 { 00140 float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen); 00141 cblas_dgemv(CblasRowMajor, CblasNoTrans, m_d.vlen, m_d.vlen, 00142 1, m_u.matrix, m_d.vlen, difference, 1, 0, temp_holder, 1); 00143 00144 for (int32_t i=0; i<m_d.vlen; i++) 00145 answer+=temp_holder[i]*temp_holder[i]/m_d.vector[i]; 00146 00147 SG_FREE(temp_holder); 00148 } 00149 else if (m_cov_type==DIAG) 00150 { 00151 for (int32_t i=0; i<m_mean.vlen; i++) 00152 answer+=difference[i]*difference[i]/m_d.vector[i]; 00153 } 00154 else 00155 { 00156 for (int32_t i=0; i<m_mean.vlen; i++) 00157 answer+=difference[i]*difference[i]/m_d.vector[0]; 00158 } 00159 00160 SG_FREE(difference); 00161 00162 return -0.5*answer; 00163 } 00164 00165 SGMatrix<float64_t> CGaussian::get_cov() 00166 { 00167 float64_t* cov=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen); 00168 memset(cov, 0, sizeof(float64_t)*m_mean.vlen*m_mean.vlen); 00169 00170 if (m_cov_type==FULL) 00171 { 00172 if (!m_u.matrix) 00173 SG_ERROR("Unitary matrix not set\n"); 00174 00175 float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen); 00176 float64_t* diag_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen); 00177 memset(diag_holder, 0, sizeof(float64_t)*m_d.vlen*m_d.vlen); 00178 for(int32_t i=0; i<m_d.vlen; i++) 00179 diag_holder[i*m_d.vlen+i]=m_d.vector[i]; 00180 00181 cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, 00182 m_d.vlen, m_d.vlen, m_d.vlen, 1, m_u.matrix, m_d.vlen, 00183 diag_holder, m_d.vlen, 0, temp_holder, m_d.vlen); 00184 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 00185 m_d.vlen, m_d.vlen, m_d.vlen, 1, temp_holder, m_d.vlen, 00186 m_u.matrix, m_d.vlen, 0, cov, m_d.vlen); 00187 00188 SG_FREE(diag_holder); 00189 SG_FREE(temp_holder); 00190 } 00191 else if (m_cov_type==DIAG) 00192 { 00193 for (int32_t i=0; i<m_d.vlen; i++) 00194 cov[i*m_d.vlen+i]=m_d.vector[i]; 00195 } 00196 else 00197 { 00198 for (int32_t i=0; i<m_mean.vlen; i++) 00199 cov[i*m_mean.vlen+i]=m_d.vector[0]; 00200 } 00201 return SGMatrix<float64_t>(cov, m_mean.vlen, m_mean.vlen, false);//fix needed 00202 } 00203 00204 void CGaussian::register_params() 00205 { 00206 m_parameters->add(&m_u, "m_u", "Unitary matrix."); 00207 m_parameters->add(&m_d, "m_d", "Diagonal."); 00208 m_parameters->add(&m_mean, "m_mean", "Mean."); 00209 m_parameters->add(&m_constant, "m_constant", "Constant part."); 00210 m_parameters->add((machine_int_t*)&m_cov_type, "m_cov_type", "Covariance type."); 00211 } 00212 00213 void CGaussian::decompose_cov(SGMatrix<float64_t> cov) 00214 { 00215 m_d.destroy_vector(); 00216 switch (m_cov_type) 00217 { 00218 case FULL: 00219 m_u.destroy_matrix(); 00220 m_u.matrix=SG_MALLOC(float64_t, cov.num_rows*cov.num_rows); 00221 memcpy(m_u.matrix, cov.matrix, sizeof(float64_t)*cov.num_rows*cov.num_rows); 00222 00223 m_d.vector=CMath::compute_eigenvectors(m_u.matrix, cov.num_rows, cov.num_rows); 00224 m_d.vlen=cov.num_rows; 00225 m_u.num_rows=cov.num_rows; 00226 m_u.num_cols=cov.num_rows; 00227 break; 00228 case DIAG: 00229 m_d.vector=SG_MALLOC(float64_t, cov.num_rows); 00230 00231 for (int32_t i=0; i<cov.num_rows; i++) 00232 m_d.vector[i]=cov.matrix[i*cov.num_rows+i]; 00233 00234 m_d.vlen=cov.num_rows; 00235 break; 00236 case SPHERICAL: 00237 m_d.vector=SG_MALLOC(float64_t, 1); 00238 00239 m_d.vector[0]=cov.matrix[0]; 00240 m_d.vlen=1; 00241 break; 00242 } 00243 } 00244 00245 SGVector<float64_t> CGaussian::sample() 00246 { 00247 float64_t* r_matrix=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen); 00248 memset(r_matrix, 0, m_mean.vlen*m_mean.vlen*sizeof(float64_t)); 00249 00250 switch (m_cov_type) 00251 { 00252 case FULL: 00253 case DIAG: 00254 for (int32_t i=0; i<m_mean.vlen; i++) 00255 r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[i]); 00256 00257 break; 00258 case SPHERICAL: 00259 for (int32_t i=0; i<m_mean.vlen; i++) 00260 r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[0]); 00261 00262 break; 00263 } 00264 00265 float64_t* random_vec=SG_MALLOC(float64_t, m_mean.vlen); 00266 00267 for (int32_t i=0; i<m_mean.vlen; i++) 00268 random_vec[i]=CMath::randn_double(); 00269 00270 if (m_cov_type==FULL) 00271 { 00272 float64_t* temp_matrix=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen); 00273 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 00274 m_d.vlen, m_d.vlen, m_d.vlen, 1, m_u.matrix, m_d.vlen, 00275 r_matrix, m_d.vlen, 0, temp_matrix, m_d.vlen); 00276 SG_FREE(r_matrix); 00277 r_matrix=temp_matrix; 00278 } 00279 00280 float64_t* samp=SG_MALLOC(float64_t, m_mean.vlen); 00281 00282 cblas_dgemv(CblasRowMajor, CblasNoTrans, m_mean.vlen, m_mean.vlen, 00283 1, r_matrix, m_mean.vlen, random_vec, 1, 0, samp, 1); 00284 00285 for (int32_t i=0; i<m_mean.vlen; i++) 00286 samp[i]+=m_mean.vector[i]; 00287 00288 SG_FREE(random_vec); 00289 SG_FREE(r_matrix); 00290 00291 return SGVector<float64_t>(samp, m_mean.vlen, false);//fix needed 00292 } 00293 00294 #endif