SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 2 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Christian Widmer 00008 * Copyright (C) 2010 Max-Planck-Society 00009 */ 00010 00011 #ifndef _MULTITASKKERNELPLIFNORMALIZER_H___ 00012 #define _MULTITASKKERNELPLIFNORMALIZER_H___ 00013 00014 #include <shogun/kernel/KernelNormalizer.h> 00015 #include <shogun/kernel/MultitaskKernelMklNormalizer.h> 00016 #include <shogun/kernel/Kernel.h> 00017 #include <algorithm> 00018 00019 00020 00021 namespace shogun 00022 { 00026 class CMultitaskKernelPlifNormalizer: public CMultitaskKernelMklNormalizer 00027 { 00028 00029 public: 00031 CMultitaskKernelPlifNormalizer() : CMultitaskKernelMklNormalizer() 00032 { 00033 num_tasks = 0; 00034 num_tasksqr = 0; 00035 num_betas = 0; 00036 } 00037 00040 CMultitaskKernelPlifNormalizer(std::vector<float64_t> support_, std::vector<int32_t> task_vector) 00041 : CMultitaskKernelMklNormalizer() 00042 { 00043 00044 num_betas = static_cast<int>(support_.size()); 00045 00046 support = support_; 00047 00048 // init support points values with constant function 00049 betas = std::vector<float64_t>(num_betas); 00050 for (int i=0; i!=num_betas; i++) 00051 { 00052 betas[i] = 1; 00053 } 00054 00055 num_tasks = get_num_unique_tasks(task_vector); 00056 num_tasksqr = num_tasks * num_tasks; 00057 00058 // set both sides equally 00059 set_task_vector(task_vector); 00060 00061 // init distance matrix 00062 distance_matrix = std::vector<float64_t>(num_tasksqr); 00063 00064 // init similarity matrix 00065 similarity_matrix = std::vector<float64_t>(num_tasksqr); 00066 00067 } 00068 00069 00075 inline virtual float64_t normalize(float64_t value, int32_t idx_lhs, 00076 int32_t idx_rhs) 00077 { 00078 00079 //lookup tasks 00080 int32_t task_idx_lhs = task_vector_lhs[idx_lhs]; 00081 int32_t task_idx_rhs = task_vector_rhs[idx_rhs]; 00082 00083 //lookup similarity 00084 float64_t task_similarity = get_task_similarity(task_idx_lhs, 00085 task_idx_rhs); 00086 00087 //take task similarity into account 00088 float64_t similarity = (value/scale) * task_similarity; 00089 00090 00091 return similarity; 00092 00093 } 00094 00100 int32_t get_num_unique_tasks(std::vector<int32_t> vec) { 00101 00102 //sort 00103 std::sort(vec.begin(), vec.end()); 00104 00105 //reorder tasks with unique prefix 00106 std::vector<int32_t>::iterator endLocation = std::unique(vec.begin(), vec.end()); 00107 00108 //count unique tasks 00109 int32_t num_vec = std::distance(vec.begin(), endLocation); 00110 00111 return num_vec; 00112 00113 } 00114 00115 00117 virtual ~CMultitaskKernelPlifNormalizer() 00118 { 00119 } 00120 00121 00123 void update_cache() 00124 { 00125 00126 00127 for (int32_t i=0; i!=num_tasks; i++) 00128 { 00129 for (int32_t j=0; j!=num_tasks; j++) 00130 { 00131 00132 float64_t similarity = compute_task_similarity(i, j); 00133 set_task_similarity(i,j,similarity); 00134 00135 } 00136 00137 } 00138 } 00139 00140 00142 float64_t compute_task_similarity(int32_t task_a, int32_t task_b) 00143 { 00144 00145 float64_t distance = get_task_distance(task_a, task_b); 00146 float64_t similarity = -1; 00147 00148 int32_t upper_bound_idx = -1; 00149 00150 00151 // determine interval 00152 for (int i=1; i!=num_betas; i++) 00153 { 00154 if (distance <= support[i]) 00155 { 00156 upper_bound_idx = i; 00157 break; 00158 } 00159 } 00160 00161 // perform interpolation (constant for beyond upper bound) 00162 if (upper_bound_idx == -1) 00163 { 00164 00165 similarity = betas[num_betas-1]; 00166 00167 } else { 00168 00169 int32_t lower_bound_idx = upper_bound_idx - 1; 00170 float64_t interval_size = support[upper_bound_idx] - support[lower_bound_idx]; 00171 00172 float64_t factor_lower = 1 - (distance - support[lower_bound_idx]) / interval_size; 00173 float64_t factor_upper = 1 - factor_lower; 00174 00175 similarity = factor_lower*betas[lower_bound_idx] + factor_upper*betas[upper_bound_idx]; 00176 00177 } 00178 00179 return similarity; 00180 00181 } 00182 00183 00184 public: 00185 00187 virtual std::vector<int32_t> get_task_vector_lhs() const 00188 { 00189 return task_vector_lhs; 00190 } 00191 00193 virtual void set_task_vector_lhs(std::vector<int32_t> vec) 00194 { 00195 task_vector_lhs = vec; 00196 } 00197 00199 virtual std::vector<int32_t> get_task_vector_rhs() const 00200 { 00201 return task_vector_rhs; 00202 } 00203 00205 virtual void set_task_vector_rhs(std::vector<int32_t> vec) 00206 { 00207 task_vector_rhs = vec; 00208 } 00209 00211 virtual void set_task_vector(std::vector<int32_t> vec) 00212 { 00213 task_vector_lhs = vec; 00214 task_vector_rhs = vec; 00215 } 00216 00222 float64_t get_task_distance(int32_t task_lhs, int32_t task_rhs) 00223 { 00224 00225 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00226 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00227 00228 return distance_matrix[task_lhs * num_tasks + task_rhs]; 00229 00230 } 00231 00237 void set_task_distance(int32_t task_lhs, int32_t task_rhs, 00238 float64_t distance) 00239 { 00240 00241 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00242 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00243 00244 distance_matrix[task_lhs * num_tasks + task_rhs] = distance; 00245 00246 } 00247 00253 float64_t get_task_similarity(int32_t task_lhs, int32_t task_rhs) 00254 { 00255 00256 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00257 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00258 00259 return similarity_matrix[task_lhs * num_tasks + task_rhs]; 00260 00261 } 00262 00268 void set_task_similarity(int32_t task_lhs, int32_t task_rhs, 00269 float64_t similarity) 00270 { 00271 00272 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00273 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00274 00275 similarity_matrix[task_lhs * num_tasks + task_rhs] = similarity; 00276 00277 } 00278 00282 float64_t get_beta(int32_t idx) 00283 { 00284 00285 return betas[idx]; 00286 00287 } 00288 00293 void set_beta(int32_t idx, float64_t weight) 00294 { 00295 00296 betas[idx] = weight; 00297 00298 update_cache(); 00299 00300 } 00301 00305 int32_t get_num_betas() 00306 { 00307 00308 return num_betas; 00309 00310 } 00311 00312 00314 inline virtual const char* get_name() const 00315 { 00316 return "MultitaskKernelPlifNormalizer"; 00317 } 00318 00322 CMultitaskKernelPlifNormalizer* KernelNormalizerToMultitaskKernelPlifNormalizer(CKernelNormalizer* n) 00323 { 00324 return dynamic_cast<shogun::CMultitaskKernelPlifNormalizer*>(n); 00325 } 00326 00327 protected: 00330 virtual void register_params() 00331 { 00332 m_parameters->add(&num_tasks, "num_tasks", "the number of tasks"); 00333 m_parameters->add(&num_betas, "num_betas", "the number of weights"); 00334 m_parameters->add_vector((SGString<float64_t>**)&distance_matrix, &num_tasksqr, "distance_matrix", "distance between tasks"); 00335 m_parameters->add_vector((SGString<float64_t>**)&similarity_matrix, &num_tasksqr, "similarity_matrix", "similarity between tasks"); 00336 m_parameters->add_vector((SGString<float64_t>**)&betas, &num_betas, "num_betas", "weights"); 00337 m_parameters->add_vector((SGString<float64_t>**)&support, &num_betas, "support", "support points"); 00338 } 00339 00341 int32_t num_tasks; 00343 int32_t num_tasksqr; 00344 00346 std::vector<int32_t> task_vector_lhs; 00347 00349 std::vector<int32_t> task_vector_rhs; 00350 00352 std::vector<float64_t> distance_matrix; 00353 00355 std::vector<float64_t> similarity_matrix; 00356 00358 int32_t num_betas; 00359 00361 std::vector<float64_t> betas; 00362 00364 std::vector<float64_t> support; 00365 00366 }; 00367 } 00368 #endif