SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___ 00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___ 00014 00015 #include <shogun/lib/common.h> 00016 #include <shogun/lib/Trie.h> 00017 #include <shogun/kernel/StringKernel.h> 00018 #include <shogun/kernel/MultitaskKernelMklNormalizer.h> 00019 #include <shogun/features/StringFeatures.h> 00020 00021 namespace shogun 00022 { 00023 00025 enum EWDKernType 00026 { 00027 E_WD=0, 00028 E_EXTERNAL=1, 00029 00030 E_BLOCK_CONST=2, 00031 E_BLOCK_LINEAR=3, 00032 E_BLOCK_SQPOLY=4, 00033 E_BLOCK_CUBICPOLY=5, 00034 E_BLOCK_EXP=6, 00035 E_BLOCK_LOG=7, 00036 }; 00037 00038 00053 class CWeightedDegreeStringKernel: public CStringKernel<char> 00054 { 00055 public: 00056 00060 CWeightedDegreeStringKernel(); 00061 00062 00068 CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD); 00069 00075 CWeightedDegreeStringKernel(float64_t* weights, int32_t degree); 00076 00083 CWeightedDegreeStringKernel( 00084 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree); 00085 00086 virtual ~CWeightedDegreeStringKernel(); 00087 00094 virtual bool init(CFeatures* l, CFeatures* r); 00095 00097 virtual void cleanup(); 00098 00106 EWDKernType get_type() const 00107 { 00108 return type; 00109 } 00110 00115 virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; } 00116 00121 virtual const char* get_name() const { 00122 return "WeightedDegreeStringKernel"; 00123 } 00124 00132 inline virtual bool init_optimization( 00133 int32_t count, int32_t *IDX, float64_t* alphas) 00134 { 00135 return init_optimization(count, IDX, alphas, -1); 00136 } 00137 00148 virtual bool init_optimization( 00149 int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num); 00150 00155 virtual bool delete_optimization(); 00156 00162 virtual float64_t compute_optimized(int32_t idx) 00163 { 00164 if (get_is_initialized()) 00165 return compute_by_tree(idx); 00166 00167 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n"); 00168 return 0; 00169 } 00170 00175 static void* compute_batch_helper(void* p); 00176 00187 virtual void compute_batch( 00188 int32_t num_vec, int32_t* vec_idx, float64_t* target, 00189 int32_t num_suppvec, int32_t* IDX, float64_t* alphas, 00190 float64_t factor=1.0); 00191 00195 inline virtual void clear_normal() 00196 { 00197 if (get_is_initialized()) 00198 { 00199 00200 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00201 SG_ERROR("not implemented"); 00202 00203 tries->delete_trees(max_mismatch==0); 00204 set_is_initialized(false); 00205 } 00206 } 00207 00213 inline virtual void add_to_normal(int32_t idx, float64_t weight) 00214 { 00215 00216 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00217 SG_ERROR("not implemented"); 00218 00219 if (max_mismatch==0) 00220 add_example_to_tree(idx, weight); 00221 else 00222 add_example_to_tree_mismatch(idx, weight); 00223 00224 set_is_initialized(true); 00225 } 00226 00231 inline virtual int32_t get_num_subkernels() 00232 { 00233 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00234 return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas(); 00235 if (position_weights!=NULL) 00236 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ; 00237 if (length==0) 00238 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize); 00239 return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ; 00240 } 00241 00247 inline void compute_by_subkernel( 00248 int32_t idx, float64_t * subkernel_contrib) 00249 { 00250 00251 if (get_is_initialized()) 00252 { 00253 00254 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00255 SG_ERROR("not implemented"); 00256 00257 compute_by_tree(idx, subkernel_contrib); 00258 return ; 00259 } 00260 00261 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n"); 00262 } 00263 00269 inline const float64_t* get_subkernel_weights(int32_t& num_weights) 00270 { 00271 00272 num_weights = get_num_subkernels(); 00273 00274 SG_FREE(weights_buffer); 00275 weights_buffer = SG_MALLOC(float64_t, num_weights); 00276 00277 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00278 for (int32_t i=0; i<num_weights; i++) 00279 weights_buffer[i] = ((CMultitaskKernelMklNormalizer*)normalizer)->get_beta(i); 00280 else if (position_weights!=NULL) 00281 for (int32_t i=0; i<num_weights; i++) 00282 weights_buffer[i] = position_weights[i*mkl_stepsize]; 00283 else 00284 for (int32_t i=0; i<num_weights; i++) 00285 weights_buffer[i] = weights[i*mkl_stepsize]; 00286 00287 return weights_buffer; 00288 } 00289 00294 virtual void set_subkernel_weights(SGVector<float64_t> w) 00295 { 00296 float64_t* weights2=w.vector; 00297 int32_t num_weights2=w.vlen; 00298 int32_t num_weights = get_num_subkernels(); 00299 if (num_weights!=num_weights2) 00300 SG_ERROR( "number of weights do not match\n"); 00301 00302 00303 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00304 for (int32_t i=0; i<num_weights; i++) 00305 ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]); 00306 else if (position_weights!=NULL) 00307 { 00308 for (int32_t i=0; i<num_weights; i++) 00309 { 00310 for (int32_t j=0; j<mkl_stepsize; j++) 00311 { 00312 if (i*mkl_stepsize+j<seq_length) 00313 position_weights[i*mkl_stepsize+j] = weights2[i]; 00314 } 00315 } 00316 } 00317 else if (length==0) 00318 { 00319 for (int32_t i=0; i<num_weights; i++) 00320 { 00321 for (int32_t j=0; j<mkl_stepsize; j++) 00322 { 00323 if (i*mkl_stepsize+j<get_degree()) 00324 weights[i*mkl_stepsize+j] = weights2[i]; 00325 } 00326 } 00327 } 00328 else 00329 { 00330 for (int32_t i=0; i<num_weights; i++) 00331 { 00332 for (int32_t j=0; j<mkl_stepsize; j++) 00333 { 00334 if (i*mkl_stepsize+j<get_degree()*length) 00335 weights[i*mkl_stepsize+j] = weights2[i]; 00336 } 00337 } 00338 } 00339 } 00340 00345 virtual bool set_normalizer(CKernelNormalizer* normalizer_) { 00346 00347 if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) { 00348 unset_property(KP_LINADD); 00349 unset_property(KP_BATCHEVALUATION); 00350 } 00351 else 00352 { 00353 set_property(KP_LINADD); 00354 set_property(KP_BATCHEVALUATION); 00355 } 00356 00357 00358 return CStringKernel<char>::set_normalizer(normalizer_); 00359 00360 } 00361 00362 // other kernel tree operations 00368 float64_t *compute_abs_weights(int32_t & len); 00369 00376 void compute_by_tree(int32_t idx, float64_t *LevelContrib); 00377 00382 bool is_tree_initialized() { return tree_initialized; } 00383 00389 inline float64_t *get_degree_weights(int32_t& d, int32_t& len) 00390 { 00391 d=degree; 00392 len=length; 00393 return weights; 00394 } 00395 00401 inline float64_t *get_weights(int32_t& num_weights) 00402 { 00403 00404 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00405 SG_ERROR("not implemented"); 00406 00407 if (position_weights!=NULL) 00408 { 00409 num_weights = seq_length ; 00410 return position_weights ; 00411 } 00412 if (length==0) 00413 num_weights = degree ; 00414 else 00415 num_weights = degree*length ; 00416 return weights; 00417 } 00418 00424 inline float64_t *get_position_weights(int32_t& len) 00425 { 00426 len=seq_length; 00427 return position_weights; 00428 } 00429 00435 bool set_wd_weights_by_type(EWDKernType type); 00436 00441 inline void set_wd_weights(SGVector<float64_t> new_weights) 00442 { 00443 set_weights(SGMatrix<float64_t>(new_weights.vector,new_weights.vlen,0)); 00444 } 00445 00450 bool set_weights(SGMatrix<float64_t> new_weights); 00451 00458 bool set_position_weights(float64_t* pws, int32_t len); 00459 00464 bool init_block_weights(); 00465 00470 bool init_block_weights_from_wd(); 00471 00476 bool init_block_weights_from_wd_external(); 00477 00482 bool init_block_weights_const(); 00483 00488 bool init_block_weights_linear(); 00489 00494 bool init_block_weights_sqpoly(); 00495 00500 bool init_block_weights_cubicpoly(); 00501 00506 bool init_block_weights_exp(); 00507 00512 bool init_block_weights_log(); 00513 00518 bool delete_position_weights() 00519 { 00520 SG_FREE(position_weights); 00521 position_weights=NULL; 00522 return true; 00523 } 00524 00530 bool set_max_mismatch(int32_t max); 00531 00536 inline int32_t get_max_mismatch() const { return max_mismatch; } 00537 00543 inline bool set_degree(int32_t deg) { degree=deg; return true; } 00544 00549 inline int32_t get_degree() const { return degree; } 00550 00556 inline bool set_use_block_computation(bool block) 00557 { 00558 block_computation=block; 00559 return true; 00560 } 00561 00566 inline bool get_use_block_computation() { return block_computation; } 00567 00573 inline bool set_mkl_stepsize(int32_t step) 00574 { 00575 if (step<1) 00576 SG_ERROR("Stepsize must be a positive integer\n"); 00577 mkl_stepsize=step; 00578 return true; 00579 } 00580 00585 inline int32_t get_mkl_stepsize() { return mkl_stepsize; } 00586 00592 inline bool set_which_degree(int32_t which) 00593 { 00594 which_degree=which; 00595 return true; 00596 } 00597 00602 inline int32_t get_which_degree() { return which_degree; } 00603 00604 protected: 00606 void create_empty_tries(); 00607 00613 void add_example_to_tree(int32_t idx, float64_t weight); 00614 00621 void add_example_to_single_tree( 00622 int32_t idx, float64_t weight, int32_t tree_num); 00623 00629 void add_example_to_tree_mismatch(int32_t idx, float64_t weight); 00630 00637 void add_example_to_single_tree_mismatch( 00638 int32_t idx, float64_t weight, int32_t tree_num); 00639 00645 float64_t compute_by_tree(int32_t idx); 00646 00655 float64_t compute(int32_t idx_a, int32_t idx_b); 00656 00665 float64_t compute_with_mismatch( 00666 char* avec, int32_t alen, char* bvec, int32_t blen); 00667 00676 float64_t compute_without_mismatch( 00677 char* avec, int32_t alen, char* bvec, int32_t blen); 00678 00687 float64_t compute_without_mismatch_matrix( 00688 char* avec, int32_t alen, char* bvec, int32_t blen); 00689 00698 float64_t compute_using_block(char* avec, int32_t alen, 00699 char* bvec, int32_t blen); 00700 00702 virtual void remove_lhs(); 00703 00704 private: 00707 void init(); 00708 00709 protected: 00713 float64_t* weights; 00715 int32_t weights_degree; 00717 int32_t weights_length; 00718 00719 00721 float64_t* position_weights; 00723 int32_t position_weights_len; 00725 float64_t* weights_buffer; 00727 int32_t mkl_stepsize; 00729 int32_t degree; 00731 int32_t length; 00732 00734 int32_t max_mismatch; 00736 int32_t seq_length; 00737 00739 bool initialized; 00740 00742 bool block_computation; 00743 00745 float64_t* block_weights; 00747 EWDKernType type; 00749 int32_t which_degree; 00750 00752 CTrie<DNATrie>* tries; 00753 00755 bool tree_initialized; 00756 00758 CAlphabet* alphabet; 00759 }; 00760 00761 } 00762 00763 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H__ */