SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/lib/common.h> 00013 #include <shogun/io/SGIO.h> 00014 #include <shogun/lib/Signal.h> 00015 #include <shogun/lib/Trie.h> 00016 #include <shogun/base/Parallel.h> 00017 00018 #include <shogun/kernel/WeightedDegreePositionStringKernel.h> 00019 #include <shogun/kernel/SqrtDiagKernelNormalizer.h> 00020 #include <shogun/features/Features.h> 00021 #include <shogun/features/StringFeatures.h> 00022 00023 #include <shogun/classifier/svm/SVM.h> 00024 00025 #ifdef HAVE_PTHREAD 00026 #include <pthread.h> 00027 #endif 00028 00029 using namespace shogun; 00030 00031 #define TRIES(X) ((use_poim_tries) ? (poim_tries.X) : (tries.X)) 00032 00033 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00034 template <class Trie> struct S_THREAD_PARAM 00035 { 00036 int32_t* vec; 00037 float64_t* result; 00038 float64_t* weights; 00039 CWeightedDegreePositionStringKernel* kernel; 00040 CTrie<Trie>* tries; 00041 float64_t factor; 00042 int32_t j; 00043 int32_t start; 00044 int32_t end; 00045 int32_t length; 00046 int32_t max_shift; 00047 int32_t* shift; 00048 int32_t* vec_idx; 00049 }; 00050 #endif // DOXYGEN_SHOULD_SKIP_THIS 00051 00052 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00053 void) 00054 : CStringKernel<char>() 00055 { 00056 init(); 00057 } 00058 00059 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00060 int32_t size, int32_t d, int32_t mm, int32_t mkls) 00061 : CStringKernel<char>(size) 00062 { 00063 init(); 00064 00065 mkl_stepsize=mkls; 00066 degree=d; 00067 max_mismatch=mm; 00068 00069 tries=CTrie<DNATrie>(d); 00070 poim_tries=CTrie<POIMTrie>(d); 00071 00072 set_wd_weights(); 00073 ASSERT(weights); 00074 } 00075 00076 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00077 int32_t size, float64_t* w, int32_t d, int32_t mm, int32_t* s, int32_t sl, 00078 int32_t mkls) 00079 : CStringKernel<char>(size) 00080 { 00081 init(); 00082 00083 mkl_stepsize=mkls; 00084 degree=d; 00085 max_mismatch=mm; 00086 00087 tries=CTrie<DNATrie>(d); 00088 poim_tries=CTrie<POIMTrie>(d); 00089 00090 weights=SG_MALLOC(float64_t, d*(1+max_mismatch)); 00091 weights_degree=degree; 00092 weights_length=(1+max_mismatch); 00093 00094 for (int32_t i=0; i<d*(1+max_mismatch); i++) 00095 weights[i]=w[i]; 00096 00097 set_shifts(SGVector<int32_t>(s, sl)); 00098 } 00099 00100 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel( 00101 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d) 00102 : CStringKernel<char>() 00103 { 00104 init(); 00105 00106 mkl_stepsize=1; 00107 degree=d; 00108 00109 tries=CTrie<DNATrie>(d); 00110 poim_tries=CTrie<POIMTrie>(d); 00111 00112 set_wd_weights(); 00113 ASSERT(weights); 00114 00115 init(l, r); 00116 } 00117 00118 00119 CWeightedDegreePositionStringKernel::~CWeightedDegreePositionStringKernel() 00120 { 00121 cleanup(); 00122 cleanup_POIM2(); 00123 00124 SG_FREE(shift); 00125 shift=NULL; 00126 00127 SG_FREE(weights); 00128 weights=NULL; 00129 weights_degree=0; 00130 weights_length=0; 00131 00132 SG_FREE(block_weights); 00133 block_weights=NULL; 00134 00135 SG_FREE(position_weights); 00136 position_weights=NULL; 00137 00138 SG_FREE(position_weights_lhs); 00139 position_weights_lhs=NULL; 00140 00141 SG_FREE(position_weights_rhs); 00142 position_weights_rhs=NULL; 00143 00144 SG_FREE(weights_buffer); 00145 weights_buffer=NULL; 00146 } 00147 00148 void CWeightedDegreePositionStringKernel::remove_lhs() 00149 { 00150 SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n"); 00151 delete_optimization(); 00152 00153 tries.destroy(); 00154 poim_tries.destroy(); 00155 00156 CKernel::remove_lhs(); 00157 } 00158 00159 void CWeightedDegreePositionStringKernel::create_empty_tries() 00160 { 00161 ASSERT(lhs); 00162 seq_length = ((CStringFeatures<char>*) lhs)->get_max_vector_length(); 00163 00164 if (opt_type==SLOWBUTMEMEFFICIENT) 00165 { 00166 tries.create(seq_length, true); 00167 poim_tries.create(seq_length, true); 00168 } 00169 else if (opt_type==FASTBUTMEMHUNGRY) 00170 { 00171 tries.create(seq_length, false); // still buggy 00172 poim_tries.create(seq_length, false); // still buggy 00173 } 00174 else 00175 SG_ERROR( "unknown optimization type\n"); 00176 } 00177 00178 bool CWeightedDegreePositionStringKernel::init(CFeatures* l, CFeatures* r) 00179 { 00180 int32_t lhs_changed = (lhs!=l) ; 00181 int32_t rhs_changed = (rhs!=r) ; 00182 00183 CStringKernel<char>::init(l,r); 00184 00185 SG_DEBUG( "lhs_changed: %i\n", lhs_changed) ; 00186 SG_DEBUG( "rhs_changed: %i\n", rhs_changed) ; 00187 00188 CStringFeatures<char>* sf_l=(CStringFeatures<char>*) l; 00189 CStringFeatures<char>* sf_r=(CStringFeatures<char>*) r; 00190 00191 /* set shift */ 00192 if (shift_len==0) { 00193 shift_len=sf_l->get_vector_length(0); 00194 int32_t *shifts=SG_MALLOC(int32_t, shift_len); 00195 for (int32_t i=0; i<shift_len; i++) { 00196 shifts[i]=1; 00197 } 00198 set_shifts(SGVector<int32_t>(shifts, shift_len)); 00199 SG_FREE(shifts); 00200 } 00201 00202 00203 int32_t len=sf_l->get_max_vector_length(); 00204 if (lhs_changed && !sf_l->have_same_length(len)) 00205 SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n"); 00206 00207 if (rhs_changed && !sf_r->have_same_length(len)) 00208 SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n"); 00209 00210 SG_UNREF(alphabet); 00211 alphabet= sf_l->get_alphabet(); 00212 CAlphabet* ralphabet=sf_r->get_alphabet(); 00213 00214 if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA))) 00215 properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION); 00216 00217 ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet()); 00218 SG_UNREF(ralphabet); 00219 00220 //whenever init is called also init tries and block weights 00221 create_empty_tries(); 00222 init_block_weights(); 00223 00224 return init_normalizer(); 00225 } 00226 00227 void CWeightedDegreePositionStringKernel::cleanup() 00228 { 00229 SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n"); 00230 delete_optimization(); 00231 00232 SG_FREE(block_weights); 00233 block_weights=NULL; 00234 00235 tries.destroy(); 00236 poim_tries.destroy(); 00237 00238 seq_length = 0; 00239 tree_initialized = false; 00240 00241 SG_UNREF(alphabet); 00242 alphabet=NULL; 00243 00244 CKernel::cleanup(); 00245 } 00246 00247 bool CWeightedDegreePositionStringKernel::init_optimization( 00248 int32_t p_count, int32_t * IDX, float64_t * alphas, int32_t tree_num, 00249 int32_t upto_tree) 00250 { 00251 ASSERT(position_weights_lhs==NULL); 00252 ASSERT(position_weights_rhs==NULL); 00253 00254 if (upto_tree<0) 00255 upto_tree=tree_num; 00256 00257 if (max_mismatch!=0) 00258 { 00259 SG_ERROR( "CWeightedDegreePositionStringKernel optimization not implemented for mismatch!=0\n"); 00260 return false ; 00261 } 00262 00263 if (tree_num<0) 00264 SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n"); 00265 00266 delete_optimization(); 00267 00268 if (tree_num<0) 00269 SG_DEBUG( "initializing CWeightedDegreePositionStringKernel optimization\n") ; 00270 00271 for (int32_t i=0; i<p_count; i++) 00272 { 00273 if (tree_num<0) 00274 { 00275 if ( (i % (p_count/10+1)) == 0) 00276 SG_PROGRESS(i,0,p_count); 00277 add_example_to_tree(IDX[i], alphas[i]); 00278 } 00279 else 00280 { 00281 for (int32_t t=tree_num; t<=upto_tree; t++) 00282 add_example_to_single_tree(IDX[i], alphas[i], t); 00283 } 00284 } 00285 00286 if (tree_num<0) 00287 SG_DONE(); 00288 00289 set_is_initialized(true) ; 00290 return true ; 00291 } 00292 00293 bool CWeightedDegreePositionStringKernel::delete_optimization() 00294 { 00295 if ((opt_type==FASTBUTMEMHUNGRY) && (tries.get_use_compact_terminal_nodes())) 00296 { 00297 tries.set_use_compact_terminal_nodes(false) ; 00298 SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ; 00299 } 00300 00301 if (get_is_initialized()) 00302 { 00303 if (opt_type==SLOWBUTMEMEFFICIENT) 00304 tries.delete_trees(true); 00305 else if (opt_type==FASTBUTMEMHUNGRY) 00306 tries.delete_trees(false); // still buggy 00307 else { 00308 SG_ERROR( "unknown optimization type\n"); 00309 } 00310 set_is_initialized(false); 00311 00312 return true; 00313 } 00314 00315 return false; 00316 } 00317 00318 float64_t CWeightedDegreePositionStringKernel::compute_with_mismatch( 00319 char* avec, int32_t alen, char* bvec, int32_t blen) 00320 { 00321 float64_t* max_shift_vec= SG_MALLOC(float64_t, max_shift); 00322 float64_t sum0=0 ; 00323 for (int32_t i=0; i<max_shift; i++) 00324 max_shift_vec[i]=0 ; 00325 00326 // no shift 00327 for (int32_t i=0; i<alen; i++) 00328 { 00329 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00330 continue ; 00331 00332 int32_t mismatches=0; 00333 float64_t sumi = 0.0 ; 00334 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00335 { 00336 if (avec[i+j]!=bvec[i+j]) 00337 { 00338 mismatches++ ; 00339 if (mismatches>max_mismatch) 00340 break ; 00341 } ; 00342 sumi += weights[j+degree*mismatches]; 00343 } 00344 if (position_weights!=NULL) 00345 sum0 += position_weights[i]*sumi ; 00346 else 00347 sum0 += sumi ; 00348 } ; 00349 00350 for (int32_t i=0; i<alen; i++) 00351 { 00352 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00353 { 00354 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00355 continue ; 00356 00357 float64_t sumi1 = 0.0 ; 00358 // shift in sequence a 00359 int32_t mismatches=0; 00360 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00361 { 00362 if (avec[i+j+k]!=bvec[i+j]) 00363 { 00364 mismatches++ ; 00365 if (mismatches>max_mismatch) 00366 break ; 00367 } ; 00368 sumi1 += weights[j+degree*mismatches]; 00369 } 00370 float64_t sumi2 = 0.0 ; 00371 // shift in sequence b 00372 mismatches=0; 00373 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00374 { 00375 if (avec[i+j]!=bvec[i+j+k]) 00376 { 00377 mismatches++ ; 00378 if (mismatches>max_mismatch) 00379 break ; 00380 } ; 00381 sumi2 += weights[j+degree*mismatches]; 00382 } 00383 if (position_weights!=NULL) 00384 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00385 else 00386 max_shift_vec[k-1] += sumi1 + sumi2 ; 00387 } ; 00388 } 00389 00390 float64_t result = sum0 ; 00391 for (int32_t i=0; i<max_shift; i++) 00392 result += max_shift_vec[i]/(2*(i+1)) ; 00393 00394 SG_FREE(max_shift_vec); 00395 return result ; 00396 } 00397 00398 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch( 00399 char* avec, int32_t alen, char* bvec, int32_t blen) 00400 { 00401 float64_t* max_shift_vec = SG_MALLOC(float64_t, max_shift); 00402 float64_t sum0=0 ; 00403 for (int32_t i=0; i<max_shift; i++) 00404 max_shift_vec[i]=0 ; 00405 00406 // no shift 00407 for (int32_t i=0; i<alen; i++) 00408 { 00409 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00410 continue ; 00411 00412 float64_t sumi = 0.0 ; 00413 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00414 { 00415 if (avec[i+j]!=bvec[i+j]) 00416 break ; 00417 sumi += weights[j]; 00418 } 00419 if (position_weights!=NULL) 00420 sum0 += position_weights[i]*sumi ; 00421 else 00422 sum0 += sumi ; 00423 } ; 00424 00425 for (int32_t i=0; i<alen; i++) 00426 { 00427 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00428 { 00429 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00430 continue ; 00431 00432 float64_t sumi1 = 0.0 ; 00433 // shift in sequence a 00434 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00435 { 00436 if (avec[i+j+k]!=bvec[i+j]) 00437 break ; 00438 sumi1 += weights[j]; 00439 } 00440 float64_t sumi2 = 0.0 ; 00441 // shift in sequence b 00442 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00443 { 00444 if (avec[i+j]!=bvec[i+j+k]) 00445 break ; 00446 sumi2 += weights[j]; 00447 } 00448 if (position_weights!=NULL) 00449 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00450 else 00451 max_shift_vec[k-1] += sumi1 + sumi2 ; 00452 } ; 00453 } 00454 00455 float64_t result = sum0 ; 00456 for (int32_t i=0; i<max_shift; i++) 00457 result += max_shift_vec[i]/(2*(i+1)) ; 00458 00459 SG_FREE(max_shift_vec); 00460 00461 return result ; 00462 } 00463 00464 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_matrix( 00465 char* avec, int32_t alen, char* bvec, int32_t blen) 00466 { 00467 float64_t* max_shift_vec = SG_MALLOC(float64_t, max_shift); 00468 float64_t sum0=0 ; 00469 for (int32_t i=0; i<max_shift; i++) 00470 max_shift_vec[i]=0 ; 00471 00472 // no shift 00473 for (int32_t i=0; i<alen; i++) 00474 { 00475 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00476 continue ; 00477 float64_t sumi = 0.0 ; 00478 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00479 { 00480 if (avec[i+j]!=bvec[i+j]) 00481 break ; 00482 sumi += weights[i*degree+j]; 00483 } 00484 if (position_weights!=NULL) 00485 sum0 += position_weights[i]*sumi ; 00486 else 00487 sum0 += sumi ; 00488 } ; 00489 00490 for (int32_t i=0; i<alen; i++) 00491 { 00492 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00493 { 00494 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00495 continue ; 00496 00497 float64_t sumi1 = 0.0 ; 00498 // shift in sequence a 00499 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00500 { 00501 if (avec[i+j+k]!=bvec[i+j]) 00502 break ; 00503 sumi1 += weights[i*degree+j]; 00504 } 00505 float64_t sumi2 = 0.0 ; 00506 // shift in sequence b 00507 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00508 { 00509 if (avec[i+j]!=bvec[i+j+k]) 00510 break ; 00511 sumi2 += weights[i*degree+j]; 00512 } 00513 if (position_weights!=NULL) 00514 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00515 else 00516 max_shift_vec[k-1] += sumi1 + sumi2 ; 00517 } ; 00518 } 00519 00520 float64_t result = sum0 ; 00521 for (int32_t i=0; i<max_shift; i++) 00522 result += max_shift_vec[i]/(2*(i+1)) ; 00523 00524 SG_FREE(max_shift_vec); 00525 return result ; 00526 } 00527 00528 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_position_weights( 00529 char* avec, float64_t* pos_weights_lhs, int32_t alen, char* bvec, 00530 float64_t* pos_weights_rhs, int32_t blen) 00531 { 00532 float64_t* max_shift_vec = SG_MALLOC(float64_t, max_shift); 00533 float64_t sum0=0 ; 00534 for (int32_t i=0; i<max_shift; i++) 00535 max_shift_vec[i]=0 ; 00536 00537 // no shift 00538 for (int32_t i=0; i<alen; i++) 00539 { 00540 if ((position_weights!=NULL) && (position_weights[i]==0.0)) 00541 continue ; 00542 00543 float64_t sumi = 0.0 ; 00544 float64_t posweight_lhs = 0.0 ; 00545 float64_t posweight_rhs = 0.0 ; 00546 for (int32_t j=0; (j<degree) && (i+j<alen); j++) 00547 { 00548 posweight_lhs += pos_weights_lhs[i+j] ; 00549 posweight_rhs += pos_weights_rhs[i+j] ; 00550 00551 if (avec[i+j]!=bvec[i+j]) 00552 break ; 00553 sumi += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ; 00554 } 00555 if (position_weights!=NULL) 00556 sum0 += position_weights[i]*sumi ; 00557 else 00558 sum0 += sumi ; 00559 } ; 00560 00561 for (int32_t i=0; i<alen; i++) 00562 { 00563 for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++) 00564 { 00565 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0)) 00566 continue ; 00567 00568 // shift in sequence a 00569 float64_t sumi1 = 0.0 ; 00570 float64_t posweight_lhs = 0.0 ; 00571 float64_t posweight_rhs = 0.0 ; 00572 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00573 { 00574 posweight_lhs += pos_weights_lhs[i+j+k] ; 00575 posweight_rhs += pos_weights_rhs[i+j] ; 00576 if (avec[i+j+k]!=bvec[i+j]) 00577 break ; 00578 sumi1 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ; 00579 } 00580 // shift in sequence b 00581 float64_t sumi2 = 0.0 ; 00582 posweight_lhs = 0.0 ; 00583 posweight_rhs = 0.0 ; 00584 for (int32_t j=0; (j<degree) && (i+j+k<alen); j++) 00585 { 00586 posweight_lhs += pos_weights_lhs[i+j] ; 00587 posweight_rhs += pos_weights_rhs[i+j+k] ; 00588 if (avec[i+j]!=bvec[i+j+k]) 00589 break ; 00590 sumi2 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ; 00591 } 00592 if (position_weights!=NULL) 00593 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ; 00594 else 00595 max_shift_vec[k-1] += sumi1 + sumi2 ; 00596 } ; 00597 } 00598 00599 float64_t result = sum0 ; 00600 for (int32_t i=0; i<max_shift; i++) 00601 result += max_shift_vec[i]/(2*(i+1)) ; 00602 00603 SG_FREE(max_shift_vec); 00604 return result ; 00605 } 00606 00607 00608 float64_t CWeightedDegreePositionStringKernel::compute( 00609 int32_t idx_a, int32_t idx_b) 00610 { 00611 int32_t alen, blen; 00612 bool free_avec, free_bvec; 00613 00614 char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec); 00615 char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); 00616 // can only deal with strings of same length 00617 ASSERT(alen==blen); 00618 ASSERT(shift_len==alen); 00619 00620 float64_t result = 0 ; 00621 if (position_weights_lhs!=NULL || position_weights_rhs!=NULL) 00622 { 00623 ASSERT(max_mismatch==0); 00624 float64_t* position_weights_rhs_ = position_weights_rhs ; 00625 if (lhs==rhs) 00626 position_weights_rhs_ = position_weights_lhs ; 00627 00628 result = compute_without_mismatch_position_weights(avec, &position_weights_lhs[idx_a*alen], alen, bvec, &position_weights_rhs_[idx_b*blen], blen) ; 00629 } 00630 else if (max_mismatch > 0) 00631 result = compute_with_mismatch(avec, alen, bvec, blen) ; 00632 else if (length==0) 00633 result = compute_without_mismatch(avec, alen, bvec, blen) ; 00634 else 00635 result = compute_without_mismatch_matrix(avec, alen, bvec, blen) ; 00636 00637 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); 00638 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); 00639 00640 return result ; 00641 } 00642 00643 00644 void CWeightedDegreePositionStringKernel::add_example_to_tree( 00645 int32_t idx, float64_t alpha) 00646 { 00647 ASSERT(position_weights_lhs==NULL); 00648 ASSERT(position_weights_rhs==NULL); 00649 ASSERT(alphabet); 00650 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00651 00652 int32_t len=0; 00653 bool free_vec; 00654 char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec); 00655 ASSERT(max_mismatch==0); 00656 int32_t *vec = SG_MALLOC(int32_t, len); 00657 00658 for (int32_t i=0; i<len; i++) 00659 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00660 ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec); 00661 00662 if (opt_type==FASTBUTMEMHUNGRY) 00663 { 00664 //TRIES(set_use_compact_terminal_nodes(false)) ; 00665 ASSERT(!TRIES(get_use_compact_terminal_nodes())); 00666 } 00667 00668 for (int32_t i=0; i<len; i++) 00669 { 00670 int32_t max_s=-1; 00671 00672 if (opt_type==SLOWBUTMEMEFFICIENT) 00673 max_s=0; 00674 else if (opt_type==FASTBUTMEMHUNGRY) 00675 max_s=shift[i]; 00676 else { 00677 SG_ERROR( "unknown optimization type\n"); 00678 } 00679 00680 for (int32_t s=max_s; s>=0; s--) 00681 { 00682 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx); 00683 TRIES(add_to_trie(i, s, vec, alpha_pw, weights, (length!=0))) ; 00684 if ((s==0) || (i+s>=len)) 00685 continue; 00686 00687 TRIES(add_to_trie(i+s, -s, vec, alpha_pw, weights, (length!=0))) ; 00688 } 00689 } 00690 00691 SG_FREE(vec); 00692 tree_initialized=true ; 00693 } 00694 00695 void CWeightedDegreePositionStringKernel::add_example_to_single_tree( 00696 int32_t idx, float64_t alpha, int32_t tree_num) 00697 { 00698 ASSERT(position_weights_lhs==NULL); 00699 ASSERT(position_weights_rhs==NULL); 00700 ASSERT(alphabet); 00701 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00702 00703 int32_t len=0; 00704 bool free_vec; 00705 char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec); 00706 ASSERT(max_mismatch==0); 00707 int32_t *vec=SG_MALLOC(int32_t, len); 00708 int32_t max_s=-1; 00709 00710 if (opt_type==SLOWBUTMEMEFFICIENT) 00711 max_s=0; 00712 else if (opt_type==FASTBUTMEMHUNGRY) 00713 { 00714 ASSERT(!tries.get_use_compact_terminal_nodes()); 00715 max_s=shift[tree_num]; 00716 } 00717 else { 00718 SG_ERROR( "unknown optimization type\n"); 00719 } 00720 for (int32_t i=CMath::max(0,tree_num-max_shift); 00721 i<CMath::min(len,tree_num+degree+max_shift); i++) 00722 { 00723 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00724 } 00725 ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec); 00726 00727 for (int32_t s=max_s; s>=0; s--) 00728 { 00729 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx); 00730 tries.add_to_trie(tree_num, s, vec, alpha_pw, weights, (length!=0)) ; 00731 } 00732 00733 if (opt_type==FASTBUTMEMHUNGRY) 00734 { 00735 for (int32_t i=CMath::max(0,tree_num-max_shift); i<CMath::min(len,tree_num+max_shift+1); i++) 00736 { 00737 int32_t s=tree_num-i; 00738 if ((i+s<len) && (s>=1) && (s<=shift[i])) 00739 { 00740 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx); 00741 tries.add_to_trie(tree_num, -s, vec, alpha_pw, weights, (length!=0)) ; 00742 } 00743 } 00744 } 00745 SG_FREE(vec); 00746 tree_initialized=true ; 00747 } 00748 00749 float64_t CWeightedDegreePositionStringKernel::compute_by_tree(int32_t idx) 00750 { 00751 ASSERT(position_weights_lhs==NULL); 00752 ASSERT(position_weights_rhs==NULL); 00753 ASSERT(alphabet); 00754 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00755 00756 float64_t sum=0; 00757 int32_t len=0; 00758 bool free_vec; 00759 char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec); 00760 ASSERT(max_mismatch==0); 00761 int32_t *vec=SG_MALLOC(int32_t, len); 00762 00763 for (int32_t i=0; i<len; i++) 00764 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00765 00766 ((CStringFeatures<char>*) rhs)->free_feature_vector(char_vec, idx, free_vec); 00767 00768 for (int32_t i=0; i<len; i++) 00769 sum += tries.compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0)) ; 00770 00771 if (opt_type==SLOWBUTMEMEFFICIENT) 00772 { 00773 for (int32_t i=0; i<len; i++) 00774 { 00775 for (int32_t s=1; (s<=shift[i]) && (i+s<len); s++) 00776 { 00777 sum+=tries.compute_by_tree_helper(vec, len, i, i+s, i, weights, (length!=0))/(2*s) ; 00778 sum+=tries.compute_by_tree_helper(vec, len, i+s, i, i+s, weights, (length!=0))/(2*s) ; 00779 } 00780 } 00781 } 00782 00783 SG_FREE(vec); 00784 00785 return normalizer->normalize_rhs(sum, idx); 00786 } 00787 00788 void CWeightedDegreePositionStringKernel::compute_by_tree( 00789 int32_t idx, float64_t* LevelContrib) 00790 { 00791 ASSERT(position_weights_lhs==NULL); 00792 ASSERT(position_weights_rhs==NULL); 00793 ASSERT(alphabet); 00794 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 00795 00796 int32_t len=0; 00797 bool free_vec; 00798 char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec); 00799 ASSERT(max_mismatch==0); 00800 int32_t *vec=SG_MALLOC(int32_t, len); 00801 00802 for (int32_t i=0; i<len; i++) 00803 vec[i]=alphabet->remap_to_bin(char_vec[i]); 00804 00805 ((CStringFeatures<char>*) rhs)->free_feature_vector(char_vec, idx, free_vec); 00806 00807 for (int32_t i=0; i<len; i++) 00808 { 00809 tries.compute_by_tree_helper(vec, len, i, i, i, LevelContrib, 00810 normalizer->normalize_rhs(1.0, idx), mkl_stepsize, weights, 00811 (length!=0)); 00812 } 00813 00814 if (opt_type==SLOWBUTMEMEFFICIENT) 00815 { 00816 for (int32_t i=0; i<len; i++) 00817 for (int32_t k=1; (k<=shift[i]) && (i+k<len); k++) 00818 { 00819 tries.compute_by_tree_helper(vec, len, i, i+k, i, LevelContrib, 00820 normalizer->normalize_rhs(1.0/(2*k), idx), mkl_stepsize, 00821 weights, (length!=0)) ; 00822 tries.compute_by_tree_helper(vec, len, i+k, i, i+k, 00823 LevelContrib, normalizer->normalize_rhs(1.0/(2*k), idx), 00824 mkl_stepsize, weights, (length!=0)) ; 00825 } 00826 } 00827 00828 SG_FREE(vec); 00829 } 00830 00831 float64_t* CWeightedDegreePositionStringKernel::compute_abs_weights( 00832 int32_t &len) 00833 { 00834 return tries.compute_abs_weights(len); 00835 } 00836 00837 void CWeightedDegreePositionStringKernel::set_shifts(SGVector<int32_t> shifts) 00838 { 00839 SG_FREE(shift); 00840 00841 shift_len = shifts.vlen; 00842 shift = SG_MALLOC(int32_t, shift_len); 00843 00844 if (shift) 00845 { 00846 max_shift = 0 ; 00847 00848 for (int32_t i=0; i<shift_len; i++) 00849 { 00850 shift[i] = shifts.vector[i] ; 00851 max_shift = CMath::max(shift[i], max_shift); 00852 } 00853 00854 ASSERT(max_shift>=0 && max_shift<=shift_len); 00855 } 00856 } 00857 00858 bool CWeightedDegreePositionStringKernel::set_wd_weights() 00859 { 00860 ASSERT(degree>0); 00861 00862 SG_FREE(weights); 00863 weights=SG_MALLOC(float64_t, degree); 00864 weights_degree=degree; 00865 weights_length=1; 00866 00867 if (weights) 00868 { 00869 int32_t i; 00870 float64_t sum=0; 00871 for (i=0; i<degree; i++) 00872 { 00873 weights[i]=degree-i; 00874 sum+=weights[i]; 00875 } 00876 for (i=0; i<degree; i++) 00877 weights[i]/=sum; 00878 00879 for (i=0; i<degree; i++) 00880 { 00881 for (int32_t j=1; j<=max_mismatch; j++) 00882 { 00883 if (j<i+1) 00884 { 00885 int32_t nk=CMath::nchoosek(i+1, j); 00886 weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j)); 00887 } 00888 else 00889 weights[i+j*degree]= 0; 00890 } 00891 } 00892 00893 return true; 00894 } 00895 else 00896 return false; 00897 } 00898 00899 bool CWeightedDegreePositionStringKernel::set_weights(SGMatrix<float64_t> new_weights) 00900 { 00901 float64_t* ws=new_weights.matrix; 00902 int32_t d=new_weights.num_rows; 00903 int32_t len=new_weights.num_cols; 00904 00905 if (d!=degree || len<0) 00906 SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree); 00907 00908 degree=d; 00909 length=len; 00910 00911 if (len <= 0) 00912 len=1; 00913 00914 weights_degree=degree; 00915 weights_length=len+max_mismatch; 00916 00917 SG_DEBUG("Creating weights of size %dx%d\n", weights_degree, weights_length); 00918 int32_t num_weights=weights_degree*weights_length; 00919 SG_FREE(weights); 00920 weights=SG_MALLOC(float64_t, num_weights); 00921 00922 for (int32_t i=0; i<degree*len; i++) 00923 weights[i]=ws[i]; 00924 00925 return true; 00926 } 00927 00928 void CWeightedDegreePositionStringKernel::set_position_weights(SGVector<float64_t> pws) 00929 { 00930 if (seq_length==0) 00931 seq_length=pws.vlen; 00932 00933 if (seq_length!=pws.vlen) 00934 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, pws.vlen); 00935 00936 SG_FREE(position_weights); 00937 position_weights=SG_MALLOC(float64_t, pws.vlen); 00938 position_weights_len=pws.vlen; 00939 tries.set_position_weights(position_weights); 00940 00941 for (int32_t i=0; i<pws.vlen; i++) 00942 position_weights[i]=pws.vector[i]; 00943 } 00944 00945 bool CWeightedDegreePositionStringKernel::set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num) 00946 { 00947 if (position_weights_rhs==position_weights_lhs) 00948 position_weights_rhs=NULL; 00949 else 00950 delete_position_weights_rhs(); 00951 00952 if (len==0) 00953 { 00954 return delete_position_weights_lhs(); 00955 } 00956 00957 if (seq_length!=len) 00958 { 00959 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len); 00960 return false; 00961 } 00962 00963 SG_FREE(position_weights_lhs); 00964 position_weights_lhs=SG_MALLOC(float64_t, len*num); 00965 position_weights_lhs_len=len*num; 00966 00967 for (int32_t i=0; i<len*num; i++) 00968 position_weights_lhs[i]=pws[i]; 00969 00970 return true; 00971 } 00972 00973 bool CWeightedDegreePositionStringKernel::set_position_weights_rhs( 00974 float64_t* pws, int32_t len, int32_t num) 00975 { 00976 if (len==0) 00977 { 00978 if (position_weights_rhs==position_weights_lhs) 00979 { 00980 position_weights_rhs=NULL; 00981 return true; 00982 } 00983 return delete_position_weights_rhs(); 00984 } 00985 00986 if (seq_length!=len) 00987 { 00988 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len); 00989 return false; 00990 } 00991 00992 SG_FREE(position_weights_rhs); 00993 position_weights_rhs=SG_MALLOC(float64_t, len*num); 00994 position_weights_rhs_len=len*num; 00995 00996 for (int32_t i=0; i<len*num; i++) 00997 position_weights_rhs[i]=pws[i]; 00998 00999 return true; 01000 } 01001 01002 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd() 01003 { 01004 SG_FREE(block_weights); 01005 block_weights=SG_MALLOC(float64_t, CMath::max(seq_length,degree)); 01006 01007 if (block_weights) 01008 { 01009 int32_t k; 01010 float64_t d=degree; // use float to evade rounding errors below 01011 01012 for (k=0; k<degree; k++) 01013 block_weights[k]= 01014 (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1)); 01015 for (k=degree; k<seq_length; k++) 01016 block_weights[k]=(-d+3*k+4)/3; 01017 } 01018 01019 return (block_weights!=NULL); 01020 } 01021 01022 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd_external() 01023 { 01024 ASSERT(weights); 01025 SG_FREE(block_weights); 01026 block_weights=SG_MALLOC(float64_t, CMath::max(seq_length,degree)); 01027 01028 if (block_weights) 01029 { 01030 int32_t i=0; 01031 block_weights[0]=weights[0]; 01032 for (i=1; i<CMath::max(seq_length,degree); i++) 01033 block_weights[i]=0; 01034 01035 for (i=1; i<CMath::max(seq_length,degree); i++) 01036 { 01037 block_weights[i]=block_weights[i-1]; 01038 01039 float64_t contrib=0; 01040 for (int32_t j=0; j<CMath::min(degree,i+1); j++) 01041 contrib+=weights[j]; 01042 01043 block_weights[i]+=contrib; 01044 } 01045 } 01046 01047 return (block_weights!=NULL); 01048 } 01049 01050 bool CWeightedDegreePositionStringKernel::init_block_weights_const() 01051 { 01052 SG_FREE(block_weights); 01053 block_weights=SG_MALLOC(float64_t, seq_length); 01054 01055 if (block_weights) 01056 { 01057 for (int32_t i=1; i<seq_length+1 ; i++) 01058 block_weights[i-1]=1.0/seq_length; 01059 } 01060 01061 return (block_weights!=NULL); 01062 } 01063 01064 bool CWeightedDegreePositionStringKernel::init_block_weights_linear() 01065 { 01066 SG_FREE(block_weights); 01067 block_weights=SG_MALLOC(float64_t, seq_length); 01068 01069 if (block_weights) 01070 { 01071 for (int32_t i=1; i<seq_length+1 ; i++) 01072 block_weights[i-1]=degree*i; 01073 } 01074 01075 return (block_weights!=NULL); 01076 } 01077 01078 bool CWeightedDegreePositionStringKernel::init_block_weights_sqpoly() 01079 { 01080 SG_FREE(block_weights); 01081 block_weights=SG_MALLOC(float64_t, seq_length); 01082 01083 if (block_weights) 01084 { 01085 for (int32_t i=1; i<degree+1 ; i++) 01086 block_weights[i-1]=((float64_t) i)*i; 01087 01088 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01089 block_weights[i-1]=i; 01090 } 01091 01092 return (block_weights!=NULL); 01093 } 01094 01095 bool CWeightedDegreePositionStringKernel::init_block_weights_cubicpoly() 01096 { 01097 SG_FREE(block_weights); 01098 block_weights=SG_MALLOC(float64_t, seq_length); 01099 01100 if (block_weights) 01101 { 01102 for (int32_t i=1; i<degree+1 ; i++) 01103 block_weights[i-1]=((float64_t) i)*i*i; 01104 01105 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01106 block_weights[i-1]=i; 01107 } 01108 01109 return (block_weights!=NULL); 01110 } 01111 01112 bool CWeightedDegreePositionStringKernel::init_block_weights_exp() 01113 { 01114 SG_FREE(block_weights); 01115 block_weights=SG_MALLOC(float64_t, seq_length); 01116 01117 if (block_weights) 01118 { 01119 for (int32_t i=1; i<degree+1 ; i++) 01120 block_weights[i-1]=exp(((float64_t) i/10.0)); 01121 01122 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01123 block_weights[i-1]=i; 01124 } 01125 01126 return (block_weights!=NULL); 01127 } 01128 01129 bool CWeightedDegreePositionStringKernel::init_block_weights_log() 01130 { 01131 SG_FREE(block_weights); 01132 block_weights=SG_MALLOC(float64_t, seq_length); 01133 01134 if (block_weights) 01135 { 01136 for (int32_t i=1; i<degree+1 ; i++) 01137 block_weights[i-1]=CMath::pow(CMath::log((float64_t) i),2); 01138 01139 for (int32_t i=degree+1; i<seq_length+1 ; i++) 01140 block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2); 01141 } 01142 01143 return (block_weights!=NULL); 01144 } 01145 01146 bool CWeightedDegreePositionStringKernel::init_block_weights() 01147 { 01148 switch (type) 01149 { 01150 case E_WD: 01151 return init_block_weights_from_wd(); 01152 case E_EXTERNAL: 01153 return init_block_weights_from_wd_external(); 01154 case E_BLOCK_CONST: 01155 return init_block_weights_const(); 01156 case E_BLOCK_LINEAR: 01157 return init_block_weights_linear(); 01158 case E_BLOCK_SQPOLY: 01159 return init_block_weights_sqpoly(); 01160 case E_BLOCK_CUBICPOLY: 01161 return init_block_weights_cubicpoly(); 01162 case E_BLOCK_EXP: 01163 return init_block_weights_exp(); 01164 case E_BLOCK_LOG: 01165 return init_block_weights_log(); 01166 }; 01167 return false; 01168 } 01169 01170 01171 01172 void* CWeightedDegreePositionStringKernel::compute_batch_helper(void* p) 01173 { 01174 S_THREAD_PARAM<DNATrie>* params = (S_THREAD_PARAM<DNATrie>*) p; 01175 int32_t j=params->j; 01176 CWeightedDegreePositionStringKernel* wd=params->kernel; 01177 CTrie<DNATrie>* tries=params->tries; 01178 float64_t* weights=params->weights; 01179 int32_t length=params->length; 01180 int32_t max_shift=params->max_shift; 01181 int32_t* vec=params->vec; 01182 float64_t* result=params->result; 01183 float64_t factor=params->factor; 01184 int32_t* shift=params->shift; 01185 int32_t* vec_idx=params->vec_idx; 01186 01187 for (int32_t i=params->start; i<params->end; i++) 01188 { 01189 int32_t len=0; 01190 CStringFeatures<char>* rhs_feat=((CStringFeatures<char>*) wd->get_rhs()); 01191 CAlphabet* alpha=wd->alphabet; 01192 01193 bool free_vec; 01194 char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len, free_vec); 01195 for (int32_t k=CMath::max(0,j-max_shift); k<CMath::min(len,j+wd->get_degree()+max_shift); k++) 01196 vec[k]=alpha->remap_to_bin(char_vec[k]); 01197 rhs_feat->free_feature_vector(char_vec, vec_idx[i], free_vec); 01198 01199 SG_UNREF(rhs_feat); 01200 01201 result[i] += factor*wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]); 01202 01203 if (wd->get_optimization_type()==SLOWBUTMEMEFFICIENT) 01204 { 01205 for (int32_t q=CMath::max(0,j-max_shift); q<CMath::min(len,j+max_shift+1); q++) 01206 { 01207 int32_t s=j-q ; 01208 if ((s>=1) && (s<=shift[q]) && (q+s<len)) 01209 { 01210 result[i] += 01211 wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, 01212 len, q, q+s, q, weights, (length!=0)), 01213 vec_idx[i])/(2.0*s); 01214 } 01215 } 01216 01217 for (int32_t s=1; (s<=shift[j]) && (j+s<len); s++) 01218 { 01219 result[i] += 01220 wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, 01221 len, j+s, j, j+s, weights, (length!=0)), 01222 vec_idx[i])/(2.0*s); 01223 } 01224 } 01225 } 01226 01227 return NULL; 01228 } 01229 01230 void CWeightedDegreePositionStringKernel::compute_batch( 01231 int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec, 01232 int32_t* IDX, float64_t* alphas, float64_t factor) 01233 { 01234 ASSERT(alphabet); 01235 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 01236 ASSERT(position_weights_lhs==NULL); 01237 ASSERT(position_weights_rhs==NULL); 01238 ASSERT(rhs); 01239 ASSERT(num_vec<=rhs->get_num_vectors()); 01240 ASSERT(num_vec>0); 01241 ASSERT(vec_idx); 01242 ASSERT(result); 01243 create_empty_tries(); 01244 01245 int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01246 ASSERT(num_feat>0); 01247 int32_t num_threads=parallel->get_num_threads(); 01248 ASSERT(num_threads>0); 01249 int32_t* vec=SG_MALLOC(int32_t, num_threads*num_feat); 01250 01251 if (num_threads < 2) 01252 { 01253 #ifdef WIN32 01254 for (int32_t j=0; j<num_feat; j++) 01255 #else 01256 CSignal::clear_cancel(); 01257 for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++) 01258 #endif 01259 { 01260 init_optimization(num_suppvec, IDX, alphas, j); 01261 S_THREAD_PARAM<DNATrie> params; 01262 params.vec=vec; 01263 params.result=result; 01264 params.weights=weights; 01265 params.kernel=this; 01266 params.tries=&tries; 01267 params.factor=factor; 01268 params.j=j; 01269 params.start=0; 01270 params.end=num_vec; 01271 params.length=length; 01272 params.max_shift=max_shift; 01273 params.shift=shift; 01274 params.vec_idx=vec_idx; 01275 compute_batch_helper((void*) ¶ms); 01276 01277 SG_PROGRESS(j,0,num_feat); 01278 } 01279 } 01280 #ifdef HAVE_PTHREAD 01281 else 01282 { 01283 01284 CSignal::clear_cancel(); 01285 for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++) 01286 { 01287 init_optimization(num_suppvec, IDX, alphas, j); 01288 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); 01289 S_THREAD_PARAM<DNATrie>* params = SG_MALLOC(S_THREAD_PARAM<DNATrie>, num_threads); 01290 int32_t step= num_vec/num_threads; 01291 int32_t t; 01292 01293 for (t=0; t<num_threads-1; t++) 01294 { 01295 params[t].vec=&vec[num_feat*t]; 01296 params[t].result=result; 01297 params[t].weights=weights; 01298 params[t].kernel=this; 01299 params[t].tries=&tries; 01300 params[t].factor=factor; 01301 params[t].j=j; 01302 params[t].start = t*step; 01303 params[t].end = (t+1)*step; 01304 params[t].length=length; 01305 params[t].max_shift=max_shift; 01306 params[t].shift=shift; 01307 params[t].vec_idx=vec_idx; 01308 pthread_create(&threads[t], NULL, CWeightedDegreePositionStringKernel::compute_batch_helper, (void*)¶ms[t]); 01309 } 01310 01311 params[t].vec=&vec[num_feat*t]; 01312 params[t].result=result; 01313 params[t].weights=weights; 01314 params[t].kernel=this; 01315 params[t].tries=&tries; 01316 params[t].factor=factor; 01317 params[t].j=j; 01318 params[t].start=t*step; 01319 params[t].end=num_vec; 01320 params[t].length=length; 01321 params[t].max_shift=max_shift; 01322 params[t].shift=shift; 01323 params[t].vec_idx=vec_idx; 01324 compute_batch_helper((void*) ¶ms[t]); 01325 01326 for (t=0; t<num_threads-1; t++) 01327 pthread_join(threads[t], NULL); 01328 SG_PROGRESS(j,0,num_feat); 01329 01330 SG_FREE(params); 01331 SG_FREE(threads); 01332 } 01333 } 01334 #endif 01335 01336 SG_FREE(vec); 01337 01338 //really also free memory as this can be huge on testing especially when 01339 //using the combined kernel 01340 create_empty_tries(); 01341 } 01342 01343 float64_t* CWeightedDegreePositionStringKernel::compute_scoring( 01344 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, float64_t* result, 01345 int32_t num_suppvec, int32_t* IDX, float64_t* alphas) 01346 { 01347 ASSERT(position_weights_lhs==NULL); 01348 ASSERT(position_weights_rhs==NULL); 01349 01350 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01351 ASSERT(num_feat>0); 01352 ASSERT(alphabet); 01353 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 01354 01355 num_sym=4; //for now works only w/ DNA 01356 01357 ASSERT(max_degree>0); 01358 01359 // === variables 01360 int32_t* nofsKmers=SG_MALLOC(int32_t, max_degree); 01361 float64_t** C=SG_MALLOC(float64_t*, max_degree); 01362 float64_t** L=SG_MALLOC(float64_t*, max_degree); 01363 float64_t** R=SG_MALLOC(float64_t*, max_degree); 01364 01365 int32_t i; 01366 int32_t k; 01367 01368 // --- return table 01369 int32_t bigtabSize=0; 01370 for (k=0; k<max_degree; ++k ) 01371 { 01372 nofsKmers[k]=(int32_t) CMath::pow(num_sym, k+1); 01373 const int32_t tabSize=nofsKmers[k]*num_feat; 01374 bigtabSize+=tabSize; 01375 } 01376 result=SG_MALLOC(float64_t, bigtabSize); 01377 01378 // --- auxilliary tables 01379 int32_t tabOffs=0; 01380 for( k = 0; k < max_degree; ++k ) 01381 { 01382 const int32_t tabSize = nofsKmers[k] * num_feat; 01383 C[k] = &result[tabOffs]; 01384 L[k] = SG_MALLOC(float64_t, tabSize ); 01385 R[k] = SG_MALLOC(float64_t, tabSize ); 01386 tabOffs+=tabSize; 01387 for(i = 0; i < tabSize; i++ ) 01388 { 01389 C[k][i] = 0.0; 01390 L[k][i] = 0.0; 01391 R[k][i] = 0.0; 01392 } 01393 } 01394 01395 // --- tree parsing info 01396 float64_t* margFactors=SG_MALLOC(float64_t, degree); 01397 01398 int32_t* x = SG_MALLOC(int32_t, degree+1 ); 01399 int32_t* substrs = SG_MALLOC(int32_t, degree+1 ); 01400 // - fill arrays 01401 margFactors[0] = 1.0; 01402 substrs[0] = 0; 01403 for( k=1; k < degree; ++k ) { 01404 margFactors[k] = 0.25 * margFactors[k-1]; 01405 substrs[k] = -1; 01406 } 01407 substrs[degree] = -1; 01408 // - fill struct 01409 struct TreeParseInfo info; 01410 info.num_sym = num_sym; 01411 info.num_feat = num_feat; 01412 info.p = -1; 01413 info.k = -1; 01414 info.nofsKmers = nofsKmers; 01415 info.margFactors = margFactors; 01416 info.x = x; 01417 info.substrs = substrs; 01418 info.y0 = 0; 01419 info.C_k = NULL; 01420 info.L_k = NULL; 01421 info.R_k = NULL; 01422 01423 // === main loop 01424 i = 0; // total progress 01425 for( k = 0; k < max_degree; ++k ) 01426 { 01427 const int32_t nofKmers = nofsKmers[ k ]; 01428 info.C_k = C[k]; 01429 info.L_k = L[k]; 01430 info.R_k = R[k]; 01431 01432 // --- run over all trees 01433 for(int32_t p = 0; p < num_feat; ++p ) 01434 { 01435 init_optimization( num_suppvec, IDX, alphas, p ); 01436 int32_t tree = p ; 01437 for(int32_t j = 0; j < degree+1; j++ ) { 01438 x[j] = -1; 01439 } 01440 tries.traverse( tree, p, info, 0, x, k ); 01441 SG_PROGRESS(i++,0,num_feat*max_degree); 01442 } 01443 01444 // --- add partial overlap scores 01445 if( k > 0 ) { 01446 const int32_t j = k - 1; 01447 const int32_t nofJmers = (int32_t) CMath::pow( num_sym, j+1 ); 01448 for(int32_t p = 0; p < num_feat; ++p ) { 01449 const int32_t offsetJ = nofJmers * p; 01450 const int32_t offsetJ1 = nofJmers * (p+1); 01451 const int32_t offsetK = nofKmers * p; 01452 int32_t y; 01453 int32_t sym; 01454 for( y = 0; y < nofJmers; ++y ) { 01455 for( sym = 0; sym < num_sym; ++sym ) { 01456 const int32_t y_sym = num_sym*y + sym; 01457 const int32_t sym_y = nofJmers*sym + y; 01458 ASSERT(0<=y_sym && y_sym<nofKmers); 01459 ASSERT(0<=sym_y && sym_y<nofKmers); 01460 C[k][ y_sym + offsetK ] += L[j][ y + offsetJ ]; 01461 if( p < num_feat-1 ) { 01462 C[k][ sym_y + offsetK ] += R[j][ y + offsetJ1 ]; 01463 } 01464 } 01465 } 01466 } 01467 } 01468 // if( k > 1 ) 01469 // j = k-1 01470 // for all positions p 01471 // for all j-mers y 01472 // for n in {A,C,G,T} 01473 // C_k[ p, [y,n] ] += L_j[ p, y ] 01474 // C_k[ p, [n,y] ] += R_j[ p+1, y ] 01475 // end; 01476 // end; 01477 // end; 01478 // end; 01479 } 01480 01481 // === return a vector 01482 num_feat=1; 01483 num_sym = bigtabSize; 01484 // --- clean up 01485 SG_FREE(nofsKmers); 01486 SG_FREE(margFactors); 01487 SG_FREE(substrs); 01488 SG_FREE(x); 01489 SG_FREE(C); 01490 for( k = 0; k < max_degree; ++k ) { 01491 SG_FREE(L[k]); 01492 SG_FREE(R[k]); 01493 } 01494 SG_FREE(L); 01495 SG_FREE(R); 01496 return result; 01497 } 01498 01499 char* CWeightedDegreePositionStringKernel::compute_consensus( 01500 int32_t &num_feat, int32_t num_suppvec, int32_t* IDX, float64_t* alphas) 01501 { 01502 ASSERT(position_weights_lhs==NULL); 01503 ASSERT(position_weights_rhs==NULL); 01504 //only works for order <= 32 01505 ASSERT(degree<=32); 01506 ASSERT(!tries.get_use_compact_terminal_nodes()); 01507 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01508 ASSERT(num_feat>0); 01509 ASSERT(alphabet); 01510 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA); 01511 01512 //consensus 01513 char* result=SG_MALLOC(char, num_feat); 01514 01515 //backtracking and scoring table 01516 int32_t num_tables=CMath::max(1,num_feat-degree+1); 01517 DynArray<ConsensusEntry>** table=SG_MALLOC(DynArray<ConsensusEntry>*, num_tables); 01518 01519 for (int32_t i=0; i<num_tables; i++) 01520 table[i]=new DynArray<ConsensusEntry>(num_suppvec/10); 01521 01522 //compute consensus via dynamic programming 01523 for (int32_t i=0; i<num_tables; i++) 01524 { 01525 bool cumulative=false; 01526 01527 if (i<num_tables-1) 01528 init_optimization(num_suppvec, IDX, alphas, i); 01529 else 01530 { 01531 init_optimization(num_suppvec, IDX, alphas, i, num_feat-1); 01532 cumulative=true; 01533 } 01534 01535 if (i==0) 01536 tries.fill_backtracking_table(i, NULL, table[i], cumulative, weights); 01537 else 01538 tries.fill_backtracking_table(i, table[i-1], table[i], cumulative, weights); 01539 01540 SG_PROGRESS(i,0,num_feat); 01541 } 01542 01543 01544 //int32_t n=table[0]->get_num_elements(); 01545 01546 //for (int32_t i=0; i<n; i++) 01547 //{ 01548 // ConsensusEntry e= table[0]->get_element(i); 01549 // SG_PRint32_t("first: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt); 01550 //} 01551 01552 //n=table[num_tables-1]->get_num_elements(); 01553 //for (int32_t i=0; i<n; i++) 01554 //{ 01555 // ConsensusEntry e= table[num_tables-1]->get_element(i); 01556 // SG_PRint32_t("last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt); 01557 //} 01558 //n=table[num_tables-2]->get_num_elements(); 01559 //for (int32_t i=0; i<n; i++) 01560 //{ 01561 // ConsensusEntry e= table[num_tables-2]->get_element(i); 01562 // SG_PRINT("second last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt); 01563 //} 01564 01565 const char* acgt="ACGT"; 01566 01567 //backtracking start 01568 int32_t max_idx=-1; 01569 float32_t max_score=0; 01570 int32_t num_elements=table[num_tables-1]->get_num_elements(); 01571 01572 for (int32_t i=0; i<num_elements; i++) 01573 { 01574 float64_t sc=table[num_tables-1]->get_element(i).score; 01575 if (sc>max_score || max_idx==-1) 01576 { 01577 max_idx=i; 01578 max_score=sc; 01579 } 01580 } 01581 uint64_t endstr=table[num_tables-1]->get_element(max_idx).string; 01582 01583 SG_INFO("max_idx:%d num_el:%d num_feat:%d num_tables:%d max_score:%f\n", max_idx, num_elements, num_feat, num_tables, max_score); 01584 01585 for (int32_t i=0; i<degree; i++) 01586 result[num_feat-1-i]=acgt[(endstr >> (2*i)) & 3]; 01587 01588 if (num_tables>1) 01589 { 01590 for (int32_t i=num_tables-1; i>=0; i--) 01591 { 01592 //SG_PRINT("max_idx: %d, i:%d\n", max_idx, i); 01593 result[i]=acgt[table[i]->get_element(max_idx).string >> (2*(degree-1)) & 3]; 01594 max_idx=table[i]->get_element(max_idx).bt; 01595 } 01596 } 01597 01598 //for (int32_t t=0; t<num_tables; t++) 01599 //{ 01600 // n=table[t]->get_num_elements(); 01601 // for (int32_t i=0; i<n; i++) 01602 // { 01603 // ConsensusEntry e= table[t]->get_element(i); 01604 // SG_PRINT("table[%d,%d]: str:0%0llx sc:%+f bt:%d\n",t,i, e.string,e.score,e.bt); 01605 // } 01606 //} 01607 01608 for (int32_t i=0; i<num_tables; i++) 01609 delete table[i]; 01610 01611 SG_FREE(table); 01612 return result; 01613 } 01614 01615 01616 float64_t* CWeightedDegreePositionStringKernel::extract_w( 01617 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, 01618 float64_t* w_result, int32_t num_suppvec, int32_t* IDX, float64_t* alphas) 01619 { 01620 delete_optimization(); 01621 use_poim_tries=true; 01622 poim_tries.delete_trees(false); 01623 01624 // === check 01625 ASSERT(position_weights_lhs==NULL); 01626 ASSERT(position_weights_rhs==NULL); 01627 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01628 ASSERT(num_feat>0); 01629 ASSERT(alphabet->get_alphabet()==DNA); 01630 ASSERT(max_degree>0); 01631 01632 // === general variables 01633 static const int32_t NUM_SYMS = poim_tries.NUM_SYMS; 01634 const int32_t seqLen = num_feat; 01635 float64_t** subs; 01636 int32_t i; 01637 int32_t k; 01638 //int32_t y; 01639 01640 // === init tables "subs" for substring scores / POIMs 01641 // --- compute table sizes 01642 int32_t* offsets; 01643 int32_t offset; 01644 offsets = SG_MALLOC(int32_t, max_degree ); 01645 offset = 0; 01646 for( k = 0; k < max_degree; ++k ) { 01647 offsets[k] = offset; 01648 const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 ); 01649 const int32_t tabSize = nofsKmers * seqLen; 01650 offset += tabSize; 01651 } 01652 // --- allocate memory 01653 const int32_t bigTabSize = offset; 01654 w_result=SG_MALLOC(float64_t, bigTabSize); 01655 for (i=0; i<bigTabSize; ++i) 01656 w_result[i]=0; 01657 01658 // --- set pointers for tables 01659 subs = SG_MALLOC(float64_t*, max_degree ); 01660 ASSERT( subs != NULL ); 01661 for( k = 0; k < max_degree; ++k ) { 01662 subs[k] = &w_result[ offsets[k] ]; 01663 } 01664 SG_FREE(offsets); 01665 01666 // === init trees; extract "w" 01667 init_optimization( num_suppvec, IDX, alphas, -1); 01668 poim_tries.POIMs_extract_W( subs, max_degree ); 01669 01670 // === clean; return "subs" as vector 01671 SG_FREE(subs); 01672 num_feat = 1; 01673 num_sym = bigTabSize; 01674 use_poim_tries=false; 01675 poim_tries.delete_trees(false); 01676 return w_result; 01677 } 01678 01679 float64_t* CWeightedDegreePositionStringKernel::compute_POIM( 01680 int32_t max_degree, int32_t& num_feat, int32_t& num_sym, 01681 float64_t* poim_result, int32_t num_suppvec, int32_t* IDX, 01682 float64_t* alphas, float64_t* distrib ) 01683 { 01684 delete_optimization(); 01685 use_poim_tries=true; 01686 poim_tries.delete_trees(false); 01687 01688 // === check 01689 ASSERT(position_weights_lhs==NULL); 01690 ASSERT(position_weights_rhs==NULL); 01691 num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length(); 01692 ASSERT(num_feat>0); 01693 ASSERT(alphabet->get_alphabet()==DNA); 01694 ASSERT(max_degree!=0); 01695 ASSERT(distrib); 01696 01697 // === general variables 01698 static const int32_t NUM_SYMS = poim_tries.NUM_SYMS; 01699 const int32_t seqLen = num_feat; 01700 float64_t** subs; 01701 int32_t i; 01702 int32_t k; 01703 01704 // === DEBUGGING mode 01705 // 01706 // Activated if "max_degree" < 0. 01707 // Allows to output selected partial score. 01708 // 01709 // |max_degree| mod 4 01710 // 0: substring 01711 // 1: superstring 01712 // 2: left overlap 01713 // 3: right overlap 01714 // 01715 const int32_t debug = ( max_degree < 0 ) ? ( abs(max_degree) % 4 + 1 ) : 0; 01716 if( debug ) { 01717 max_degree = abs(max_degree) / 4; 01718 switch( debug ) { 01719 case 1: { 01720 printf( "POIM DEBUGGING: substring only (max order=%d)\n", max_degree ); 01721 break; 01722 } 01723 case 2: { 01724 printf( "POIM DEBUGGING: superstring only (max order=%d)\n", max_degree ); 01725 break; 01726 } 01727 case 3: { 01728 printf( "POIM DEBUGGING: left overlap only (max order=%d)\n", max_degree ); 01729 break; 01730 } 01731 case 4: { 01732 printf( "POIM DEBUGGING: right overlap only (max order=%d)\n", max_degree ); 01733 break; 01734 } 01735 default: { 01736 printf( "POIM DEBUGGING: something is wrong (max order=%d)\n", max_degree ); 01737 ASSERT(0); 01738 break; 01739 } 01740 } 01741 } 01742 01743 // --- compute table sizes 01744 int32_t* offsets; 01745 int32_t offset; 01746 offsets = SG_MALLOC(int32_t, max_degree ); 01747 offset = 0; 01748 for( k = 0; k < max_degree; ++k ) { 01749 offsets[k] = offset; 01750 const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 ); 01751 const int32_t tabSize = nofsKmers * seqLen; 01752 offset += tabSize; 01753 } 01754 // --- allocate memory 01755 const int32_t bigTabSize=offset; 01756 poim_result=SG_MALLOC(float64_t, bigTabSize); 01757 for (i=0; i<bigTabSize; ++i ) 01758 poim_result[i]=0; 01759 01760 // --- set pointers for tables 01761 subs=SG_MALLOC(float64_t*, max_degree); 01762 for (k=0; k<max_degree; ++k) 01763 subs[k]=&poim_result[offsets[k]]; 01764 01765 SG_FREE(offsets); 01766 01767 // === init trees; precalc S, L and R 01768 init_optimization( num_suppvec, IDX, alphas, -1); 01769 poim_tries.POIMs_precalc_SLR( distrib ); 01770 01771 // === compute substring scores 01772 if( debug==0 || debug==1 ) { 01773 poim_tries.POIMs_extract_W( subs, max_degree ); 01774 for( k = 1; k < max_degree; ++k ) { 01775 const int32_t nofKmers2 = ( k > 1 ) ? (int32_t) CMath::pow(NUM_SYMS,k-1) : 0; 01776 const int32_t nofKmers1 = (int32_t) CMath::pow( NUM_SYMS, k ); 01777 const int32_t nofKmers0 = nofKmers1 * NUM_SYMS; 01778 for( i = 0; i < seqLen; ++i ) { 01779 float64_t* const subs_k2i1 = ( k>1 && i<seqLen-1 ) ? &subs[k-2][(i+1)*nofKmers2] : NULL; 01780 float64_t* const subs_k1i1 = ( i < seqLen-1 ) ? &subs[k-1][(i+1)*nofKmers1] : NULL; 01781 float64_t* const subs_k1i0 = & subs[ k-1 ][ i*nofKmers1 ]; 01782 float64_t* const subs_k0i = & subs[ k-0 ][ i*nofKmers0 ]; 01783 int32_t y0; 01784 for( y0 = 0; y0 < nofKmers0; ++y0 ) { 01785 const int32_t y1l = y0 / NUM_SYMS; 01786 const int32_t y1r = y0 % nofKmers1; 01787 const int32_t y2 = y1r / NUM_SYMS; 01788 subs_k0i[ y0 ] += subs_k1i0[ y1l ]; 01789 if( i < seqLen-1 ) { 01790 subs_k0i[ y0 ] += subs_k1i1[ y1r ]; 01791 if( k > 1 ) { 01792 subs_k0i[ y0 ] -= subs_k2i1[ y2 ]; 01793 } 01794 } 01795 } 01796 } 01797 } 01798 } 01799 01800 // === compute POIMs 01801 poim_tries.POIMs_add_SLR( subs, max_degree, debug ); 01802 01803 // === clean; return "subs" as vector 01804 SG_FREE(subs); 01805 num_feat = 1; 01806 num_sym = bigTabSize; 01807 01808 use_poim_tries=false; 01809 poim_tries.delete_trees(false); 01810 01811 return poim_result; 01812 } 01813 01814 01815 void CWeightedDegreePositionStringKernel::prepare_POIM2( 01816 float64_t* distrib, int32_t num_sym, int32_t num_feat) 01817 { 01818 SG_FREE(m_poim_distrib); 01819 m_poim_distrib=SG_MALLOC(float64_t, num_sym*num_feat); 01820 01821 memcpy(m_poim_distrib, distrib, num_sym*num_feat*sizeof(float64_t)); 01822 m_poim_num_sym=num_sym; 01823 m_poim_num_feat=num_feat; 01824 } 01825 01826 void CWeightedDegreePositionStringKernel::compute_POIM2( 01827 int32_t max_degree, CSVM* svm) 01828 { 01829 ASSERT(svm); 01830 int32_t num_suppvec=svm->get_num_support_vectors(); 01831 int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec); 01832 float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec); 01833 01834 for (int32_t i=0; i<num_suppvec; i++) 01835 { 01836 sv_idx[i]=svm->get_support_vector(i); 01837 sv_weight[i]=svm->get_alpha(i); 01838 } 01839 01840 if ((max_degree < 1) || (max_degree > 12)) 01841 { 01842 //SG_WARNING( "max_degree out of range 1..12 (%d).\n", max_degree); 01843 SG_WARNING( "max_degree out of range 1..12 (%d). setting to 1.\n", max_degree); 01844 max_degree=1; 01845 } 01846 01847 int32_t num_feat = m_poim_num_feat; 01848 int32_t num_sym = m_poim_num_sym; 01849 SG_FREE(m_poim); 01850 01851 m_poim = compute_POIM(max_degree, num_feat, num_sym, NULL, num_suppvec, sv_idx, 01852 sv_weight, m_poim_distrib); 01853 01854 ASSERT(num_feat==1); 01855 m_poim_result_len=num_sym; 01856 01857 SG_FREE(sv_weight); 01858 SG_FREE(sv_idx); 01859 } 01860 01861 void CWeightedDegreePositionStringKernel::get_POIM2( 01862 float64_t** poim, int32_t* result_len) 01863 { 01864 *poim=SG_MALLOC(float64_t, m_poim_result_len); 01865 ASSERT(*poim); 01866 memcpy(*poim, m_poim, m_poim_result_len*sizeof(float64_t)) ; 01867 *result_len=m_poim_result_len ; 01868 } 01869 01870 void CWeightedDegreePositionStringKernel::cleanup_POIM2() 01871 { 01872 SG_FREE(m_poim) ; 01873 m_poim=NULL ; 01874 SG_FREE(m_poim_distrib) ; 01875 m_poim_distrib=NULL ; 01876 m_poim_num_sym=0 ; 01877 m_poim_num_sym=0 ; 01878 m_poim_result_len=0 ; 01879 } 01880 01881 void CWeightedDegreePositionStringKernel::load_serializable_post() throw (ShogunException) 01882 { 01883 CKernel::load_serializable_post(); 01884 01885 tries=CTrie<DNATrie>(degree); 01886 poim_tries=CTrie<POIMTrie>(degree); 01887 01888 if (weights) 01889 init_block_weights(); 01890 } 01891 01892 void CWeightedDegreePositionStringKernel::init() 01893 { 01894 weights=NULL; 01895 position_weights=NULL; 01896 position_weights_len=0; 01897 01898 position_weights_lhs=NULL; 01899 position_weights_lhs_len=0; 01900 position_weights_rhs=NULL; 01901 position_weights_rhs_len=0; 01902 01903 weights_buffer=NULL; 01904 mkl_stepsize=1; 01905 degree=1; 01906 length=0; 01907 01908 max_shift=0; 01909 max_mismatch=0; 01910 seq_length=0; 01911 shift=NULL; 01912 shift_len=0; 01913 01914 block_weights=NULL; 01915 block_computation=true; 01916 type=E_EXTERNAL; 01917 which_degree=-1; 01918 tries=CTrie<DNATrie>(1); 01919 poim_tries=CTrie<POIMTrie>(1); 01920 01921 tree_initialized=false; 01922 use_poim_tries=false; 01923 m_poim_distrib=NULL; 01924 01925 m_poim=NULL; 01926 m_poim_num_sym=0; 01927 m_poim_num_feat=0; 01928 m_poim_result_len=0; 01929 01930 alphabet=NULL; 01931 01932 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION; 01933 01934 set_normalizer(new CSqrtDiagKernelNormalizer()); 01935 01936 m_parameters->add_matrix(&weights, &weights_degree, &weights_length, 01937 "weights", "WD Kernel weights."); 01938 m_parameters->add_vector(&position_weights, &position_weights_len, 01939 "position_weights", 01940 "Weights per position."); 01941 m_parameters->add_vector(&position_weights_lhs, &position_weights_lhs_len, 01942 "position_weights_lhs", 01943 "Weights per position left hand side."); 01944 m_parameters->add_vector(&position_weights_rhs, &position_weights_rhs_len, 01945 "position_weights_rhs", 01946 "Weights per position right hand side."); 01947 m_parameters->add_vector(&shift, &shift_len, 01948 "shift", 01949 "Shift Vector."); 01950 m_parameters->add(&mkl_stepsize, "mkl_stepsize", "MKL step size."); 01951 m_parameters->add(°ree, "degree", "Order of WD kernel."); 01952 m_parameters->add(&max_mismatch, "max_mismatch", 01953 "Number of allowed mismatches."); 01954 m_parameters->add(&block_computation, "block_computation", 01955 "If block computation shall be used."); 01956 m_parameters->add((machine_int_t*) &type, "type", 01957 "WeightedDegree kernel type."); 01958 m_parameters->add(&which_degree, "which_degree", 01959 "Unqueal -1 if just a single degree is selected."); 01960 m_parameters->add((CSGObject**) &alphabet, "alphabet", 01961 "Alphabet of Features."); 01962 }