SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 2 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2007 Soeren Sonnenburg 00008 * Written (W) 1999-2007 Gunnar Raetsch 00009 * Written (W) 2008-2009 Jonas Behr 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 */ 00012 00013 #include <shogun/structure/DynProg.h> 00014 #include <shogun/mathematics/Math.h> 00015 #include <shogun/io/SGIO.h> 00016 #include <shogun/lib/config.h> 00017 #include <shogun/features/StringFeatures.h> 00018 #include <shogun/features/Alphabet.h> 00019 #include <shogun/structure/Plif.h> 00020 #include <shogun/structure/IntronList.h> 00021 #include <shogun/lib/Array.h> 00022 #include <shogun/lib/Array2.h> 00023 #include <shogun/lib/Array3.h> 00024 00025 #include <stdlib.h> 00026 #include <stdio.h> 00027 #include <time.h> 00028 #include <ctype.h> 00029 00030 using namespace shogun; 00031 00032 //#define USE_TMP_ARRAYCLASS 00033 //#define DYNPROG_DEBUG 00034 00035 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ; 00036 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ; 00037 int32_t CDynProg::frame_plifs[3]={4,5,6}; 00038 int32_t CDynProg::num_words_default[4]= {64,256,1024,4096} ; 00039 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1, 00040 1,1,1,1,1,1,1,1, 00041 0,0,0,0,0,0,0,0, 00042 0,0,0,0,0,0,0,0} ; 00043 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true, 00044 false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts 00045 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0, 00046 1,1,1,1,1,1,1,1} ; // which string should be used 00047 00048 CDynProg::CDynProg(int32_t num_svms /*= 8 */) 00049 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1), 00050 m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1), 00051 m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1), 00052 m_end_state_distribution_q_deriv(1), 00053 00054 // multi svm 00055 m_num_degrees(4), 00056 m_num_svms(num_svms), 00057 m_word_degree(word_degree_default, m_num_degrees, true, true), 00058 m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true), 00059 m_cum_num_words_array(m_cum_num_words.get_array()), 00060 m_num_words(num_words_default, m_num_degrees, true, true), 00061 m_num_words_array(m_num_words.get_array()), 00062 m_mod_words(mod_words_default, m_num_svms, 2, true, true), 00063 m_mod_words_array(m_mod_words.get_array()), 00064 m_sign_words(sign_words_default, m_num_svms, true, true), 00065 m_sign_words_array(m_sign_words.get_array()), 00066 m_string_words(string_words_default, m_num_svms, true, true), 00067 m_string_words_array(m_string_words.get_array()), 00068 //m_svm_pos_start(m_num_degrees), 00069 m_num_unique_words(m_num_degrees), 00070 m_svm_arrays_clean(true), 00071 00072 m_max_a_id(0), m_observation_matrix(1,1,1), 00073 m_pos(1), 00074 m_seq_len(0), 00075 m_orf_info(1,2), 00076 m_plif_list(1), 00077 m_PEN(1,1), 00078 m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2), 00079 m_segment_ids(1), 00080 m_segment_mask(1), 00081 m_my_state_seq(1), 00082 m_my_pos_seq(1), 00083 m_my_scores(1), 00084 m_my_losses(1), 00085 m_scores(1), 00086 m_states(1,1), 00087 m_positions(1,1), 00088 00089 m_seq_sparse1(NULL), 00090 m_seq_sparse2(NULL), 00091 m_plif_matrices(NULL), 00092 00093 m_genestr_stop(1), 00094 m_intron_list(NULL), 00095 m_num_intron_plifs(0), 00096 m_lin_feat(1,1), //by Jonas 00097 m_raw_intensities(NULL), 00098 m_probe_pos(NULL), 00099 m_num_probes_cum(NULL), 00100 m_num_lin_feat_plifs_cum(NULL), 00101 m_num_raw_data(0), 00102 00103 m_long_transitions(true), 00104 m_long_transition_threshold(1000) 00105 { 00106 trans_list_forward = NULL ; 00107 trans_list_forward_cnt = NULL ; 00108 trans_list_forward_val = NULL ; 00109 trans_list_forward_id = NULL ; 00110 trans_list_len = 0 ; 00111 00112 mem_initialized = true ; 00113 00114 m_N=1; 00115 00116 m_raw_intensities = NULL; 00117 m_probe_pos = NULL; 00118 m_num_probes_cum = SG_MALLOC(int32_t, 100); 00119 m_num_probes_cum[0] = 0; 00120 //m_use_tiling=false; 00121 m_num_lin_feat_plifs_cum = SG_MALLOC(int32_t, 100); 00122 m_num_lin_feat_plifs_cum[0] = m_num_svms; 00123 m_num_raw_data = 0; 00124 #ifdef ARRAY_STATISTICS 00125 m_word_degree.set_array_name("word_degree"); 00126 #endif 00127 00128 m_transition_matrix_a_id.set_array_name("transition_matrix_a_id"); 00129 m_transition_matrix_a.set_array_name("transition_matrix_a"); 00130 m_transition_matrix_a_deriv.set_array_name("transition_matrix_a_deriv"); 00131 m_mod_words.set_array_name("mod_words"); 00132 m_orf_info.set_array_name("orf_info"); 00133 m_segment_sum_weights.set_array_name("segment_sum_weights"); 00134 m_PEN.set_array_name("PEN"); 00135 m_PEN_state_signals.set_array_name("PEN_state_signals"); 00136 m_dict_weights.set_array_name("dict_weights"); 00137 m_states.set_array_name("states"); 00138 m_positions.set_array_name("positions"); 00139 m_lin_feat.set_array_name("lin_feat"); 00140 00141 00142 m_observation_matrix.set_array_name("m_observation_matrix"); 00143 m_segment_loss.set_array_name("m_segment_loss"); 00144 m_seg_loss_obj = new CSegmentLoss(); 00145 } 00146 00147 CDynProg::~CDynProg() 00148 { 00149 if (trans_list_forward_cnt) 00150 SG_FREE(trans_list_forward_cnt); 00151 if (trans_list_forward) 00152 { 00153 for (int32_t i=0; i<trans_list_len; i++) 00154 { 00155 if (trans_list_forward[i]) 00156 SG_FREE(trans_list_forward[i]); 00157 } 00158 SG_FREE(trans_list_forward); 00159 } 00160 if (trans_list_forward_val) 00161 { 00162 for (int32_t i=0; i<trans_list_len; i++) 00163 { 00164 if (trans_list_forward_val[i]) 00165 SG_FREE(trans_list_forward_val[i]); 00166 } 00167 SG_FREE(trans_list_forward_val); 00168 } 00169 if (trans_list_forward_id) 00170 { 00171 for (int32_t i=0; i<trans_list_len; i++) 00172 { 00173 if (trans_list_forward_id[i]) 00174 SG_FREE(trans_list_forward_id[i]); 00175 } 00176 SG_FREE(trans_list_forward_id); 00177 } 00178 if (m_raw_intensities) 00179 SG_FREE(m_raw_intensities); 00180 if (m_probe_pos) 00181 SG_FREE(m_probe_pos); 00182 if (m_num_probes_cum) 00183 SG_FREE(m_num_probes_cum); 00184 if (m_num_lin_feat_plifs_cum) 00185 SG_FREE(m_num_lin_feat_plifs_cum); 00186 00187 delete m_intron_list; 00188 00189 SG_UNREF(m_seq_sparse1); 00190 SG_UNREF(m_seq_sparse2); 00191 SG_UNREF(m_plif_matrices); 00192 } 00193 00195 int32_t CDynProg::get_num_svms() 00196 { 00197 return m_num_svms; 00198 } 00199 00200 void CDynProg::precompute_stop_codons() 00201 { 00202 int32_t length=m_genestr.get_dim1(); 00203 00204 m_genestr_stop.resize_array(length) ; 00205 m_genestr_stop.zero() ; 00206 m_genestr_stop.set_array_name("genestr_stop") ; 00207 { 00208 for (int32_t i=0; i<length-2; i++) 00209 if ((m_genestr[i]=='t' || m_genestr[i]=='T') && 00210 (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') && 00211 (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) || 00212 ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') ))) 00213 { 00214 m_genestr_stop.element(i)=true ; 00215 } 00216 else 00217 m_genestr_stop.element(i)=false ; 00218 m_genestr_stop.element(length-2)=false ; 00219 m_genestr_stop.element(length-1)=false ; 00220 } 00221 } 00222 00223 void CDynProg::set_num_states(int32_t p_N) 00224 { 00225 m_N=p_N ; 00226 00227 m_transition_matrix_a_id.resize_array(m_N,m_N) ; 00228 m_transition_matrix_a.resize_array(m_N,m_N) ; 00229 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ; 00230 m_initial_state_distribution_p.resize_array(m_N) ; 00231 m_initial_state_distribution_p_deriv.resize_array(m_N) ; 00232 m_end_state_distribution_q.resize_array(m_N); 00233 m_end_state_distribution_q_deriv.resize_array(m_N) ; 00234 00235 m_orf_info.resize_array(m_N,2) ; 00236 m_PEN.resize_array(m_N,m_N) ; 00237 } 00238 00239 int32_t CDynProg::get_num_states() 00240 { 00241 return m_N; 00242 } 00243 00244 void CDynProg::init_tiling_data( 00245 int32_t* probe_pos, float64_t* intensities, const int32_t num_probes) 00246 { 00247 m_num_raw_data++; 00248 m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes; 00249 00250 int32_t* tmp_probe_pos = SG_MALLOC(int32_t, m_num_probes_cum[m_num_raw_data]); 00251 float64_t* tmp_raw_intensities = SG_MALLOC(float64_t, m_num_probes_cum[m_num_raw_data]); 00252 00253 00254 if (m_num_raw_data==1){ 00255 memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t)); 00256 memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t)); 00257 //SG_PRINT("raw_intens:%f \n",*tmp_raw_intensities+2); 00258 }else{ 00259 memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t)); 00260 memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t)); 00261 memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t)); 00262 memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t)); 00263 } 00264 SG_FREE(m_probe_pos); 00265 SG_FREE(m_raw_intensities); 00266 m_probe_pos = tmp_probe_pos; //SG_MALLOC(int32_t, num_probes); 00267 m_raw_intensities = tmp_raw_intensities;//SG_MALLOC(float64_t, num_probes); 00268 00269 //memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t)); 00270 //memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t)); 00271 00272 } 00273 00274 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms) 00275 { 00276 m_lin_feat.resize_array(p_num_svms, m_seq_len); 00277 00278 // initialize array 00279 for (int s=0; s<p_num_svms; s++) 00280 for (int p=0; p<m_seq_len; p++) 00281 m_lin_feat.set_element(0.0, s, p) ; 00282 } 00283 00284 void CDynProg::resize_lin_feat(const int32_t num_new_feat) 00285 { 00286 int32_t dim1, dim2; 00287 m_lin_feat.get_array_size(dim1, dim2); 00288 ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]); 00289 ASSERT(dim2==m_seq_len); // == number of candidate positions 00290 00291 00292 00293 float64_t* arr = m_lin_feat.get_array(); 00294 float64_t* tmp = SG_MALLOC(float64_t, (dim1+num_new_feat)*dim2); 00295 memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ; 00296 for(int32_t j=0;j<m_seq_len;j++) 00297 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++) 00298 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k]; 00299 00300 m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);// copy array and free it later 00301 SG_FREE(tmp); 00302 00303 /*for(int32_t j=0;j<5;j++) 00304 { 00305 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data];k++) 00306 { 00307 SG_PRINT("(%i,%i)%f ",k,j,m_lin_feat.get_element(k,j)); 00308 } 00309 SG_PRINT("\n"); 00310 } 00311 m_lin_feat.get_array_size(dim1,dim2); 00312 SG_PRINT("resize_lin_feat: dim1:%i, dim2:%i\n",dim1,dim2);*/ 00313 00314 //SG_PRINT("resize_lin_feat: done\n"); 00315 } 00316 00317 void CDynProg::precompute_tiling_plifs( 00318 CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs) 00319 { 00320 m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs; 00321 float64_t* tiling_plif = SG_MALLOC(float64_t, num_tiling_plifs); 00322 float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 00323 for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++) 00324 svm_value[i]=0.0; 00325 int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs); 00326 for (int32_t i=0; i<num_tiling_plifs; i++) 00327 { 00328 tiling_plif[i]=0.0; 00329 CPlif * plif = PEN[tiling_plif_ids[i]]; 00330 tiling_rows[i] = plif->get_use_svm(); 00331 00332 ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i) 00333 } 00334 resize_lin_feat(num_tiling_plifs); 00335 00336 00337 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]]; 00338 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]]; 00339 int32_t num=m_num_probes_cum[m_num_raw_data-1]; 00340 00341 for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++) 00342 { 00343 while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx]) 00344 { 00345 for (int32_t i=0; i<num_tiling_plifs; i++) 00346 { 00347 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data; 00348 CPlif * plif = PEN[tiling_plif_ids[i]]; 00349 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1); 00350 plif->set_do_calc(true); 00351 tiling_plif[i]+=plif->lookup_penalty(0,svm_value); 00352 plif->set_do_calc(false); 00353 } 00354 p_tiling_data++; 00355 p_tiling_pos++; 00356 num++; 00357 } 00358 for (int32_t i=0; i<num_tiling_plifs; i++) 00359 m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx); 00360 } 00361 SG_FREE(svm_value); 00362 SG_FREE(tiling_plif); 00363 SG_FREE(tiling_rows); 00364 } 00365 00366 void CDynProg::create_word_string() 00367 { 00368 SG_FREE(m_wordstr); 00369 m_wordstr=SG_MALLOC(uint16_t**, 5440); 00370 int32_t k=0; 00371 int32_t genestr_len=m_genestr.get_dim1(); 00372 00373 m_wordstr[k]=SG_MALLOC(uint16_t*, m_num_degrees); 00374 for (int32_t j=0; j<m_num_degrees; j++) 00375 { 00376 m_wordstr[k][j]=NULL ; 00377 { 00378 m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len); 00379 for (int32_t i=0; i<genestr_len; i++) 00380 switch (m_genestr[i]) 00381 { 00382 case 'A': 00383 case 'a': m_wordstr[k][j][i]=0 ; break ; 00384 case 'C': 00385 case 'c': m_wordstr[k][j][i]=1 ; break ; 00386 case 'G': 00387 case 'g': m_wordstr[k][j][i]=2 ; break ; 00388 case 'T': 00389 case 't': m_wordstr[k][j][i]=3 ; break ; 00390 default: ASSERT(0) ; 00391 } 00392 CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ; 00393 } 00394 } 00395 } 00396 00397 void CDynProg::precompute_content_values() 00398 { 00399 for (int32_t s=0; s<m_num_svms; s++) 00400 m_lin_feat.set_element(0.0, s, 0); 00401 00402 for (int32_t p=0 ; p<m_seq_len-1 ; p++) 00403 { 00404 int32_t from_pos = m_pos[p]; 00405 int32_t to_pos = m_pos[p+1]; 00406 float64_t* my_svm_values_unnormalized = SG_MALLOC(float64_t, m_num_svms); 00407 //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos); 00408 00409 ASSERT(from_pos<=m_genestr.get_dim1()); 00410 ASSERT(to_pos<=m_genestr.get_dim1()); 00411 00412 for (int32_t s=0; s<m_num_svms; s++) 00413 my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p); 00414 00415 for (int32_t i=from_pos; i<to_pos; i++) 00416 { 00417 for (int32_t j=0; j<m_num_degrees; j++) 00418 { 00419 uint16_t word = m_wordstr[0][j][i] ; 00420 for (int32_t s=0; s<m_num_svms; s++) 00421 { 00422 // check if this k-mer should be considered for this SVM 00423 if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1)) 00424 continue; 00425 my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ; 00426 } 00427 } 00428 } 00429 for (int32_t s=0; s<m_num_svms; s++) 00430 { 00431 float64_t prev = m_lin_feat.get_element(s, p); 00432 //SG_PRINT("elem (%i, %i, %f)\n", s, p, prev) ; 00433 if (prev<-1e20 || prev>1e20) 00434 { 00435 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev) ; 00436 prev=0 ; 00437 } 00438 m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1); 00439 } 00440 SG_FREE(my_svm_values_unnormalized); 00441 } 00442 //for (int32_t j=0; j<m_num_degrees; j++) 00443 // SG_FREE(m_wordstr[0][j]); 00444 //SG_FREE(m_wordstr[0]); 00445 } 00446 00447 void CDynProg::set_p_vector(SGVector<float64_t> p) 00448 { 00449 if (!(p.vlen==m_N)) 00450 SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.vlen, m_N); 00451 00452 m_initial_state_distribution_p.set_array(p.vector, p.vlen, true, true); 00453 } 00454 00455 void CDynProg::set_q_vector(SGVector<float64_t> q) 00456 { 00457 if (!(q.vlen==m_N)) 00458 SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.vlen, m_N); 00459 m_end_state_distribution_q.set_array(q.vector, q.vlen, true, true); 00460 } 00461 00462 void CDynProg::set_a(SGMatrix<float64_t> a) 00463 { 00464 ASSERT(a.num_cols==m_N); 00465 ASSERT(a.num_rows==m_N); 00466 m_transition_matrix_a.set_array(a.matrix, m_N, m_N, true, true); 00467 m_transition_matrix_a_deriv.resize_array(m_N, m_N); 00468 } 00469 00470 void CDynProg::set_a_id(SGMatrix<int32_t> a) 00471 { 00472 ASSERT(a.num_cols==m_N); 00473 ASSERT(a.num_rows==m_N); 00474 m_transition_matrix_a_id.set_array(a.matrix, m_N, m_N, true, true); 00475 m_max_a_id = 0; 00476 for (int32_t i=0; i<m_N; i++) 00477 { 00478 for (int32_t j=0; j<m_N; j++) 00479 m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)); 00480 } 00481 } 00482 00483 void CDynProg::set_a_trans_matrix(SGMatrix<float64_t> a_trans) 00484 { 00485 int32_t num_trans=a_trans.num_rows; 00486 int32_t num_cols=a_trans.num_cols; 00487 00488 //CMath::display_matrix(a_trans.matrix,num_trans, num_cols,"a_trans"); 00489 00490 if (!((num_cols==3) || (num_cols==4))) 00491 SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols); 00492 00493 SG_FREE(trans_list_forward); 00494 SG_FREE(trans_list_forward_cnt); 00495 SG_FREE(trans_list_forward_val); 00496 SG_FREE(trans_list_forward_id); 00497 00498 trans_list_forward = NULL ; 00499 trans_list_forward_cnt = NULL ; 00500 trans_list_forward_val = NULL ; 00501 trans_list_len = 0 ; 00502 00503 m_transition_matrix_a.zero() ; 00504 m_transition_matrix_a_id.zero() ; 00505 00506 mem_initialized = true ; 00507 00508 trans_list_forward_cnt=NULL ; 00509 trans_list_len = m_N ; 00510 trans_list_forward = SG_MALLOC(T_STATES*, m_N); 00511 trans_list_forward_cnt = SG_MALLOC(T_STATES, m_N); 00512 trans_list_forward_val = SG_MALLOC(float64_t*, m_N); 00513 trans_list_forward_id = SG_MALLOC(int32_t*, m_N); 00514 00515 int32_t start_idx=0; 00516 for (int32_t j=0; j<m_N; j++) 00517 { 00518 int32_t old_start_idx=start_idx; 00519 00520 while (start_idx<num_trans && a_trans.matrix[start_idx+num_trans]==j) 00521 { 00522 start_idx++; 00523 00524 if (start_idx>1 && start_idx<num_trans) 00525 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]); 00526 } 00527 00528 if (start_idx>1 && start_idx<num_trans) 00529 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]); 00530 00531 int32_t len=start_idx-old_start_idx; 00532 ASSERT(len>=0); 00533 00534 trans_list_forward_cnt[j] = 0 ; 00535 00536 if (len>0) 00537 { 00538 trans_list_forward[j] = SG_MALLOC(T_STATES, len); 00539 trans_list_forward_val[j] = SG_MALLOC(float64_t, len); 00540 trans_list_forward_id[j] = SG_MALLOC(int32_t, len); 00541 } 00542 else 00543 { 00544 trans_list_forward[j] = NULL; 00545 trans_list_forward_val[j] = NULL; 00546 trans_list_forward_id[j] = NULL; 00547 } 00548 } 00549 00550 for (int32_t i=0; i<num_trans; i++) 00551 { 00552 int32_t from_state = (int32_t)a_trans.matrix[i] ; 00553 int32_t to_state = (int32_t)a_trans.matrix[i+num_trans] ; 00554 float64_t val = a_trans.matrix[i+num_trans*2] ; 00555 int32_t id = 0 ; 00556 if (num_cols==4) 00557 id = (int32_t)a_trans.matrix[i+num_trans*3] ; 00558 //SG_DEBUG( "id=%i\n", id) ; 00559 00560 ASSERT(to_state>=0 && to_state<m_N); 00561 ASSERT(from_state>=0 && from_state<m_N); 00562 00563 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ; 00564 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ; 00565 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ; 00566 trans_list_forward_cnt[to_state]++ ; 00567 m_transition_matrix_a.element(from_state, to_state) = val ; 00568 m_transition_matrix_a_id.element(from_state, to_state) = id ; 00569 //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,m_transition_matrix_a_id.element(from_state, to_state)); 00570 } ; 00571 00572 m_max_a_id = 0 ; 00573 for (int32_t i=0; i<m_N; i++) 00574 for (int32_t j=0; j<m_N; j++) 00575 { 00576 //if (m_transition_matrix_a_id.element(i,j)) 00577 //SG_DEBUG( "(%i,%i)=%i\n", i,j, m_transition_matrix_a_id.element(i,j)) ; 00578 m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ; 00579 } 00580 //SG_DEBUG( "m_max_a_id=%i\n", m_max_a_id) ; 00581 } 00582 00583 00584 void CDynProg::init_mod_words_array(SGMatrix<int32_t> mod_words_input) 00585 { 00586 //for (int32_t i=0; i<mod_words_input.num_cols; i++) 00587 //{ 00588 // for (int32_t j=0; j<mod_words_input.num_rows; j++) 00589 // SG_PRINT("%i ",mod_words_input[i*mod_words_input.num_rows+j]); 00590 // SG_PRINT("\n"); 00591 //} 00592 m_svm_arrays_clean=false ; 00593 00594 ASSERT(m_num_svms==mod_words_input.num_rows); 00595 ASSERT(mod_words_input.num_cols==2); 00596 00597 m_mod_words.set_array(mod_words_input.matrix, mod_words_input.num_rows, 2, true, true) ; 00598 m_mod_words_array = m_mod_words.get_array() ; 00599 00600 /*SG_DEBUG( "m_mod_words=[") ; 00601 for (int32_t i=0; i<mod_words_input.num_rows; i++) 00602 SG_DEBUG( "%i, ", p_mod_words_array[i]) ; 00603 SG_DEBUG( "]\n") ;*/ 00604 } 00605 00606 bool CDynProg::check_svm_arrays() 00607 { 00608 //SG_DEBUG( "wd_dim1=%d, m_cum_num_words=%d, m_num_words=%d, m_svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n m_num_degrees=%d, m_num_svms=%d, m_num_strings=%d", m_word_degree.get_dim1(), m_cum_num_words.get_dim1(), m_num_words.get_dim1(), m_svm_pos_start.get_dim1(), m_num_unique_words.get_dim1(), m_mod_words.get_dim1(), m_mod_words.get_dim2(), m_sign_words.get_dim1(), m_string_words.get_dim1(), m_num_degrees, m_num_svms, m_num_strings); 00609 if ((m_word_degree.get_dim1()==m_num_degrees) && 00610 (m_cum_num_words.get_dim1()==m_num_degrees+1) && 00611 (m_num_words.get_dim1()==m_num_degrees) && 00612 //(word_used.get_dim1()==m_num_degrees) && 00613 //(word_used.get_dim2()==m_num_words[m_num_degrees-1]) && 00614 //(word_used.get_dim3()==m_num_strings) && 00615 // (svm_values_unnormalized.get_dim1()==m_num_degrees) && 00616 // (svm_values_unnormalized.get_dim2()==m_num_svms) && 00617 //(m_svm_pos_start.get_dim1()==m_num_degrees) && 00618 (m_num_unique_words.get_dim1()==m_num_degrees) && 00619 (m_mod_words.get_dim1()==m_num_svms) && 00620 (m_mod_words.get_dim2()==2) && 00621 (m_sign_words.get_dim1()==m_num_svms) && 00622 (m_string_words.get_dim1()==m_num_svms)) 00623 { 00624 m_svm_arrays_clean=true ; 00625 return true ; 00626 } 00627 else 00628 { 00629 if ((m_num_unique_words.get_dim1()==m_num_degrees) && 00630 (m_mod_words.get_dim1()==m_num_svms) && 00631 (m_mod_words.get_dim2()==2) && 00632 (m_sign_words.get_dim1()==m_num_svms) && 00633 (m_string_words.get_dim1()==m_num_svms)) 00634 SG_PRINT("OK\n") ; 00635 else 00636 SG_PRINT("not OK\n") ; 00637 00638 if (!(m_word_degree.get_dim1()==m_num_degrees)) 00639 SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees") ; 00640 if (!(m_cum_num_words.get_dim1()==m_num_degrees+1)) 00641 SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1") ; 00642 if (!(m_num_words.get_dim1()==m_num_degrees)) 00643 SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees") ; 00644 //if (!(m_svm_pos_start.get_dim1()==m_num_degrees)) 00645 // SG_WARNING("SVM array: m_svm_pos_start.get_dim1()!=m_num_degrees") ; 00646 if (!(m_num_unique_words.get_dim1()==m_num_degrees)) 00647 SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees") ; 00648 if (!(m_mod_words.get_dim1()==m_num_svms)) 00649 SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms") ; 00650 if (!(m_mod_words.get_dim2()==2)) 00651 SG_WARNING("SVM array: m_mod_words.get_dim2()!=2") ; 00652 if (!(m_sign_words.get_dim1()==m_num_svms)) 00653 SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms") ; 00654 if (!(m_string_words.get_dim1()==m_num_svms)) 00655 SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms") ; 00656 00657 m_svm_arrays_clean=false ; 00658 return false ; 00659 } 00660 } 00661 00662 void CDynProg::set_observation_matrix(SGNDArray<float64_t> seq) 00663 { 00664 if (seq.num_dims!=3) 00665 SG_ERROR("Expected 3-dimensional Matrix\n"); 00666 00667 int32_t N=seq.dims[0]; 00668 int32_t cand_pos=seq.dims[1]; 00669 int32_t max_num_features=seq.dims[2]; 00670 00671 if (!m_svm_arrays_clean) 00672 { 00673 SG_ERROR( "SVM arrays not clean") ; 00674 return ; 00675 } ; 00676 00677 ASSERT(N==m_N); 00678 ASSERT(cand_pos==m_seq_len); 00679 ASSERT(m_initial_state_distribution_p.get_dim1()==N); 00680 ASSERT(m_end_state_distribution_q.get_dim1()==N); 00681 00682 m_observation_matrix.set_array(seq.array, N, m_seq_len, max_num_features, true, true) ; 00683 } 00684 int32_t CDynProg::get_num_positions() 00685 { 00686 return m_seq_len; 00687 } 00688 00689 void CDynProg::set_content_type_array(SGMatrix<float64_t> seg_path) 00690 { 00691 ASSERT(seg_path.num_rows==2); 00692 ASSERT(seg_path.num_cols==m_seq_len); 00693 00694 if (seg_path.matrix!=NULL) 00695 { 00696 int32_t *segment_ids = SG_MALLOC(int32_t, m_seq_len); 00697 float64_t *segment_mask = SG_MALLOC(float64_t, m_seq_len); 00698 for (int32_t i=0; i<m_seq_len; i++) 00699 { 00700 segment_ids[i] = (int32_t)seg_path.matrix[2*i] ; 00701 segment_mask[i] = seg_path.matrix[2*i+1] ; 00702 } 00703 best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ; 00704 SG_FREE(segment_ids); 00705 SG_FREE(segment_mask); 00706 } 00707 else 00708 { 00709 int32_t *izeros = SG_MALLOC(int32_t, m_seq_len); 00710 float64_t *dzeros = SG_MALLOC(float64_t, m_seq_len); 00711 for (int32_t i=0; i<m_seq_len; i++) 00712 { 00713 izeros[i]=0 ; 00714 dzeros[i]=0.0 ; 00715 } 00716 best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ; 00717 SG_FREE(izeros); 00718 SG_FREE(dzeros); 00719 } 00720 } 00721 00722 void CDynProg::set_pos(SGVector<int32_t> pos) 00723 { 00724 m_pos.set_array(pos.vector, pos.vlen, true, true) ; 00725 m_seq_len = pos.vlen; 00726 } 00727 00728 void CDynProg::set_orf_info(SGMatrix<int32_t> orf_info) 00729 { 00730 if (orf_info.num_cols!=2) 00731 SG_ERROR( "orf_info size incorrect %i!=2\n", orf_info.num_cols) ; 00732 00733 m_orf_info.set_array(orf_info.matrix, orf_info.num_rows, orf_info.num_cols, true, true) ; 00734 m_orf_info.set_array_name("orf_info") ; 00735 } 00736 00737 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2) 00738 { 00739 if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2)) 00740 SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n"); 00741 00742 SG_UNREF(m_seq_sparse1); 00743 SG_UNREF(m_seq_sparse2); 00744 00745 m_seq_sparse1=seq_sparse1; 00746 m_seq_sparse2=seq_sparse2; 00747 SG_REF(m_seq_sparse1); 00748 SG_REF(m_seq_sparse2); 00749 } 00750 00751 void CDynProg::set_plif_matrices(CPlifMatrix* pm) 00752 { 00753 SG_UNREF(m_plif_matrices); 00754 00755 m_plif_matrices=pm; 00756 00757 SG_REF(m_plif_matrices); 00758 } 00759 00760 void CDynProg::set_gene_string(SGVector<char> genestr) 00761 { 00762 ASSERT(genestr.vector); 00763 ASSERT(genestr.vlen>0); 00764 00765 m_genestr.set_array(genestr.vector, genestr.vlen, true, true) ; 00766 } 00767 00768 void CDynProg::set_my_state_seq(int32_t* my_state_seq) 00769 { 00770 ASSERT(my_state_seq && m_seq_len>0); 00771 m_my_state_seq.resize_array(m_seq_len); 00772 for (int32_t i=0; i<m_seq_len; i++) 00773 m_my_state_seq[i]=my_state_seq[i]; 00774 } 00775 00776 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq) 00777 { 00778 ASSERT(my_pos_seq && m_seq_len>0); 00779 m_my_pos_seq.resize_array(m_seq_len); 00780 for (int32_t i=0; i<m_seq_len; i++) 00781 m_my_pos_seq[i]=my_pos_seq[i]; 00782 } 00783 00784 void CDynProg::set_dict_weights(SGMatrix<float64_t> dictionary_weights) 00785 { 00786 if (m_num_svms!=dictionary_weights.num_cols) 00787 { 00788 SG_ERROR( "m_dict_weights array does not match num_svms=%i!=%i\n", 00789 m_num_svms, dictionary_weights.num_cols) ; 00790 } 00791 00792 m_dict_weights.set_array(dictionary_weights.matrix, dictionary_weights.num_rows, m_num_svms, true, true) ; 00793 00794 // initialize, so it does not bother when not used 00795 m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ; 00796 m_segment_loss.zero() ; 00797 m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ; 00798 m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ; 00799 m_segment_ids.zero() ; 00800 m_segment_mask.zero() ; 00801 } 00802 00803 void CDynProg::best_path_set_segment_loss(SGMatrix<float64_t> segment_loss) 00804 { 00805 int32_t m=segment_loss.num_rows; 00806 int32_t n=segment_loss.num_cols; 00807 // here we need two matrices. Store it in one: 2N x N 00808 if (2*m!=n) 00809 SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ; 00810 00811 if (m!=m_max_a_id+1) 00812 SG_ERROR( "segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1) ; 00813 00814 m_segment_loss.set_array(segment_loss.matrix, m, n/2, 2, true, true) ; 00815 /*for (int32_t i=0; i<n; i++) 00816 for (int32_t j=0; j<n; j++) 00817 SG_DEBUG( "loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) ;*/ 00818 } 00819 00820 void CDynProg::best_path_set_segment_ids_mask( 00821 int32_t* segment_ids, float64_t* segment_mask, int32_t m) 00822 { 00823 00824 if (m!=m_observation_matrix.get_dim2()) 00825 SG_ERROR("size of segment_ids or segment_mask (%i) does not match the size of the feature matrix (%i)", m, m_observation_matrix.get_dim2()); 00826 int32_t max_id = 0; 00827 for (int32_t i=1;i<m;i++) 00828 max_id = CMath::max(max_id,segment_ids[i]); 00829 //SG_PRINT("max_id: %i, m:%i\n",max_id, m); 00830 m_segment_ids.set_array(segment_ids, m, true, true) ; 00831 m_segment_ids.set_array_name("m_segment_ids"); 00832 m_segment_mask.set_array(segment_mask, m, true, true) ; 00833 m_segment_mask.set_array_name("m_segment_mask"); 00834 00835 m_seg_loss_obj->set_segment_mask(&m_segment_mask); 00836 m_seg_loss_obj->set_segment_ids(&m_segment_ids); 00837 m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len); 00838 } 00839 00840 SGVector<float64_t> CDynProg::get_scores() 00841 { 00842 SGVector<float64_t> scores(m_scores.get_dim1()); 00843 memcpy(scores.vector,m_scores.get_array(), sizeof(float64_t)*(m_scores.get_dim1())); 00844 00845 return scores; 00846 } 00847 00848 SGMatrix<int32_t> CDynProg::get_states() 00849 { 00850 SGMatrix<int32_t> states(m_states.get_dim1(), m_states.get_dim2()); 00851 00852 int32_t sz = sizeof(int32_t)*( m_states.get_dim1() * m_states.get_dim2() ); 00853 memcpy(states.matrix ,m_states.get_array(),sz); 00854 00855 return states; 00856 } 00857 00858 SGMatrix<int32_t> CDynProg::get_positions() 00859 { 00860 SGMatrix<int32_t> positions(m_positions.get_dim1(), m_positions.get_dim2()); 00861 00862 int32_t sz = sizeof(int32_t)*(m_positions.get_dim1()*m_positions.get_dim2()); 00863 memcpy(positions.matrix, m_positions.get_array(),sz); 00864 00865 return positions; 00866 } 00867 00868 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len) 00869 { 00870 ASSERT(scores && seq_len); 00871 00872 *seq_len=m_my_scores.get_dim1(); 00873 00874 int32_t sz = sizeof(float64_t)*(*seq_len); 00875 00876 *scores = SG_MALLOC(float64_t, *seq_len); 00877 ASSERT(*scores); 00878 00879 memcpy(*scores,m_my_scores.get_array(),sz); 00880 } 00881 00882 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len) 00883 { 00884 ASSERT(losses && seq_len); 00885 00886 *seq_len=m_my_losses.get_dim1(); 00887 00888 int32_t sz = sizeof(float64_t)*(*seq_len); 00889 00890 *losses = SG_MALLOC(float64_t, *seq_len); 00891 ASSERT(*losses); 00892 00893 memcpy(*losses,m_my_losses.get_array(),sz); 00894 } 00895 00897 00898 bool CDynProg::extend_orf( 00899 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, 00900 int32_t to) 00901 { 00902 #ifdef DYNPROG_TIMING_DETAIL 00903 MyTime.start() ; 00904 #endif 00905 00906 if (start<0) 00907 start=0 ; 00908 if (to<0) 00909 to=0 ; 00910 00911 int32_t orf_target = orf_to-orf_from ; 00912 if (orf_target<0) orf_target+=3 ; 00913 00914 int32_t pos ; 00915 if (last_pos==to) 00916 pos = to-orf_to-3 ; 00917 else 00918 pos=last_pos ; 00919 00920 if (pos<0) 00921 { 00922 #ifdef DYNPROG_TIMING_DETAIL 00923 MyTime.stop() ; 00924 orf_time += MyTime.time_diff_sec() ; 00925 #endif 00926 return true ; 00927 } 00928 00929 for (; pos>=start; pos-=3) 00930 if (m_genestr_stop[pos]) 00931 { 00932 #ifdef DYNPROG_TIMING_DETAIL 00933 MyTime.stop() ; 00934 orf_time += MyTime.time_diff_sec() ; 00935 #endif 00936 return false ; 00937 } 00938 00939 00940 last_pos = CMath::min(pos+3,to-orf_to-3) ; 00941 00942 #ifdef DYNPROG_TIMING_DETAIL 00943 MyTime.stop() ; 00944 orf_time += MyTime.time_diff_sec() ; 00945 #endif 00946 return true ; 00947 } 00948 00949 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf, 00950 int16_t nbest, bool with_loss, bool with_multiple_sequences) 00951 { 00952 00953 //FIXME we need checks here if all the fields are of right size 00954 //SG_PRINT("m_seq_len: %i\n", m_seq_len); 00955 //SG_PRINT("m_pos[0]: %i\n", m_pos[0]); 00956 //SG_PRINT("\n"); 00957 00958 //FIXME these variables can go away when compute_nbest_paths uses them 00959 //instead of the local pointers below 00960 const float64_t* seq_array = m_observation_matrix.get_array(); 00961 m_scores.resize_array(nbest) ; 00962 m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ; 00963 m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ; 00964 00965 for (int32_t i=0; i<nbest; i++) 00966 { 00967 m_scores[i]=-1; 00968 for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++) 00969 { 00970 m_states.element(i,j)=-1; 00971 m_positions.element(i,j)=-1; 00972 } 00973 } 00974 float64_t* prob_nbest=m_scores.get_array(); 00975 int32_t* my_state_seq=m_states.get_array(); 00976 int32_t* my_pos_seq=m_positions.get_array(); 00977 00978 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix(); 00979 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals(); 00980 //END FIXME 00981 00982 00983 #ifdef DYNPROG_TIMING 00984 segment_init_time = 0.0 ; 00985 segment_pos_time = 0.0 ; 00986 segment_extend_time = 0.0 ; 00987 segment_clean_time = 0.0 ; 00988 orf_time = 0.0 ; 00989 svm_init_time = 0.0 ; 00990 svm_pos_time = 0.0 ; 00991 svm_clean_time = 0.0 ; 00992 inner_loop_time = 0.0 ; 00993 content_svm_values_time = 0.0 ; 00994 content_plifs_time = 0.0 ; 00995 inner_loop_max_time = 0.0 ; 00996 long_transition_time = 0.0 ; 00997 00998 MyTime2.start() ; 00999 #endif 01000 01001 if (!m_svm_arrays_clean) 01002 { 01003 SG_ERROR( "SVM arrays not clean") ; 01004 return ; 01005 } 01006 01007 #ifdef DYNPROG_DEBUG 01008 m_transition_matrix_a.set_array_name("transition_matrix"); 01009 m_transition_matrix_a.display_array(); 01010 m_mod_words.display_array() ; 01011 m_sign_words.display_array() ; 01012 m_string_words.display_array() ; 01013 //SG_PRINT("use_orf = %i\n", use_orf) ; 01014 #endif 01015 01016 int32_t max_look_back = 1000 ; 01017 bool use_svm = false ; 01018 01019 SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals) ; 01020 01021 //for (int32_t i=0;i<m_N*m_seq_len*max_num_signals;i++) 01022 // SG_PRINT("(%i)%0.2f ",i,seq_array[i]); 01023 01024 CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ; 01025 PEN.set_array_name("PEN"); 01026 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ; 01027 PEN_state_signals.set_array_name("state_signals"); 01028 01029 CArray2<float64_t> seq(m_N, m_seq_len) ; 01030 seq.set_array_name("seq") ; 01031 seq.zero() ; 01032 01033 #ifdef DYNPROG_DEBUG 01034 SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data); 01035 SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs); 01036 SG_PRINT("m_num_svms: %i\n", m_num_svms); 01037 SG_PRINT("m_num_lin_feat_plifs_cum: "); 01038 for (int i=0; i<=m_num_raw_data; i++) 01039 SG_PRINT(" %i ",m_num_lin_feat_plifs_cum[i]); 01040 SG_PRINT("\n"); 01041 #endif 01042 01043 float64_t* svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 01044 { // initialize svm_svalue 01045 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 01046 svm_value[s]=0 ; 01047 } 01048 01049 { // convert seq_input to seq 01050 // this is independent of the svm values 01051 01052 //CArray3<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ; 01053 CArray3<float64_t> *seq_input=NULL ; 01054 if (seq_array!=NULL) 01055 { 01056 //SG_PRINT("using dense seq_array\n") ; 01057 01058 seq_input=new CArray3<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ; 01059 seq_input->set_array_name("seq_input") ; 01060 //seq_input.display_array() ; 01061 01062 ASSERT(m_seq_sparse1==NULL) ; 01063 ASSERT(m_seq_sparse2==NULL) ; 01064 } else 01065 { 01066 SG_PRINT("using sparse seq_array\n") ; 01067 01068 ASSERT(m_seq_sparse1!=NULL) ; 01069 ASSERT(m_seq_sparse2!=NULL) ; 01070 ASSERT(max_num_signals==2) ; 01071 } 01072 01073 for (int32_t i=0; i<m_N; i++) 01074 for (int32_t j=0; j<m_seq_len; j++) 01075 seq.element(i,j) = 0 ; 01076 01077 for (int32_t i=0; i<m_N; i++) 01078 for (int32_t j=0; j<m_seq_len; j++) 01079 for (int32_t k=0; k<max_num_signals; k++) 01080 { 01081 if ((PEN_state_signals.element(i,k)==NULL) && (k==0)) 01082 { 01083 // no plif 01084 if (seq_input!=NULL) 01085 seq.element(i,j) = seq_input->element(i,j,k) ; 01086 else 01087 { 01088 if (k==0) 01089 seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ; 01090 if (k==1) 01091 seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ; 01092 } 01093 break ; 01094 } 01095 if (PEN_state_signals.element(i,k)!=NULL) 01096 { 01097 if (seq_input!=NULL) 01098 { 01099 // just one plif 01100 if (CMath::is_finite(seq_input->element(i,j,k))) 01101 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input->element(i,j,k), svm_value) ; 01102 else 01103 // keep infinity values 01104 seq.element(i,j) = seq_input->element(i, j, k) ; 01105 } 01106 else 01107 { 01108 if (k==0) 01109 { 01110 // just one plif 01111 if (CMath::is_finite(m_seq_sparse1->get_feature(i,j))) 01112 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ; 01113 else 01114 // keep infinity values 01115 seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ; 01116 } 01117 if (k==1) 01118 { 01119 // just one plif 01120 if (CMath::is_finite(m_seq_sparse2->get_feature(i,j))) 01121 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ; 01122 else 01123 // keep infinity values 01124 seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ; 01125 } 01126 } 01127 } 01128 else 01129 break ; 01130 } 01131 delete seq_input; 01132 SG_FREE(svm_value); 01133 } 01134 01135 // allow longer transitions than look_back 01136 bool long_transitions = m_long_transitions ; 01137 CArray2<int32_t> long_transition_content_start_position(m_N,m_N) ; 01138 long_transition_content_start_position.set_array_name("long_transition_content_start_position"); 01139 #ifdef DYNPROG_DEBUG 01140 CArray2<int32_t> long_transition_content_end_position(m_N,m_N) ; 01141 long_transition_content_end_position.set_array_name("long_transition_content_end_position"); 01142 #endif 01143 CArray2<int32_t> long_transition_content_start(m_N,m_N) ; 01144 long_transition_content_start.set_array_name("long_transition_content_start"); 01145 CArray2<float64_t> long_transition_content_scores(m_N,m_N) ; 01146 long_transition_content_scores.set_array_name("long_transition_content_scores"); 01147 #ifdef DYNPROG_DEBUG 01148 CArray2<float64_t> long_transition_content_scores_pen(m_N,m_N) ; 01149 long_transition_content_scores_pen.set_array_name("long_transition_content_scores_pen"); 01150 CArray2<float64_t> long_transition_content_scores_prev(m_N,m_N) ; 01151 long_transition_content_scores_prev.set_array_name("long_transition_content_scores_prev"); 01152 CArray2<float64_t> long_transition_content_scores_elem(m_N,m_N) ; 01153 long_transition_content_scores_elem.set_array_name("long_transition_content_scores_elem"); 01154 #endif 01155 CArray2<float64_t> long_transition_content_scores_loss(m_N,m_N) ; 01156 long_transition_content_scores_loss.set_array_name("long_transition_content_scores_loss"); 01157 01158 if (nbest!=1) 01159 { 01160 SG_ERROR("Long transitions are not supported for nbest!=1") ; 01161 long_transitions = false ; 01162 } 01163 long_transition_content_scores.set_const(-CMath::INFTY); 01164 #ifdef DYNPROG_DEBUG 01165 long_transition_content_scores_pen.set_const(0) ; 01166 long_transition_content_scores_elem.set_const(0) ; 01167 long_transition_content_scores_prev.set_const(0) ; 01168 #endif 01169 if (with_loss) 01170 long_transition_content_scores_loss.set_const(0) ; 01171 long_transition_content_start.zero() ; 01172 long_transition_content_start_position.zero() ; 01173 #ifdef DYNPROG_DEBUG 01174 long_transition_content_end_position.zero() ; 01175 #endif 01176 01177 svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 01178 { // initialize svm_svalue 01179 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 01180 svm_value[s]=0 ; 01181 } 01182 01183 CArray2<int32_t> look_back(m_N,m_N) ; 01184 look_back.set_array_name("look_back"); 01185 //CArray2<int32_t> look_back_orig(m_N,m_N) ; 01186 //look_back.set_array_name("look_back_orig"); 01187 01188 01189 { // determine maximal length of look-back 01190 for (int32_t i=0; i<m_N; i++) 01191 for (int32_t j=0; j<m_N; j++) 01192 { 01193 look_back.set_element(INT_MAX, i, j) ; 01194 //look_back_orig.set_element(INT_MAX, i, j) ; 01195 } 01196 01197 for (int32_t j=0; j<m_N; j++) 01198 { 01199 // only consider transitions that are actually allowed 01200 const T_STATES num_elem = trans_list_forward_cnt[j] ; 01201 const T_STATES *elem_list = trans_list_forward[j] ; 01202 01203 for (int32_t i=0; i<num_elem; i++) 01204 { 01205 T_STATES ii = elem_list[i] ; 01206 01207 CPlifBase *penij=PEN.element(j, ii) ; 01208 if (penij==NULL) 01209 { 01210 if (long_transitions) 01211 { 01212 look_back.set_element(m_long_transition_threshold, j, ii) ; 01213 //look_back_orig.set_element(m_long_transition_max, j, ii) ; 01214 } 01215 continue ; 01216 } 01217 01218 /* if the transition is an ORF or we do computation with loss, we have to disable long transitions */ 01219 if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions)) 01220 { 01221 look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ; 01222 //look_back_orig.set_element(CMath::ceil(penij->get_max_value()), j, ii) ; 01223 if (CMath::ceil(penij->get_max_value()) > max_look_back) 01224 { 01225 SG_DEBUG( "%d %d -> value: %f\n", ii,j,penij->get_max_value()); 01226 max_look_back = (int32_t) (CMath::ceil(penij->get_max_value())); 01227 } 01228 } 01229 else 01230 { 01231 look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ; 01232 //look_back_orig.set_element( (int32_t)CMath::ceil(penij->get_max_value()), j, ii) ; 01233 } 01234 01235 if (penij->uses_svm_values()) 01236 use_svm=true ; 01237 } 01238 } 01239 /* make sure max_look_back is at least as long as a long transition */ 01240 if (long_transitions) 01241 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ; 01242 01243 /* make sure max_look_back is not longer than the whole string */ 01244 max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ; 01245 01246 int32_t num_long_transitions = 0 ; 01247 for (int32_t i=0; i<m_N; i++) 01248 for (int32_t j=0; j<m_N; j++) 01249 { 01250 if (look_back.get_element(i,j)==m_long_transition_threshold) 01251 num_long_transitions++ ; 01252 if (look_back.get_element(i,j)==INT_MAX) 01253 { 01254 if (long_transitions) 01255 { 01256 look_back.set_element(m_long_transition_threshold, i, j) ; 01257 //look_back_orig.set_element(m_long_transition_max, i, j) ; 01258 } 01259 else 01260 { 01261 look_back.set_element(max_look_back, i, j) ; 01262 //look_back_orig.set_element(m_long_transition_max, i, j) ; 01263 } 01264 } 01265 } 01266 SG_DEBUG("Using %i long transitions\n", num_long_transitions) ; 01267 } 01268 //SG_PRINT("max_look_back: %i \n", max_look_back) ; 01269 01270 //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr.get_dim1()) ; 01271 SG_DEBUG("use_svm=%i\n", use_svm) ; 01272 01273 SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest); 01274 const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ; 01275 SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ; 01276 /*const float64_t mem_use = (float64_t)(m_seq_len*m_N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+ 01277 look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+ 01278 m_seq_len*(sizeof(T_STATES)+sizeof(int32_t))+ 01279 m_genestr.get_dim1()*sizeof(bool))/(1024*1024);*/ 01280 01281 //bool is_big = (mem_use>200) || (m_seq_len>5000) ; 01282 01283 /*if (is_big) 01284 { 01285 SG_DEBUG("calling compute_nbest_paths: m_seq_len=%i, m_N=%i, lookback=%i nbest=%i\n", 01286 m_seq_len, m_N, max_look_back, nbest) ; 01287 SG_DEBUG("allocating %1.2fMB of memory\n", 01288 mem_use) ; 01289 }*/ 01290 ASSERT(nbest<32000) ; 01291 01292 01293 01294 CArray3<float64_t> delta(m_seq_len, m_N, nbest) ; 01295 delta.set_array_name("delta"); 01296 float64_t* delta_array = delta.get_array() ; 01297 //delta.zero() ; 01298 01299 CArray3<T_STATES> psi(m_seq_len, m_N, nbest) ; 01300 psi.set_array_name("psi"); 01301 //psi.zero() ; 01302 01303 CArray3<int16_t> ktable(m_seq_len, m_N, nbest) ; 01304 ktable.set_array_name("ktable"); 01305 //ktable.zero() ; 01306 01307 CArray3<int32_t> ptable(m_seq_len, m_N, nbest) ; 01308 ptable.set_array_name("ptable"); 01309 //ptable.zero() ; 01310 01311 CArray<float64_t> delta_end(nbest) ; 01312 delta_end.set_array_name("delta_end"); 01313 //delta_end.zero() ; 01314 01315 CArray<T_STATES> path_ends(nbest) ; 01316 path_ends.set_array_name("path_ends"); 01317 //path_ends.zero() ; 01318 01319 CArray<int16_t> ktable_end(nbest) ; 01320 ktable_end.set_array_name("ktable_end"); 01321 //ktable_end.zero() ; 01322 01323 float64_t * fixedtempvv=SG_MALLOC(float64_t, look_back_buflen); 01324 memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ; 01325 int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen); 01326 memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ; 01327 01328 CArray<float64_t> oldtempvv(look_back_buflen) ; 01329 oldtempvv.set_array_name("oldtempvv"); 01330 CArray<float64_t> oldtempvv2(look_back_buflen) ; 01331 oldtempvv2.set_array_name("oldtempvv2"); 01332 //oldtempvv.zero() ; 01333 //oldtempvv.display_size() ; 01334 01335 CArray<int32_t> oldtempii(look_back_buflen) ; 01336 oldtempii.set_array_name("oldtempii"); 01337 CArray<int32_t> oldtempii2(look_back_buflen) ; 01338 oldtempii2.set_array_name("oldtempii2"); 01339 //oldtempii.zero() ; 01340 01341 CArray<T_STATES> state_seq(m_seq_len) ; 01342 state_seq.set_array_name("state_seq"); 01343 //state_seq.zero() ; 01344 01345 CArray<int32_t> pos_seq(m_seq_len) ; 01346 pos_seq.set_array_name("pos_seq"); 01347 //pos_seq.zero() ; 01348 01349 01350 m_dict_weights.set_array_name("dict_weights") ; 01351 m_word_degree.set_array_name("word_degree") ; 01352 m_cum_num_words.set_array_name("cum_num_words") ; 01353 m_num_words.set_array_name("num_words") ; 01354 //word_used.set_array_name("word_used") ; 01355 //svm_values_unnormalized.set_array_name("svm_values_unnormalized") ; 01356 //m_svm_pos_start.set_array_name("svm_pos_start") ; 01357 m_num_unique_words.set_array_name("num_unique_words") ; 01358 01359 PEN.set_array_name("PEN") ; 01360 seq.set_array_name("seq") ; 01361 01362 delta.set_array_name("delta") ; 01363 psi.set_array_name("psi") ; 01364 ktable.set_array_name("ktable") ; 01365 ptable.set_array_name("ptable") ; 01366 delta_end.set_array_name("delta_end") ; 01367 path_ends.set_array_name("path_ends") ; 01368 ktable_end.set_array_name("ktable_end") ; 01369 01370 #ifdef USE_TMP_ARRAYCLASS 01371 fixedtempvv.set_array_name("fixedtempvv") ; 01372 fixedtempii.set_array_name("fixedtempvv") ; 01373 #endif 01374 01375 oldtempvv.set_array_name("oldtempvv") ; 01376 oldtempvv2.set_array_name("oldtempvv2") ; 01377 oldtempii.set_array_name("oldtempii") ; 01378 oldtempii2.set_array_name("oldtempii2") ; 01379 01380 01382 01383 #ifdef DYNPROG_DEBUG 01384 state_seq.display_size() ; 01385 pos_seq.display_size() ; 01386 01387 m_dict_weights.display_size() ; 01388 m_word_degree.display_array() ; 01389 m_cum_num_words.display_array() ; 01390 m_num_words.display_array() ; 01391 //word_used.display_size() ; 01392 //svm_values_unnormalized.display_size() ; 01393 //m_svm_pos_start.display_array() ; 01394 m_num_unique_words.display_array() ; 01395 01396 PEN.display_size() ; 01397 PEN_state_signals.display_size() ; 01398 seq.display_size() ; 01399 m_orf_info.display_size() ; 01400 01401 //m_genestr_stop.display_size() ; 01402 delta.display_size() ; 01403 psi.display_size() ; 01404 ktable.display_size() ; 01405 ptable.display_size() ; 01406 delta_end.display_size() ; 01407 path_ends.display_size() ; 01408 ktable_end.display_size() ; 01409 01410 #ifdef USE_TMP_ARRAYCLASS 01411 fixedtempvv.display_size() ; 01412 fixedtempii.display_size() ; 01413 #endif 01414 01415 //oldtempvv.display_size() ; 01416 //oldtempii.display_size() ; 01417 01418 state_seq.display_size() ; 01419 pos_seq.display_size() ; 01420 01421 //seq.zero() ; 01422 01423 #endif //DYNPROG_DEBUG 01424 01426 01427 01428 01429 { 01430 for (int32_t s=0; s<m_num_svms; s++) 01431 ASSERT(m_string_words_array[s]<1) ; 01432 } 01433 01434 01435 //CArray2<int32_t*> trans_matrix_svms(m_N,m_N); 01436 //CArray2<int32_t> trans_matrix_num_svms(m_N,m_N); 01437 01438 { // initialization 01439 01440 for (T_STATES i=0; i<m_N; i++) 01441 { 01442 //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ; // get_p defined in HMM.h to be equiv to initial_state_distribution 01443 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ; // get_p defined in HMM.h to be equiv to initial_state_distribution 01444 psi.element(0,i,0) = 0 ; 01445 if (nbest>1) 01446 ktable.element(0,i,0) = 0 ; 01447 ptable.element(0,i,0) = 0 ; 01448 for (int16_t k=1; k<nbest; k++) 01449 { 01450 int32_t dim1, dim2, dim3 ; 01451 delta.get_array_size(dim1, dim2, dim3) ; 01452 //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3) ; 01453 //delta.element(0, i, k) = -CMath::INFTY ; 01454 delta.element(delta_array, 0, i, k, m_seq_len, m_N) = -CMath::INFTY ; 01455 psi.element(0,i,0) = 0 ; // <--- what's this for? 01456 if (nbest>1) 01457 ktable.element(0,i,k) = 0 ; 01458 ptable.element(0,i,k) = 0 ; 01459 } 01460 /* 01461 for (T_STATES j=0; j<m_N; j++) 01462 { 01463 CPlifBase * penalty = PEN.element(i,j) ; 01464 int32_t num_current_svms=0; 01465 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 01466 if (penalty) 01467 { 01468 SG_PRINT("trans %i -> %i \n",i,j); 01469 penalty->get_used_svms(&num_current_svms, svm_ids); 01470 trans_matrix_svms.set_element(svm_ids,i,j); 01471 for (int32_t l=0;l<num_current_svms;l++) 01472 SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l]); 01473 trans_matrix_num_svms.set_element(num_current_svms,i,j); 01474 } 01475 } 01476 */ 01477 01478 } 01479 } 01480 01481 SG_DEBUG("START_RECURSION \n\n"); 01482 01483 // recursion 01484 for (int32_t t=1; t<m_seq_len; t++) 01485 { 01486 //if (is_big && t%(1+(m_seq_len/1000))==1) 01487 // SG_PROGRESS(t, 0, m_seq_len); 01488 //SG_PRINT("%i\n", t) ; 01489 01490 for (T_STATES j=0; j<m_N; j++) 01491 { 01492 if (seq.element(j,t)<=-1e20) 01493 { // if we cannot observe the symbol here, then we can omit the rest 01494 for (int16_t k=0; k<nbest; k++) 01495 { 01496 delta.element(delta_array, t, j, k, m_seq_len, m_N) = seq.element(j,t) ; 01497 psi.element(t,j,k) = 0 ; 01498 if (nbest>1) 01499 ktable.element(t,j,k) = 0 ; 01500 ptable.element(t,j,k) = 0 ; 01501 } 01502 } 01503 else 01504 { 01505 const T_STATES num_elem = trans_list_forward_cnt[j] ; 01506 const T_STATES *elem_list = trans_list_forward[j] ; 01507 const float64_t *elem_val = trans_list_forward_val[j] ; 01508 const int32_t *elem_id = trans_list_forward_id[j] ; 01509 01510 int32_t fixed_list_len = 0 ; 01511 float64_t fixedtempvv_ = CMath::INFTY ; 01512 int32_t fixedtempii_ = 0 ; 01513 bool fixedtemplong = false ; 01514 01515 for (int32_t i=0; i<num_elem; i++) 01516 { 01517 T_STATES ii = elem_list[i] ; 01518 01519 const CPlifBase * penalty = PEN.element(j,ii) ; 01520 01521 /*int32_t look_back = max_look_back ; 01522 if (0) 01523 { // find lookback length 01524 CPlifBase *pen = (CPlifBase*) penalty ; 01525 if (pen!=NULL) 01526 look_back=(int32_t) (CMath::ceil(pen->get_max_value())); 01527 if (look_back>=1e6) 01528 SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ; 01529 ASSERT(look_back<1e6); 01530 } */ 01531 01532 int32_t look_back_ = look_back.element(j, ii) ; 01533 01534 int32_t orf_from = m_orf_info.element(ii,0) ; 01535 int32_t orf_to = m_orf_info.element(j,1) ; 01536 if((orf_from!=-1)!=(orf_to!=-1)) 01537 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ; 01538 ASSERT((orf_from!=-1)==(orf_to!=-1)) ; 01539 01540 int32_t orf_target = -1 ; 01541 if (orf_from!=-1) 01542 { 01543 orf_target=orf_to-orf_from ; 01544 if (orf_target<0) 01545 orf_target+=3 ; 01546 ASSERT(orf_target>=0 && orf_target<3) ; 01547 } 01548 01549 int32_t orf_last_pos = m_pos[t] ; 01550 #ifdef DYNPROG_TIMING 01551 MyTime3.start() ; 01552 #endif 01553 int32_t num_ok_pos = 0 ; 01554 01555 for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--) 01556 { 01557 bool ok ; 01558 //int32_t plen=t-ts; 01559 01560 /*for (int32_t s=0; s<m_num_svms; s++) 01561 if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) || 01562 (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6)) 01563 { 01564 SG_DEBUG( "s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen]); 01565 }*/ 01566 01567 if (orf_target==-1) 01568 ok=true ; 01569 else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target) 01570 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ; 01571 else 01572 ok=false ; 01573 01574 if (ok) 01575 { 01576 01577 float64_t segment_loss = 0.0 ; 01578 if (with_loss) 01579 { 01580 segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]); 01581 //if (segment_loss!=segment_loss2) 01582 //SG_PRINT("segment_loss:%f segment_loss2:%f\n", segment_loss, segment_loss2); 01583 } 01585 // BEST_PATH_TRANS 01587 01588 int32_t frame = orf_from;//m_orf_info.element(ii,0); 01589 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 01590 01591 float64_t pen_val = 0.0 ; 01592 if (penalty) 01593 { 01594 #ifdef DYNPROG_TIMING_DETAIL 01595 MyTime.start() ; 01596 #endif 01597 pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ; 01598 01599 #ifdef DYNPROG_TIMING_DETAIL 01600 MyTime.stop() ; 01601 content_plifs_time += MyTime.time_diff_sec() ; 01602 #endif 01603 } 01604 01605 #ifdef DYNPROG_TIMING_DETAIL 01606 MyTime.start() ; 01607 #endif 01608 num_ok_pos++ ; 01609 01610 if (nbest==1) 01611 { 01612 float64_t val = elem_val[i] + pen_val ; 01613 if (with_loss) 01614 val += segment_loss ; 01615 01616 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ; 01617 01618 if (mval<fixedtempvv_) 01619 { 01620 fixedtempvv_ = mval ; 01621 fixedtempii_ = ii + ts*m_N; 01622 fixed_list_len = 1 ; 01623 fixedtemplong = false ; 01624 } 01625 } 01626 else 01627 { 01628 for (int16_t diff=0; diff<nbest; diff++) 01629 { 01630 float64_t val = elem_val[i] ; 01631 val += pen_val ; 01632 if (with_loss) 01633 val += segment_loss ; 01634 01635 float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ; 01636 01637 /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */ 01638 /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/ 01639 /* fixed_list_len has the number of elements in fixedtempvv */ 01640 01641 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1]))) 01642 { 01643 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) ) 01644 { 01645 fixedtempvv[fixed_list_len] = mval ; 01646 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest; 01647 fixed_list_len++ ; 01648 } 01649 else // must have mval < fixedtempvv[fixed_list_len-1] 01650 { 01651 int32_t addhere = fixed_list_len; 01652 while ((addhere > 0) && (mval < fixedtempvv[addhere-1])) 01653 addhere--; 01654 01655 // move everything from addhere+1 one forward 01656 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--) 01657 { 01658 fixedtempvv[jj] = fixedtempvv[jj-1]; 01659 fixedtempii[jj] = fixedtempii[jj-1]; 01660 } 01661 01662 fixedtempvv[addhere] = mval; 01663 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest; 01664 01665 if (fixed_list_len < nbest) 01666 fixed_list_len++; 01667 } 01668 } 01669 } 01670 } 01671 #ifdef DYNPROG_TIMING_DETAIL 01672 MyTime.stop() ; 01673 inner_loop_max_time += MyTime.time_diff_sec() ; 01674 #endif 01675 } 01676 } 01677 #ifdef DYNPROG_TIMING 01678 MyTime3.stop() ; 01679 inner_loop_time += MyTime3.time_diff_sec() ; 01680 #endif 01681 } 01682 for (int32_t i=0; i<num_elem; i++) 01683 { 01684 T_STATES ii = elem_list[i] ; 01685 01686 const CPlifBase * penalty = PEN.element(j,ii) ; 01687 01688 /*int32_t look_back = max_look_back ; 01689 if (0) 01690 { // find lookback length 01691 CPlifBase *pen = (CPlifBase*) penalty ; 01692 if (pen!=NULL) 01693 look_back=(int32_t) (CMath::ceil(pen->get_max_value())); 01694 if (look_back>=1e6) 01695 SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ; 01696 ASSERT(look_back<1e6); 01697 } */ 01698 01699 int32_t look_back_ = look_back.element(j, ii) ; 01700 //int32_t look_back_orig_ = look_back_orig.element(j, ii) ; 01701 01702 int32_t orf_from = m_orf_info.element(ii,0) ; 01703 int32_t orf_to = m_orf_info.element(j,1) ; 01704 if((orf_from!=-1)!=(orf_to!=-1)) 01705 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ; 01706 ASSERT((orf_from!=-1)==(orf_to!=-1)) ; 01707 01708 int32_t orf_target = -1 ; 01709 if (orf_from!=-1) 01710 { 01711 orf_target=orf_to-orf_from ; 01712 if (orf_target<0) 01713 orf_target+=3 ; 01714 ASSERT(orf_target>=0 && orf_target<3) ; 01715 } 01716 01717 //int32_t loss_last_pos = t ; 01718 //float64_t last_loss = 0.0 ; 01719 01720 #ifdef DYNPROG_TIMING 01721 MyTime3.start() ; 01722 #endif 01723 01724 /* long transition stuff */ 01725 /* only do this, if 01726 * this feature is enabled 01727 * this is not a transition with ORF restrictions 01728 * the loss is switched off 01729 * nbest=1 01730 */ 01731 #ifdef DYNPROG_TIMING 01732 MyTime3.start() ; 01733 #endif 01734 // long transitions, only when not considering ORFs 01735 if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold ) 01736 { 01737 01738 // update table for 5' part of the long segment 01739 01740 int32_t start = long_transition_content_start.get_element(ii, j) ; 01741 int32_t end_5p_part = start ; 01742 for (int32_t start_5p_part=start; m_pos[t]-m_pos[start_5p_part] > m_long_transition_threshold ; start_5p_part++) 01743 { 01744 // find end_5p_part, which is greater than start_5p_part and at least m_long_transition_threshold away 01745 while (end_5p_part<=t && m_pos[end_5p_part+1]-m_pos[start_5p_part]<=m_long_transition_threshold) 01746 end_5p_part++ ; 01747 01748 ASSERT(m_pos[end_5p_part+1]-m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t) ; 01749 ASSERT(m_pos[end_5p_part]-m_pos[start_5p_part] <= m_long_transition_threshold) ; 01750 01751 float64_t pen_val = 0.0; 01752 /* recompute penalty, if necessary */ 01753 if (penalty) 01754 { 01755 int32_t frame = m_orf_info.element(ii,0); 01756 lookup_content_svm_values(start_5p_part, end_5p_part, m_pos[start_5p_part], m_pos[end_5p_part], svm_value, frame); // * t -> end_5p_part 01757 pen_val = penalty->lookup_penalty(m_pos[end_5p_part]-m_pos[start_5p_part], svm_value) ; 01758 } 01759 01760 /*if (m_pos[start_5p_part]==1003) 01761 { 01762 SG_PRINT("Part1: %i - %i vs %i - %i\n", m_pos[t], m_pos[ts], m_pos[end_5p_part], m_pos[start_5p_part]) ; 01763 SG_PRINT("Part1: ts=%i t=%i start_5p_part=%i m_seq_len=%i\n", m_pos[ts], m_pos[t], m_pos[start_5p_part], m_seq_len) ; 01764 }*/ 01765 01766 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ; 01767 //float64_t mval_trans = -( elem_val[i] + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ; // enable this for the incomplete extra check 01768 01769 float64_t segment_loss_part1=0.0 ; 01770 if (with_loss) 01771 { // this is the loss from the start of the long segment (5' part + middle section) 01772 01773 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_5p_part /*long_transition_content_start_position.get_element(ii,j)*/, end_5p_part, elem_id[i]); // * unsure 01774 01775 mval_trans -= segment_loss_part1 ; 01776 } 01777 01778 01779 if (0)//m_pos[end_5p_part] - m_pos[long_transition_content_start_position.get_element(ii, j)] > look_back_orig_/*m_long_transition_max*/) 01780 { 01781 // this restricts the maximal length of segments, 01782 // but the current implementation is not valid since the 01783 // long transition is discarded without loocking if there 01784 // is a second best long transition in between 01785 long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ; 01786 long_transition_content_start_position.set_element(0, ii, j) ; 01787 if (with_loss) 01788 long_transition_content_scores_loss.set_element(0.0, ii, j) ; 01789 #ifdef DYNPROG_DEBUG 01790 long_transition_content_scores_pen.set_element(0.0, ii, j) ; 01791 long_transition_content_scores_elem.set_element(0.0, ii, j) ; 01792 long_transition_content_scores_prev.set_element(0.0, ii, j) ; 01793 long_transition_content_end_position.set_element(0, ii, j) ; 01794 #endif 01795 } 01796 if (with_loss) 01797 { 01798 float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ; 01799 float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_5p_part, elem_id[i]); 01800 float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ; 01801 long_transition_content_scores.set_element(score, ii, j) ; 01802 long_transition_content_scores_loss.set_element(new_loss, ii, j) ; 01803 #ifdef DYNPROG_DEBUG 01804 long_transition_content_end_position.set_element(end_5p_part, ii, j) ; 01805 #endif 01806 01807 } 01808 if (-long_transition_content_scores.get_element(ii, j) > mval_trans ) 01809 { 01810 /* then the old long transition is either too far away or worse than the current one */ 01811 long_transition_content_scores.set_element(-mval_trans, ii, j) ; 01812 long_transition_content_start_position.set_element(start_5p_part, ii, j) ; 01813 if (with_loss) 01814 long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ; 01815 #ifdef DYNPROG_DEBUG 01816 long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ; 01817 long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ; 01818 long_transition_content_scores_prev.set_element(delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ; 01819 /*ASSERT(fabs(long_transition_content_scores.get_element(ii, j)-(long_transition_content_scores_pen.get_element(ii, j) + 01820 long_transition_content_scores_elem.get_element(ii, j) + 01821 long_transition_content_scores_prev.get_element(ii, j)))<1e-6) ;*/ 01822 long_transition_content_end_position.set_element(end_5p_part, ii, j) ; 01823 #endif 01824 } 01825 // 01826 // this sets the position where the search for better 5'parts is started the next time 01827 // whithout this the prediction takes ages 01828 // 01829 long_transition_content_start.set_element(start_5p_part, ii, j) ; 01830 } 01831 01832 // consider the 3' part at the end of the long segment: 01833 // * with length = m_long_transition_threshold 01834 // * content prediction and loss only for this part 01835 01836 // find ts > 0 with distance from m_pos[t] greater m_long_transition_threshold 01837 // precompute: only depends on t 01838 int ts = t; 01839 while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold) 01840 ts-- ; 01841 01842 if (ts>0) 01843 { 01844 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ; 01845 01846 01847 /* only consider this transition, if the right position was found */ 01848 float pen_val_3p = 0.0 ; 01849 if (penalty) 01850 { 01851 int32_t frame = orf_from ; //m_orf_info.element(ii, 0); 01852 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 01853 pen_val_3p = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ; 01854 } 01855 01856 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val_3p*0.5) ; 01857 01858 { 01859 #ifdef DYNPROG_DEBUG 01860 float64_t segment_loss_part2=0.0 ; 01861 float64_t segment_loss_part1=0.0 ; 01862 #endif 01863 float64_t segment_loss_total=0.0 ; 01864 01865 if (with_loss) 01866 { // this is the loss for the 3' end fragment of the segment 01867 // (the 5' end and the middle section loss is already contained in mval) 01868 01869 #ifdef DYNPROG_DEBUG 01870 // this is an alternative, which should be identical, if the loss is additive 01871 segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]); 01872 //mval -= segment_loss_part2 ; 01873 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]); 01874 #endif 01875 segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]); 01876 mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ; 01877 } 01878 01879 #ifdef DYNPROG_DEBUG 01880 if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561) 01881 { 01882 SG_PRINT("Part2: %i,%i,%i: val=%1.6f pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n", 01883 m_pos[t], j, ii, -mval, 0.5*pen_val_3p, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1], 01884 long_transition_content_scores.get_element(ii, j), 01885 long_transition_content_scores_pen.get_element(ii, j), 01886 long_transition_content_scores_prev.get_element(ii, j), 01887 long_transition_content_scores_elem.get_element(ii, j), 01888 long_transition_content_scores_loss.get_element(ii, j), 01889 m_pos[long_transition_content_start_position.get_element(ii,j)], 01890 m_pos[long_transition_content_end_position.get_element(ii,j)], 01891 m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ; 01892 SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] ); 01893 } 01894 01895 if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3) 01896 { 01897 SG_ERROR("LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n", 01898 segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t], 01899 long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)], 01900 segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t], 01901 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j), 01902 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ; 01903 } 01904 #endif 01905 } 01906 01907 // prefer simpler version to guarantee optimality 01908 // 01909 // original: 01910 /* if ((mval < fixedtempvv_) && 01911 (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_) */ 01912 if (mval < fixedtempvv_) 01913 { 01914 /* then the long transition is better than the short one => replace it */ 01915 int32_t fromtjk = fixedtempii_ ; 01916 /*SG_PRINT("%i,%i: Long transition (%1.5f=-(%1.5f+%1.5f+%1.5f+%1.5f), %i) to m_pos %i better than short transition (%1.5f,%i) to m_pos %i \n", 01917 m_pos[t], j, 01918 mval, pen_val_3p*0.5, long_transition_content_scores_pen.get_element(ii, j), long_transition_content_scores_elem.get_element(ii, j), long_transition_content_scores_prev.get_element(ii, j), ii, 01919 m_pos[long_transition_content_position.get_element(ii, j)], 01920 fixedtempvv_, (fromtjk%m_N), m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]) ;*/ 01921 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ; 01922 01923 fixedtempvv_ = mval ; 01924 fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ; 01925 fixed_list_len = 1 ; 01926 fixedtemplong = true ; 01927 } 01928 } 01929 } 01930 } 01931 #ifdef DYNPROG_TIMING 01932 MyTime3.stop() ; 01933 long_transition_time += MyTime3.time_diff_sec() ; 01934 #endif 01935 01936 01937 int32_t numEnt = fixed_list_len; 01938 01939 float64_t minusscore; 01940 int64_t fromtjk; 01941 01942 for (int16_t k=0; k<nbest; k++) 01943 { 01944 if (k<numEnt) 01945 { 01946 if (nbest==1) 01947 { 01948 minusscore = fixedtempvv_ ; 01949 fromtjk = fixedtempii_ ; 01950 } 01951 else 01952 { 01953 minusscore = fixedtempvv[k]; 01954 fromtjk = fixedtempii[k]; 01955 } 01956 01957 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.element(j,t); 01958 psi.element(t,j,k) = (fromtjk%m_N) ; 01959 if (nbest>1) 01960 ktable.element(t,j,k) = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ; 01961 ptable.element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ; 01962 } 01963 else 01964 { 01965 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -CMath::INFTY ; 01966 psi.element(t,j,k) = 0 ; 01967 if (nbest>1) 01968 ktable.element(t,j,k) = 0 ; 01969 ptable.element(t,j,k) = 0 ; 01970 } 01971 } 01972 } 01973 } 01974 } 01975 { //termination 01976 int32_t list_len = 0 ; 01977 for (int16_t diff=0; diff<nbest; diff++) 01978 { 01979 for (T_STATES i=0; i<m_N; i++) 01980 { 01981 oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ; 01982 oldtempii[list_len] = i + diff*m_N ; 01983 list_len++ ; 01984 } 01985 } 01986 01987 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ; 01988 01989 for (int16_t k=0; k<nbest; k++) 01990 { 01991 delta_end.element(k) = -oldtempvv[k] ; 01992 path_ends.element(k) = (oldtempii[k]%m_N) ; 01993 if (nbest>1) 01994 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ; 01995 } 01996 01997 01998 } 01999 02000 { //state sequence backtracking 02001 for (int16_t k=0; k<nbest; k++) 02002 { 02003 prob_nbest[k]= delta_end.element(k) ; 02004 02005 int32_t i = 0 ; 02006 state_seq[i] = path_ends.element(k) ; 02007 int16_t q = 0 ; 02008 if (nbest>1) 02009 q=ktable_end.element(k) ; 02010 pos_seq[i] = m_seq_len-1 ; 02011 02012 while (pos_seq[i]>0) 02013 { 02014 ASSERT(i+1<m_seq_len); 02015 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ; 02016 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q); 02017 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ; 02018 if (nbest>1) 02019 q = ktable.element(pos_seq[i], state_seq[i], q) ; 02020 i++ ; 02021 } 02022 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ; 02023 int32_t num_states = i+1 ; 02024 for (i=0; i<num_states;i++) 02025 { 02026 my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ; 02027 my_pos_seq[i+k*m_seq_len] = pos_seq[num_states-i-1] ; 02028 } 02029 if (num_states<m_seq_len) 02030 { 02031 my_state_seq[num_states+k*m_seq_len]=-1 ; 02032 my_pos_seq[num_states+k*m_seq_len]=-1 ; 02033 } 02034 } 02035 } 02036 02037 //if (is_big) 02038 // SG_PRINT( "DONE. \n") ; 02039 02040 02041 #ifdef DYNPROG_TIMING 02042 MyTime2.stop() ; 02043 02044 //if (is_big) 02045 SG_PRINT("Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec()) ; 02046 #endif 02047 02048 SG_FREE(fixedtempvv); 02049 SG_FREE(fixedtempii); 02050 } 02051 02052 02053 void CDynProg::best_path_trans_deriv( 02054 int32_t *my_state_seq, int32_t *my_pos_seq, 02055 int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals) 02056 { 02057 m_initial_state_distribution_p_deriv.resize_array(m_N) ; 02058 m_end_state_distribution_q_deriv.resize_array(m_N) ; 02059 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ; 02060 //m_my_scores.resize_array(m_my_state_seq.get_array_size()) ; 02061 //m_my_losses.resize_array(m_my_state_seq.get_array_size()) ; 02062 m_my_scores.resize_array(my_seq_len); 02063 m_my_losses.resize_array(my_seq_len); 02064 float64_t* my_scores=m_my_scores.get_array(); 02065 float64_t* my_losses=m_my_losses.get_array(); 02066 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix(); 02067 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals(); 02068 02069 if (!m_svm_arrays_clean) 02070 { 02071 SG_ERROR( "SVM arrays not clean") ; 02072 return ; 02073 } ; 02074 //SG_PRINT( "genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num) ; 02075 //m_mod_words.display() ; 02076 //m_sign_words.display() ; 02077 //m_string_words.display() ; 02078 02079 bool use_svm = false ; 02080 02081 CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ; 02082 PEN.set_array_name("PEN"); 02083 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ; 02084 PEN_state_signals.set_array_name("PEN_state_signals"); 02085 CArray3<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ; 02086 seq_input.set_array_name("seq_input"); 02087 02088 { // determine whether to use svm outputs and clear derivatives 02089 for (int32_t i=0; i<m_N; i++) 02090 for (int32_t j=0; j<m_N; j++) 02091 { 02092 CPlifBase *penij=PEN.element(i,j) ; 02093 if (penij==NULL) 02094 continue ; 02095 02096 if (penij->uses_svm_values()) 02097 use_svm=true ; 02098 penij->penalty_clear_derivative() ; 02099 } 02100 for (int32_t i=0; i<m_N; i++) 02101 for (int32_t j=0; j<max_num_signals; j++) 02102 { 02103 CPlifBase *penij=PEN_state_signals.element(i,j) ; 02104 if (penij==NULL) 02105 continue ; 02106 if (penij->uses_svm_values()) 02107 use_svm=true ; 02108 penij->penalty_clear_derivative() ; 02109 } 02110 } 02111 02112 { // set derivatives of p, q and a to zero 02113 02114 for (int32_t i=0; i<m_N; i++) 02115 { 02116 m_initial_state_distribution_p_deriv.element(i)=0 ; 02117 m_end_state_distribution_q_deriv.element(i)=0 ; 02118 for (int32_t j=0; j<m_N; j++) 02119 m_transition_matrix_a_deriv.element(i,j)=0 ; 02120 } 02121 } 02122 02123 { // clear score vector 02124 for (int32_t i=0; i<my_seq_len; i++) 02125 { 02126 my_scores[i]=0.0 ; 02127 my_losses[i]=0.0 ; 02128 } 02129 } 02130 02131 //int32_t total_len = 0 ; 02132 02133 //m_transition_matrix_a.display_array() ; 02134 //m_transition_matrix_a_id.display_array() ; 02135 02136 // compute derivatives for given path 02137 float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 02138 float64_t* svm_value_part1 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 02139 float64_t* svm_value_part2 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 02140 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02141 { 02142 svm_value[s]=0 ; 02143 svm_value_part1[s]=0 ; 02144 svm_value_part2[s]=0 ; 02145 } 02146 02147 //#ifdef DYNPROG_DEBUG 02148 float64_t total_score = 0.0 ; 02149 float64_t total_loss = 0.0 ; 02150 //#endif 02151 02152 ASSERT(my_state_seq[0]>=0) ; 02153 m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ; 02154 my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ; 02155 02156 ASSERT(my_state_seq[my_seq_len-1]>=0) ; 02157 m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ; 02158 my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]); 02159 02160 //#ifdef DYNPROG_DEBUG 02161 total_score += my_scores[0] + my_scores[my_seq_len-1] ; 02162 //#endif 02163 02164 SG_DEBUG( "m_seq_len=%i\n", my_seq_len) ; 02165 for (int32_t i=0; i<my_seq_len-1; i++) 02166 { 02167 if (my_state_seq[i+1]==-1) 02168 break ; 02169 int32_t from_state = my_state_seq[i] ; 02170 int32_t to_state = my_state_seq[i+1] ; 02171 int32_t from_pos = my_pos_seq[i] ; 02172 int32_t to_pos = my_pos_seq[i+1] ; 02173 02174 int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ; 02175 my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id); 02176 02177 #ifdef DYNPROG_DEBUG 02178 02179 02180 if (i>0)// test if segment loss is additive 02181 { 02182 float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id); 02183 float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id); 02184 float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id); 02185 SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3); 02186 if (CMath::abs(loss1+loss2-loss3)>0) 02187 { 02188 SG_PRINT( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ; 02189 } 02190 } 02191 io->set_loglevel(M_DEBUG) ; 02192 SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ; 02193 #endif 02194 // increase usage of this transition 02195 m_transition_matrix_a_deriv.element(from_state, to_state)++ ; 02196 my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ; 02197 //SG_PRINT("m_transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, m_transition_matrix_a.element(from_state, to_state)); 02198 #ifdef DYNPROG_DEBUG 02199 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ; 02200 #endif 02201 02202 /*int32_t last_svm_pos[m_num_degrees] ; 02203 for (int32_t qq=0; qq<m_num_degrees; qq++) 02204 last_svm_pos[qq]=-1 ;*/ 02205 02206 bool is_long_transition = false ; 02207 if (m_long_transitions) 02208 { 02209 if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold) 02210 is_long_transition = true ; 02211 if (m_orf_info.element(from_state,0)!=-1) 02212 is_long_transition = false ; 02213 } 02214 02215 int32_t from_pos_thresh = from_pos ; 02216 int32_t to_pos_thresh = to_pos ; 02217 02218 if (use_svm) 02219 { 02220 if (is_long_transition) 02221 { 02222 02223 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold) // * 02224 from_pos_thresh++ ; 02225 ASSERT(from_pos_thresh<to_pos) ; 02226 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold); // * 02227 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold);// * 02228 02229 int32_t frame = m_orf_info.element(from_state,0); 02230 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame); 02231 02232 #ifdef DYNPROG_DEBUG 02233 SG_PRINT("part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1]) ; 02234 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02235 SG_PRINT("%1.4f ", svm_value_part1[s]); 02236 SG_PRINT("\n"); 02237 #endif 02238 02239 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold) // * 02240 to_pos_thresh-- ; 02241 ASSERT(to_pos_thresh>0) ; 02242 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) ; // * 02243 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) ; // * 02244 02245 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame); 02246 02247 #ifdef DYNPROG_DEBUG 02248 SG_PRINT("part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1]) ; 02249 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02250 SG_PRINT("%1.4f ", svm_value_part2[s]); 02251 SG_PRINT("\n"); 02252 #endif 02253 } 02254 else 02255 { 02256 /* normal case */ 02257 02258 //SG_PRINT("from_pos: %i; to_pos: %i; m_pos[to_pos]-m_pos[from_pos]: %i \n",from_pos, to_pos, m_pos[to_pos]-m_pos[from_pos]); 02259 int32_t frame = m_orf_info.element(from_state,0); 02260 if (false)//(frame>=0) 02261 { 02262 int32_t num_current_svms=0; 02263 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 02264 SG_PRINT("penalties(%i, %i), frame:%i ", from_state, to_state, frame); 02265 PEN.element(to_state, from_state)->get_used_svms(&num_current_svms, svm_ids); 02266 SG_PRINT("\n"); 02267 } 02268 02269 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame); 02270 #ifdef DYNPROG_DEBUG 02271 SG_PRINT("part2: pos1: %i pos2: %i \nsvm_values: ", m_pos[from_pos], m_pos[to_pos]) ; 02272 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02273 SG_PRINT("%1.4f ", svm_value[s]); 02274 SG_PRINT("\n"); 02275 #endif 02276 } 02277 } 02278 02279 if (PEN.element(to_state, from_state)!=NULL) 02280 { 02281 float64_t nscore = 0 ; 02282 if (is_long_transition) 02283 { 02284 float64_t pen_value_part1 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ; 02285 float64_t pen_value_part2 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ; 02286 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ; 02287 } 02288 else 02289 nscore = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ; 02290 02291 if (false)//(nscore<-1e9) 02292 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n", 02293 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, nscore) ; 02294 02295 my_scores[i] += nscore ; 02296 02297 for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/ 02298 { 02299 svm_value[s]=-CMath::INFTY; 02300 svm_value_part1[s]=-CMath::INFTY; 02301 svm_value_part2[s]=-CMath::INFTY; 02302 } 02303 02304 #ifdef DYNPROG_DEBUG 02305 //SG_DEBUG( "%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, m_pos[from_pos], to_pos, m_pos[to_pos], m_pos[to_pos]-m_pos[from_pos]) ; 02306 #endif 02307 if (is_long_transition) 02308 { 02309 #ifdef DYNPROG_DEBUG 02310 float64_t sum_score = 0.0 ; 02311 02312 for (int kk=0; kk<i; kk++) 02313 sum_score += my_scores[i] ; 02314 02315 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n", 02316 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, 02317 nscore, sum_score, 02318 PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh], 02319 PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ; 02320 #endif 02321 } 02322 02323 if (is_long_transition) 02324 { 02325 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ; 02326 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ; 02327 } 02328 else 02329 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ; 02330 02331 //SG_PRINT("m_num_raw_data = %i \n", m_num_raw_data) ; 02332 02333 // for tiling array and rna-seq data every single measurement must be added to the derivative 02334 // in contrast to the content svm predictions where we have a single value per transition; 02335 // content svm predictions have already been added to the derivative, thus we start with d=1 02336 // instead of d=0 02337 if (is_long_transition) 02338 { 02339 for (int32_t d=1; d<=m_num_raw_data; d++) 02340 { 02341 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++) 02342 svm_value[s]=-CMath::INFTY; 02343 float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]); 02344 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d); 02345 for (int32_t k=0;k<num_intensities;k++) 02346 { 02347 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02348 svm_value[j]=intensities[k]; 02349 02350 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ; 02351 02352 } 02353 num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d); 02354 for (int32_t k=0;k<num_intensities;k++) 02355 { 02356 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02357 svm_value[j]=intensities[k]; 02358 02359 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ; 02360 02361 } 02362 SG_FREE(intensities); 02363 02364 } 02365 } 02366 else 02367 { 02368 for (int32_t d=1; d<=m_num_raw_data; d++) 02369 { 02370 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++) 02371 svm_value[s]=-CMath::INFTY; 02372 float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]); 02373 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d); 02374 //SG_PRINT("m_pos[from_pos]:%i, m_pos[to_pos]:%i, num_intensities:%i\n",m_pos[from_pos],m_pos[to_pos], num_intensities); 02375 for (int32_t k=0;k<num_intensities;k++) 02376 { 02377 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02378 svm_value[j]=intensities[k]; 02379 02380 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ; 02381 02382 } 02383 SG_FREE(intensities); 02384 } 02385 } 02386 02387 } 02388 #ifdef DYNPROG_DEBUG 02389 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ; 02390 #endif 02391 02392 //SG_DEBUG( "emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0) ; 02393 for (int32_t k=0; k<max_num_signals; k++) 02394 { 02395 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0)) 02396 { 02397 #ifdef DYNPROG_DEBUG 02398 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ; 02399 #endif 02400 my_scores[i] += seq_input.element(to_state, to_pos, k) ; 02401 //if (seq_input.element(to_state, to_pos, k) !=0) 02402 // SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k)); 02403 break ; 02404 } 02405 if (PEN_state_signals.element(to_state, k)!=NULL) 02406 { 02407 float64_t nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ; // this should be ok for long_transitions (svm_value does not matter) 02408 my_scores[i] += nscore ; 02409 #ifdef DYNPROG_DEBUG 02410 if (false)//(nscore<-1e9) 02411 { 02412 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n", 02413 is_long_transition, m_pos[from_pos], from_pos, from_state, m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.element(to_state, to_pos, k)) ; 02414 for (int x=0; x<23; x++) 02415 { 02416 for (int i=-10; i<10; i++) 02417 SG_PRINT("%1.4f\t", seq_input.element(x, to_pos+i, k)); 02418 SG_PRINT("\n"); 02419 } 02420 02421 } 02422 #endif 02423 //break ; 02424 //int32_t num_current_svms=0; 02425 //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 02426 //SG_PRINT("PEN_state_signals->id: "); 02427 //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids); 02428 //SG_PRINT("\n"); 02429 //if (nscore != 0) 02430 //SG_PRINT( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ; 02431 #ifdef DYNPROG_DEBUG 02432 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ; 02433 #endif 02434 PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ; // this should be ok for long_transitions (svm_value does not matter) 02435 } else 02436 break ; 02437 } 02438 02439 //#ifdef DYNPROG_DEBUG 02440 //SG_PRINT( "scores[%i]=%f (final) \n", i, my_scores[i]) ; 02441 //SG_PRINT( "losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss) ; 02442 total_score += my_scores[i] ; 02443 total_loss += my_losses[i] ; 02444 //#endif 02445 } 02446 //#ifdef DYNPROG_DEBUG 02447 //SG_PRINT( "total score = %f \n", total_score) ; 02448 //SG_PRINT( "total loss = %f \n", total_loss) ; 02449 //#endif 02450 SG_FREE(svm_value); 02451 SG_FREE(svm_value_part1); 02452 SG_FREE(svm_value_part2); 02453 } 02454 02455 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type) 02456 { 02457 ASSERT(from_pos<to_pos); 02458 int32_t num_intensities = 0; 02459 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[type-1]]; 02460 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]]; 02461 int32_t last_pos; 02462 int32_t num = m_num_probes_cum[type-1]; 02463 while (*p_tiling_pos<to_pos) 02464 { 02465 if (*p_tiling_pos>=from_pos) 02466 { 02467 intensities[num_intensities] = *p_tiling_data; 02468 num_intensities++; 02469 } 02470 num++; 02471 if (num>=m_num_probes_cum[type]) 02472 break; 02473 last_pos = *p_tiling_pos; 02474 p_tiling_pos++; 02475 p_tiling_data++; 02476 ASSERT(last_pos<*p_tiling_pos); 02477 } 02478 return num_intensities; 02479 } 02480 02481 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame) 02482 { 02483 #ifdef DYNPROG_TIMING_DETAIL 02484 MyTime.start() ; 02485 #endif 02486 // ASSERT(from_state<to_state); 02487 // if (!(from_pos<to_pos)) 02488 // SG_ERROR("from_pos!<to_pos, from_pos: %i to_pos: %i \n",from_pos,to_pos); 02489 for (int32_t i=0;i<m_num_svms;i++) 02490 { 02491 float64_t to_val = m_lin_feat.get_element(i, to_state); 02492 float64_t from_val = m_lin_feat.get_element(i, from_state); 02493 svm_values[i] = (to_val-from_val)/(to_pos-from_pos); 02494 } 02495 for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++) 02496 { 02497 float64_t to_val = m_lin_feat.get_element(i, to_state); 02498 float64_t from_val = m_lin_feat.get_element(i, from_state); 02499 svm_values[i] = to_val-from_val ; 02500 } 02501 if (m_intron_list) 02502 { 02503 int32_t* support = SG_MALLOC(int32_t, m_num_intron_plifs); 02504 m_intron_list->get_intron_support(support, from_state, to_state); 02505 int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data]; 02506 int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; 02507 int32_t cnt = 0; 02508 for (int32_t i=intron_list_start; i<intron_list_end;i++) 02509 { 02510 svm_values[i] = (float64_t) (support[cnt]); 02511 cnt++; 02512 } 02513 //if (to_pos>3990 && to_pos<4010) 02514 // SG_PRINT("from_state:%i to_state:%i support[0]:%i support[1]:%i\n",from_state, to_state, support[0], support[1]); 02515 SG_FREE(support); 02516 } 02517 // find the correct row with precomputed frame predictions 02518 if (frame!=-1) 02519 { 02520 svm_values[frame_plifs[0]] = 1e10; 02521 svm_values[frame_plifs[1]] = 1e10; 02522 svm_values[frame_plifs[2]] = 1e10; 02523 int32_t global_frame = from_pos%3; 02524 int32_t row = ((global_frame+frame)%3)+4; 02525 float64_t to_val = m_lin_feat.get_element(row, to_state); 02526 float64_t from_val = m_lin_feat.get_element(row, from_state); 02527 svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos); 02528 } 02529 #ifdef DYNPROG_TIMING_DETAIL 02530 MyTime.stop() ; 02531 content_svm_values_time += MyTime.time_diff_sec() ; 02532 #endif 02533 } 02534 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs) 02535 { 02536 m_intron_list = intron_list; 02537 m_num_intron_plifs = num_plifs; 02538 } 02539