SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2009 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _TRIE_H___ 00013 #define _TRIE_H___ 00014 00015 #include <string.h> 00016 #include <shogun/lib/common.h> 00017 #include <shogun/io/SGIO.h> 00018 #include <shogun/base/DynArray.h> 00019 #include <shogun/mathematics/Math.h> 00020 #include <shogun/base/SGObject.h> 00021 00022 namespace shogun 00023 { 00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00025 00026 // sentinel is 0xFFFFFFFC or float -2 00027 #define NO_CHILD ((int32_t)-1073741824) 00028 00029 #define WEIGHTS_IN_TRIE 00030 //#define TRIE_CHECK_EVERYTHING 00031 00032 #ifdef TRIE_CHECK_EVERYTHING 00033 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x) 00034 #else 00035 #define TRIE_ASSERT_EVERYTHING(x) 00036 #endif 00037 00038 //#define TRIE_ASSERT(x) ASSERT(x) 00039 #define TRIE_ASSERT(x) 00040 00041 #define TRIE_TERMINAL_CHARACTER 7 00042 00044 struct ConsensusEntry 00045 { 00047 uint64_t string; 00049 float32_t score; 00051 int32_t bt; 00052 }; 00053 00055 struct POIMTrie 00056 { 00058 float64_t weight; 00059 #ifdef TRIE_CHECK_EVERYTHING 00060 00061 bool has_seq; 00063 bool has_floats; 00064 #endif 00065 union 00066 { 00068 float32_t child_weights[4]; 00070 int32_t children[4]; 00072 uint8_t seq[16] ; 00073 }; 00074 00076 float64_t S; 00078 float64_t L; 00080 float64_t R; 00081 }; 00082 00084 struct DNATrie 00085 { 00087 float64_t weight; 00088 #ifdef TRIE_CHECK_EVERYTHING 00089 00090 bool has_seq; 00092 bool has_floats; 00093 #endif 00094 union 00095 { 00097 float32_t child_weights[4]; 00099 int32_t children[4]; 00101 uint8_t seq[16] ; 00102 }; 00103 }; 00104 00106 struct TreeParseInfo { 00108 int32_t num_sym; 00110 int32_t num_feat; 00112 int32_t p; 00114 int32_t k; 00116 int32_t* nofsKmers; 00118 float64_t* margFactors; 00120 int32_t* x; 00122 int32_t* substrs; 00124 int32_t y0; 00126 float64_t* C_k; 00128 float64_t* L_k; 00130 float64_t* R_k; 00131 }; 00132 00133 #endif // DOXYGEN_SHOULD_SKIP_THIS 00134 00135 template <class Trie> class CTrie; 00136 00137 #define IGNORE_IN_CLASSLIST 00138 00156 IGNORE_IN_CLASSLIST template <class Trie> class CTrie : public CSGObject 00157 { 00158 public: 00160 CTrie(); 00161 00168 CTrie(int32_t d, bool p_use_compact_terminal_nodes=true); 00169 00171 CTrie(const CTrie & to_copy); 00172 virtual ~CTrie(); 00173 00175 const CTrie & operator=(const CTrie & to_copy); 00176 00184 bool compare_traverse( 00185 int32_t node, const CTrie & other, int32_t other_node); 00186 00192 bool compare(const CTrie & other); 00193 00200 bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const; 00201 00208 int32_t find_deepest_node( 00209 int32_t start_node, int32_t &deepest_node) const; 00210 00215 void display_node(int32_t node) const; 00216 00218 void destroy(); 00219 00224 void set_degree(int32_t d); 00225 00232 void create(int32_t len, bool p_use_compact_terminal_nodes=true); 00233 00239 void delete_trees(bool p_use_compact_terminal_nodes=true); 00240 00251 void add_to_trie( 00252 int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha, 00253 float64_t *weights, bool degree_times_position_weights); 00254 00261 float64_t compute_abs_weights_tree(int32_t tree, int32_t depth); 00262 00268 float64_t* compute_abs_weights(int32_t &len); 00269 00282 float64_t compute_by_tree_helper( 00283 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 00284 int32_t weight_pos, float64_t * weights, 00285 bool degree_times_position_weights) ; 00286 00301 void compute_by_tree_helper( 00302 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 00303 int32_t weight_pos, float64_t* LevelContrib, float64_t factor, 00304 int32_t mkl_stepsize, float64_t * weights, 00305 bool degree_times_position_weights); 00306 00321 void compute_scoring_helper( 00322 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d, 00323 int32_t max_degree, int32_t num_feat, int32_t num_sym, 00324 int32_t sym_offset, int32_t offs, float64_t* result); 00325 00338 void add_example_to_tree_mismatch_recursion( 00339 int32_t tree, int32_t i, float64_t alpha, int32_t *vec, 00340 int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec, 00341 int32_t max_mismatch, float64_t * weights); 00342 00352 void traverse( 00353 int32_t tree, const int32_t p, struct TreeParseInfo info, 00354 const int32_t depth, int32_t* const x, const int32_t k); 00355 00365 void count( 00366 const float64_t w, const int32_t depth, 00367 const struct TreeParseInfo info, const int32_t p, int32_t* x, 00368 const int32_t k); 00369 00376 int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights); 00377 00386 float64_t get_cumulative_score( 00387 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights); 00388 00398 void fill_backtracking_table_recursion( 00399 Trie* tree, int32_t depth, uint64_t seq, float64_t value, 00400 DynArray<ConsensusEntry>* table, float64_t* weights); 00401 00410 void fill_backtracking_table( 00411 int32_t pos, DynArray<ConsensusEntry>* prev, 00412 DynArray<ConsensusEntry>* cur, bool cumulative, 00413 float64_t* weights); 00414 00420 void POIMs_extract_W(float64_t* const* const W, const int32_t K); 00421 00426 void POIMs_precalc_SLR(const float64_t* const distrib); 00427 00437 void POIMs_get_SLR( 00438 const int32_t parentIdx, const int32_t sym, const int32_t depth, 00439 float64_t* S, float64_t* L, float64_t* R); 00440 00447 void POIMs_add_SLR( 00448 float64_t* const* const poims, const int32_t K, 00449 const int32_t debug); 00450 00455 inline bool get_use_compact_terminal_nodes() 00456 { 00457 return use_compact_terminal_nodes ; 00458 } 00459 00465 inline void set_use_compact_terminal_nodes( 00466 bool p_use_compact_terminal_nodes) 00467 { 00468 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 00469 } 00470 00475 inline int32_t get_num_used_nodes() 00476 { 00477 return TreeMemPtr; 00478 } 00479 00484 inline void set_position_weights(float64_t* p_position_weights) 00485 { 00486 position_weights=p_position_weights; 00487 } 00488 00493 inline int32_t get_node(bool last_node=false) 00494 { 00495 int32_t ret = TreeMemPtr++; 00496 check_treemem() ; 00497 00498 if (last_node) 00499 { 00500 for (int32_t q=0; q<4; q++) 00501 TreeMem[ret].child_weights[q]=0.0; 00502 } 00503 else 00504 { 00505 for (int32_t q=0; q<4; q++) 00506 TreeMem[ret].children[q]=NO_CHILD; 00507 } 00508 #ifdef TRIE_CHECK_EVERYTHING 00509 TreeMem[ret].has_seq=false ; 00510 TreeMem[ret].has_floats=false ; 00511 #endif 00512 TreeMem[ret].weight=0.0; 00513 return ret ; 00514 } 00515 00517 inline void check_treemem() 00518 { 00519 if (TreeMemPtr+10 < TreeMemPtrMax) 00520 return; 00521 SG_DEBUG( "Extending TreeMem from %i to %i elements\n", 00522 TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2)); 00523 TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2); 00524 TreeMem = SG_REALLOC(Trie, TreeMem, TreeMemPtrMax); 00525 } 00526 00531 inline void set_weights_in_tree(bool weights_in_tree_) 00532 { 00533 weights_in_tree = weights_in_tree_; 00534 } 00535 00540 inline bool get_weights_in_tree() 00541 { 00542 return weights_in_tree; 00543 } 00544 00554 void POIMs_extract_W_helper( 00555 const int32_t nodeIdx, const int32_t depth, const int32_t offset, 00556 const int32_t y0, float64_t* const* const W, const int32_t K); 00557 00570 void POIMs_calc_SLR_helper1( 00571 const float64_t* const distrib, const int32_t i, 00572 const int32_t nodeIdx, int32_t left_tries_idx[4], 00573 const int32_t depth, int32_t const lastSym, float64_t* S, 00574 float64_t* L, float64_t* R); 00575 00576 00587 void POIMs_calc_SLR_helper2( 00588 const float64_t* const distrib, const int32_t i, 00589 const int32_t nodeIdx, int32_t left_tries_idx[4], 00590 const int32_t depth, float64_t* S, float64_t* L, float64_t* R); 00591 00602 void POIMs_add_SLR_helper1( 00603 const int32_t nodeIdx, const int32_t depth,const int32_t i, 00604 const int32_t y0, float64_t* const* const poims, const int32_t K, 00605 const int32_t debug); 00606 00620 void POIMs_add_SLR_helper2( 00621 float64_t* const* const poims, const int32_t K, const int32_t k, 00622 const int32_t i, const int32_t y, const float64_t valW, 00623 const float64_t valS, const float64_t valL, const float64_t valR, 00624 const int32_t debug); 00625 00627 inline virtual const char* get_name() const { return "Trie"; } 00628 00629 public: 00631 int32_t NUM_SYMS; 00632 00633 protected: 00635 int32_t length; 00637 int32_t * trees; 00638 00640 int32_t degree; 00642 float64_t* position_weights; 00643 00645 Trie* TreeMem; 00647 int32_t TreeMemPtr; 00649 int32_t TreeMemPtrMax; 00651 bool use_compact_terminal_nodes; 00652 00654 bool weights_in_tree; 00655 00657 int32_t* nofsKmers; 00658 }; 00659 template <class Trie> 00660 CTrie<Trie>::CTrie() 00661 : CSGObject(), degree(0), position_weights(NULL), 00662 use_compact_terminal_nodes(false), 00663 weights_in_tree(true) 00664 { 00665 00666 TreeMemPtrMax=0; 00667 TreeMemPtr=0; 00668 TreeMem=NULL; 00669 00670 length=0; 00671 trees=NULL; 00672 00673 NUM_SYMS=4; 00674 } 00675 00676 template <class Trie> 00677 CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes) 00678 : CSGObject(), degree(d), position_weights(NULL), 00679 use_compact_terminal_nodes(p_use_compact_terminal_nodes), 00680 weights_in_tree(true) 00681 { 00682 TreeMemPtrMax=1024*1024/sizeof(Trie); 00683 TreeMemPtr=0; 00684 TreeMem=SG_MALLOC(Trie, TreeMemPtrMax); 00685 00686 length=0; 00687 trees=NULL; 00688 00689 NUM_SYMS=4; 00690 } 00691 00692 template <class Trie> 00693 CTrie<Trie>::CTrie(const CTrie & to_copy) 00694 : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL), 00695 use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes) 00696 { 00697 if (to_copy.position_weights!=NULL) 00698 { 00699 position_weights = to_copy.position_weights; 00700 /*SG_MALLOC(float64_t, to_copy.length); 00701 for (int32_t i=0; i<to_copy.length; i++) 00702 position_weights[i]=to_copy.position_weights[i]; */ 00703 } 00704 else 00705 position_weights=NULL; 00706 00707 TreeMemPtrMax=to_copy.TreeMemPtrMax; 00708 TreeMemPtr=to_copy.TreeMemPtr; 00709 TreeMem=SG_MALLOC(Trie, TreeMemPtrMax); 00710 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)); 00711 00712 length=to_copy.length; 00713 trees=SG_MALLOC(int32_t, length); 00714 for (int32_t i=0; i<length; i++) 00715 trees[i]=to_copy.trees[i]; 00716 00717 NUM_SYMS=4; 00718 } 00719 00720 template <class Trie> 00721 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy) 00722 { 00723 degree=to_copy.degree ; 00724 use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ; 00725 00726 SG_FREE(position_weights); 00727 position_weights=NULL ; 00728 if (to_copy.position_weights!=NULL) 00729 { 00730 position_weights=to_copy.position_weights ; 00731 /*position_weights = SG_MALLOC(float64_t, to_copy.length); 00732 for (int32_t i=0; i<to_copy.length; i++) 00733 position_weights[i]=to_copy.position_weights[i] ;*/ 00734 } 00735 else 00736 position_weights=NULL ; 00737 00738 TreeMemPtrMax=to_copy.TreeMemPtrMax ; 00739 TreeMemPtr=to_copy.TreeMemPtr ; 00740 SG_FREE(TreeMem) ; 00741 TreeMem = SG_MALLOC(Trie, TreeMemPtrMax); 00742 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ; 00743 00744 length = to_copy.length ; 00745 if (trees) 00746 SG_FREE(trees); 00747 trees=SG_MALLOC(int32_t, length); 00748 for (int32_t i=0; i<length; i++) 00749 trees[i]=to_copy.trees[i] ; 00750 00751 return *this ; 00752 } 00753 00754 template <class Trie> 00755 int32_t CTrie<Trie>::find_deepest_node( 00756 int32_t start_node, int32_t& deepest_node) const 00757 { 00758 #ifdef TRIE_CHECK_EVERYTHING 00759 int32_t ret=0 ; 00760 SG_DEBUG("start_node=%i\n", start_node) ; 00761 00762 if (start_node==NO_CHILD) 00763 { 00764 for (int32_t i=0; i<length; i++) 00765 { 00766 int32_t my_deepest_node ; 00767 int32_t depth=find_deepest_node(i, my_deepest_node) ; 00768 SG_DEBUG("start_node %i depth=%i\n", i, depth) ; 00769 if (depth>ret) 00770 { 00771 deepest_node=my_deepest_node ; 00772 ret=depth ; 00773 } 00774 } 00775 return ret ; 00776 } 00777 00778 if (TreeMem[start_node].has_seq) 00779 { 00780 for (int32_t q=0; q<16; q++) 00781 if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER) 00782 ret++ ; 00783 deepest_node=start_node ; 00784 return ret ; 00785 } 00786 if (TreeMem[start_node].has_floats) 00787 { 00788 deepest_node=start_node ; 00789 return 1 ; 00790 } 00791 00792 for (int32_t q=0; q<4; q++) 00793 { 00794 int32_t my_deepest_node ; 00795 if (TreeMem[start_node].children[q]==NO_CHILD) 00796 continue ; 00797 int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ; 00798 if (depth>ret) 00799 { 00800 deepest_node=my_deepest_node ; 00801 ret=depth ; 00802 } 00803 } 00804 return ret ; 00805 #else 00806 SG_ERROR( "not implemented\n") ; 00807 return 0 ; 00808 #endif 00809 } 00810 00811 template <class Trie> 00812 int32_t CTrie<Trie>::compact_nodes( 00813 int32_t start_node, int32_t depth, float64_t * weights) 00814 { 00815 SG_ERROR( "code buggy\n") ; 00816 00817 int32_t ret=0 ; 00818 00819 if (start_node==NO_CHILD) 00820 { 00821 for (int32_t i=0; i<length; i++) 00822 compact_nodes(i,1, weights) ; 00823 return 0 ; 00824 } 00825 if (start_node<0) 00826 return -1 ; 00827 00828 if (depth==degree-1) 00829 { 00830 TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ; 00831 int32_t num_used=0 ; 00832 for (int32_t q=0; q<4; q++) 00833 if (TreeMem[start_node].child_weights[q]!=0.0) 00834 num_used++ ; 00835 if (num_used>1) 00836 return -1 ; 00837 return 1 ; 00838 } 00839 TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ; 00840 00841 int32_t num_used = 0 ; 00842 int32_t q_used=-1 ; 00843 00844 for (int32_t q=0; q<4; q++) 00845 { 00846 if (TreeMem[start_node].children[q]==NO_CHILD) 00847 continue ; 00848 num_used++ ; 00849 q_used=q ; 00850 } 00851 if (num_used>1) 00852 { 00853 if (depth>=degree-2) 00854 return -1 ; 00855 for (int32_t q=0; q<4; q++) 00856 { 00857 if (TreeMem[start_node].children[q]==NO_CHILD) 00858 continue ; 00859 int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ; 00860 if (num<=2) 00861 continue ; 00862 int32_t node=get_node() ; 00863 00864 int32_t last_node=TreeMem[start_node].children[q] ; 00865 if (weights_in_tree) 00866 { 00867 ASSERT(weights[depth]!=0.0) ; 00868 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ; 00869 } 00870 else 00871 TreeMem[node].weight=TreeMem[last_node].weight ; 00872 00873 #ifdef TRIE_CHECK_EVERYTHING 00874 TreeMem[node].has_seq=true ; 00875 #endif 00876 memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ; 00877 for (int32_t n=0; n<num; n++) 00878 { 00879 ASSERT(depth+n+1<=degree-1) ; 00880 ASSERT(last_node!=NO_CHILD) ; 00881 if (depth+n+1==degree-1) 00882 { 00883 TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ; 00884 int32_t k ; 00885 for (k=0; k<4; k++) 00886 if (TreeMem[last_node].child_weights[k]!=0.0) 00887 break ; 00888 if (k==4) 00889 break ; 00890 TreeMem[node].seq[n]=k ; 00891 break ; 00892 } 00893 else 00894 { 00895 TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ; 00896 int32_t k ; 00897 for (k=0; k<4; k++) 00898 if (TreeMem[last_node].children[k]!=NO_CHILD) 00899 break ; 00900 if (k==4) 00901 break ; 00902 TreeMem[node].seq[n]=k ; 00903 last_node=TreeMem[last_node].children[k] ; 00904 } 00905 } 00906 TreeMem[start_node].children[q]=-node ; 00907 } 00908 return -1 ; 00909 } 00910 if (num_used==0) 00911 return 0 ; 00912 00913 ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ; 00914 if (ret<0) 00915 return ret ; 00916 return ret+1 ; 00917 } 00918 00919 00920 template <class Trie> 00921 bool CTrie<Trie>::compare_traverse( 00922 int32_t node, const CTrie<Trie> & other, int32_t other_node) 00923 { 00924 SG_DEBUG("checking nodes %i and %i\n", node, other_node) ; 00925 if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5) 00926 { 00927 SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ; 00928 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00929 display_node(node) ; 00930 SG_DEBUG( "============================================================\n") ; 00931 other.display_node(other_node) ; 00932 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00933 return false ; 00934 } 00935 00936 #ifdef TRIE_CHECK_EVERYTHING 00937 if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq) 00938 { 00939 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ; 00940 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00941 display_node(node) ; 00942 SG_DEBUG( "============================================================\n") ; 00943 other.display_node(other_node) ; 00944 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00945 return false ; 00946 } 00947 if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats) 00948 { 00949 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ; 00950 return false ; 00951 } 00952 if (other.TreeMem[other_node].has_floats) 00953 { 00954 for (int32_t q=0; q<4; q++) 00955 if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5) 00956 { 00957 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ; 00958 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00959 display_node(node) ; 00960 SG_DEBUG( "============================================================\n") ; 00961 other.display_node(other_node) ; 00962 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00963 return false ; 00964 } 00965 } 00966 if (other.TreeMem[other_node].has_seq) 00967 { 00968 for (int32_t q=0; q<16; q++) 00969 if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4))) 00970 { 00971 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ; 00972 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00973 display_node(node) ; 00974 SG_DEBUG( "============================================================\n") ; 00975 other.display_node(other_node) ; 00976 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00977 return false ; 00978 } 00979 } 00980 if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats) 00981 { 00982 for (int32_t q=0; q<4; q++) 00983 { 00984 if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD)) 00985 continue ; 00986 if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD)) 00987 { 00988 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ; 00989 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ; 00990 display_node(node) ; 00991 SG_DEBUG( "============================================================\n") ; 00992 other.display_node(other_node) ; 00993 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ; 00994 return false ; 00995 } 00996 if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q]))) 00997 return false ; 00998 } 00999 } 01000 #else 01001 SG_ERROR( "not implemented\n") ; 01002 #endif 01003 01004 return true ; 01005 } 01006 01007 template <class Trie> 01008 bool CTrie<Trie>::compare(const CTrie<Trie> & other) 01009 { 01010 bool ret=true ; 01011 for (int32_t i=0; i<length; i++) 01012 if (!compare_traverse(trees[i], other, other.trees[i])) 01013 return false ; 01014 else 01015 SG_DEBUG("two tries at %i identical\n", i) ; 01016 01017 return ret ; 01018 } 01019 01020 template <class Trie> 01021 bool CTrie<Trie>::find_node( 01022 int32_t node, int32_t * trace, int32_t& trace_len) const 01023 { 01024 #ifdef TRIE_CHECK_EVERYTHING 01025 ASSERT(trace_len-1>=0) ; 01026 ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax)) 01027 if (TreeMem[trace[trace_len-1]].has_seq) 01028 return false ; 01029 if (TreeMem[trace[trace_len-1]].has_floats) 01030 return false ; 01031 01032 for (int32_t q=0; q<4; q++) 01033 { 01034 if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD) 01035 continue ; 01036 int32_t tl=trace_len+1 ; 01037 if (TreeMem[trace[trace_len-1]].children[q]>=0) 01038 trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ; 01039 else 01040 trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ; 01041 01042 if (trace[trace_len]==node) 01043 { 01044 trace_len=tl ; 01045 return true ; 01046 } 01047 if (find_node(node, trace, tl)) 01048 { 01049 trace_len=tl ; 01050 return true ; 01051 } 01052 } 01053 trace_len=0 ; 01054 return false ; 01055 #else 01056 SG_ERROR( "not implemented\n") ; 01057 return false ; 01058 #endif 01059 } 01060 01061 template <class Trie> 01062 void CTrie<Trie>::display_node(int32_t node) const 01063 { 01064 #ifdef TRIE_CHECK_EVERYTHING 01065 int32_t * trace=SG_MALLOC(int32_t, 2*degree); 01066 int32_t trace_len=-1 ; 01067 bool found = false ; 01068 int32_t tree=-1 ; 01069 for (tree=0; tree<length; tree++) 01070 { 01071 trace[0]=trees[tree] ; 01072 trace_len=1 ; 01073 found=find_node(node, trace, trace_len) ; 01074 if (found) 01075 break ; 01076 } 01077 ASSERT(found) ; 01078 SG_PRINT( "position %i trace: ", tree) ; 01079 01080 for (int32_t i=0; i<trace_len-1; i++) 01081 { 01082 int32_t branch=-1 ; 01083 for (int32_t q=0; q<4; q++) 01084 if (abs(TreeMem[trace[i]].children[q])==trace[i+1]) 01085 { 01086 branch=q; 01087 break ; 01088 } 01089 ASSERT(branch!=-1) ; 01090 char acgt[5]="ACGT" ; 01091 SG_PRINT( "%c", acgt[branch]) ; 01092 } 01093 SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ; 01094 if (TreeMem[node].has_floats) 01095 { 01096 for (int32_t q=0; q<4; q++) 01097 SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ; 01098 } 01099 if (TreeMem[node].has_seq) 01100 { 01101 for (int32_t q=0; q<16; q++) 01102 SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ; 01103 } 01104 if (!TreeMem[node].has_seq && !TreeMem[node].has_floats) 01105 { 01106 for (int32_t q=0; q<4; q++) 01107 { 01108 if (TreeMem[node].children[q]!=NO_CHILD) 01109 { 01110 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ; 01111 display_node(abs(TreeMem[node].children[q])) ; 01112 } 01113 else 01114 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ; 01115 } 01116 01117 } 01118 01119 SG_FREE(trace); 01120 #else 01121 SG_ERROR( "not implemented\n") ; 01122 #endif 01123 } 01124 01125 01126 template <class Trie> CTrie<Trie>::~CTrie() 01127 { 01128 destroy() ; 01129 01130 SG_FREE(TreeMem) ; 01131 } 01132 01133 template <class Trie> void CTrie<Trie>::destroy() 01134 { 01135 if (trees!=NULL) 01136 { 01137 delete_trees(); 01138 for (int32_t i=0; i<length; i++) 01139 trees[i] = NO_CHILD; 01140 SG_FREE(trees); 01141 01142 TreeMemPtr=0; 01143 length=0; 01144 trees=NULL; 01145 } 01146 } 01147 01148 template <class Trie> void CTrie<Trie>::set_degree(int32_t d) 01149 { 01150 delete_trees(get_use_compact_terminal_nodes()); 01151 degree=d; 01152 } 01153 01154 template <class Trie> void CTrie<Trie>::create( 01155 int32_t len, bool p_use_compact_terminal_nodes) 01156 { 01157 destroy(); 01158 01159 trees=SG_MALLOC(int32_t, len); 01160 TreeMemPtr=0 ; 01161 for (int32_t i=0; i<len; i++) 01162 trees[i]=get_node(degree==1); 01163 length = len ; 01164 01165 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 01166 } 01167 01168 01169 template <class Trie> void CTrie<Trie>::delete_trees( 01170 bool p_use_compact_terminal_nodes) 01171 { 01172 if (trees==NULL) 01173 return; 01174 01175 TreeMemPtr=0 ; 01176 for (int32_t i=0; i<length; i++) 01177 trees[i]=get_node(degree==1); 01178 01179 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 01180 } 01181 01182 template <class Trie> 01183 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth) 01184 { 01185 float64_t ret=0 ; 01186 01187 if (tree==NO_CHILD) 01188 return 0 ; 01189 TRIE_ASSERT(tree>=0) ; 01190 01191 if (depth==degree-2) 01192 { 01193 ret+=(TreeMem[tree].weight) ; 01194 01195 for (int32_t k=0; k<4; k++) 01196 ret+=(TreeMem[tree].child_weights[k]) ; 01197 01198 return ret ; 01199 } 01200 01201 ret+=(TreeMem[tree].weight) ; 01202 01203 for (int32_t i=0; i<4; i++) 01204 if (TreeMem[tree].children[i]!=NO_CHILD) 01205 ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1) ; 01206 01207 return ret ; 01208 } 01209 01210 01211 template <class Trie> 01212 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len) 01213 { 01214 float64_t * sum=SG_MALLOC(float64_t, length*4); 01215 for (int32_t i=0; i<length*4; i++) 01216 sum[i]=0 ; 01217 len=length ; 01218 01219 for (int32_t i=0; i<length; i++) 01220 { 01221 TRIE_ASSERT(trees[i]!=NO_CHILD) ; 01222 for (int32_t k=0; k<4; k++) 01223 { 01224 sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ; 01225 } 01226 } 01227 01228 return sum ; 01229 } 01230 01231 template <class Trie> 01232 void CTrie<Trie>::add_example_to_tree_mismatch_recursion( 01233 int32_t tree, int32_t i, float64_t alpha, 01234 int32_t *vec, int32_t len_rem, 01235 int32_t degree_rec, int32_t mismatch_rec, 01236 int32_t max_mismatch, float64_t * weights) 01237 { 01238 if (tree==NO_CHILD) 01239 tree=trees[i] ; 01240 TRIE_ASSERT(tree!=NO_CHILD) ; 01241 01242 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree)) 01243 return ; 01244 const int32_t other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ; 01245 01246 int32_t subtree = NO_CHILD ; 01247 01248 if (degree_rec==degree-1) 01249 { 01250 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ; 01251 if (weights_in_tree) 01252 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]; 01253 else 01254 if (weights[degree_rec]!=0.0) 01255 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec]; 01256 if (mismatch_rec+1<=max_mismatch) 01257 for (int32_t o=0; o<3; o++) 01258 { 01259 if (weights_in_tree) 01260 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]; 01261 else 01262 if (weights[degree_rec]!=0.0) 01263 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec]; 01264 } 01265 return ; 01266 } 01267 else 01268 { 01269 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01270 if (TreeMem[tree].children[vec[0]]!=NO_CHILD) 01271 { 01272 subtree=TreeMem[tree].children[vec[0]] ; 01273 if (weights_in_tree) 01274 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]; 01275 else 01276 if (weights[degree_rec]!=0.0) 01277 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec]; 01278 } 01279 else 01280 { 01281 int32_t tmp = get_node(degree_rec==degree-2); 01282 ASSERT(tmp>=0) ; 01283 TreeMem[tree].children[vec[0]]=tmp ; 01284 subtree=tmp ; 01285 #ifdef TRIE_CHECK_EVERYTHING 01286 if (degree_rec==degree-2) 01287 TreeMem[subtree].has_floats=true ; 01288 #endif 01289 if (weights_in_tree) 01290 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ; 01291 else 01292 if (weights[degree_rec]!=0.0) 01293 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ; 01294 else 01295 TreeMem[subtree].weight = 0.0 ; 01296 } 01297 add_example_to_tree_mismatch_recursion(subtree, i, alpha, 01298 &vec[1], len_rem-1, 01299 degree_rec+1, mismatch_rec, max_mismatch, weights) ; 01300 01301 if (mismatch_rec+1<=max_mismatch) 01302 { 01303 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01304 for (int32_t o=0; o<3; o++) 01305 { 01306 int32_t ot = other[vec[0]][o] ; 01307 if (TreeMem[tree].children[ot]!=NO_CHILD) 01308 { 01309 subtree=TreeMem[tree].children[ot] ; 01310 if (weights_in_tree) 01311 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]; 01312 else 01313 if (weights[degree_rec]!=0.0) 01314 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec]; 01315 } 01316 else 01317 { 01318 int32_t tmp = get_node(degree_rec==degree-2); 01319 ASSERT(tmp>=0) ; 01320 TreeMem[tree].children[ot]=tmp ; 01321 subtree=tmp ; 01322 #ifdef TRIE_CHECK_EVERYTHING 01323 if (degree_rec==degree-2) 01324 TreeMem[subtree].has_floats=true ; 01325 #endif 01326 01327 if (weights_in_tree) 01328 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ; 01329 else 01330 if (weights[degree_rec]!=0.0) 01331 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ; 01332 else 01333 TreeMem[subtree].weight = 0.0 ; 01334 } 01335 01336 add_example_to_tree_mismatch_recursion(subtree, i, alpha, 01337 &vec[1], len_rem-1, 01338 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ; 01339 } 01340 } 01341 } 01342 } 01343 01344 template <class Trie> 01345 void CTrie<Trie>::compute_scoring_helper( 01346 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d, 01347 int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset, 01348 int32_t offs, float64_t* result) 01349 { 01350 if (i+j<num_feat) 01351 { 01352 float64_t decay=1.0; //no decay by default 01353 //if (j>d) 01354 // decay=pow(0.5,j); //marginalize out lower order matches 01355 01356 if (j<degree-1) 01357 { 01358 for (int32_t k=0; k<num_sym; k++) 01359 { 01360 if (TreeMem[tree].children[k]!=NO_CHILD) 01361 { 01362 int32_t child=TreeMem[tree].children[k]; 01363 //continue recursion if not yet at max_degree, else add to result 01364 if (d<max_degree-1) 01365 compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result); 01366 else 01367 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight; 01368 01370 if (d==0) 01371 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result); 01372 } 01373 } 01374 } 01375 else if (j==degree-1) 01376 { 01377 for (int32_t k=0; k<num_sym; k++) 01378 { 01379 //continue recursion if not yet at max_degree, else add to result 01380 if (d<max_degree-1 && i<num_feat-1) 01381 compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result); 01382 else 01383 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k]; 01384 } 01385 } 01386 } 01387 } 01388 01389 template <class Trie> 01390 void CTrie<Trie>::traverse( 01391 int32_t tree, const int32_t p, struct TreeParseInfo info, 01392 const int32_t depth, int32_t* const x, const int32_t k) 01393 { 01394 const int32_t num_sym = info.num_sym; 01395 const int32_t y0 = info.y0; 01396 const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] ); 01397 //const int32_t temp = info.substrs[depth]*num_sym - ( (depth<=k) ? 0 : info.nofsKmers[k] * x[depth-k-1] ); 01398 //if( !( info.y0 == temp ) ) { 01399 // printf( "\n temp=%d y0=%d k=%d depth=%d \n", temp, info.y0, k, depth ); 01400 //} 01401 //ASSERT( info.y0 == temp ); 01402 int32_t sym; 01403 ASSERT( depth < degree ); 01404 //ASSERT( 0 <= info.substrs[depth] && info.substrs[depth] < info.nofsKmers[k] ); 01405 if (depth<degree-1) 01406 { 01407 for( sym=0; sym<num_sym; ++sym ) { 01408 const int32_t childNum = TreeMem[tree].children[ sym ]; 01409 if( childNum != NO_CHILD ) { 01410 int32_t child = childNum ; 01411 x[depth] = sym; 01412 info.substrs[depth+1] = y0 + sym; 01413 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym; 01414 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) ); 01415 count( TreeMem[child].weight, depth, info, p, x, k ); 01416 traverse( child, p, info, depth+1, x, k ); 01417 x[depth] = -1; 01418 } 01419 } 01420 } 01421 else if( depth == degree-1 ) 01422 { 01423 for( sym=0; sym<num_sym; ++sym ) { 01424 const float64_t w = TreeMem[tree].child_weights[ sym ]; 01425 if( w != 0.0 ) { 01426 x[depth] = sym; 01427 info.substrs[depth+1] = y0 + sym; 01428 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym; 01429 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) ); 01430 count( w, depth, info, p, x, k ); 01431 x[depth] = -1; 01432 } 01433 } 01434 } 01435 //info.substrs[depth+1] = -1; 01436 //info.y0 = temp; 01437 } 01438 01439 template <class Trie> 01440 void CTrie<Trie>::count( 01441 const float64_t w, const int32_t depth, const struct TreeParseInfo info, 01442 const int32_t p, int32_t* x, const int32_t k) 01443 { 01444 ASSERT( fabs(w) < 1e10 ); 01445 ASSERT( x[depth] >= 0 ); 01446 ASSERT( x[depth+1] < 0 ); 01447 if ( depth < k ) { 01448 return; 01449 } 01450 //ASSERT( info.margFactors[ depth-k ] == pow( 0.25, depth-k ) ); 01451 const int32_t nofKmers = info.nofsKmers[k]; 01452 const float64_t margWeight = w * info.margFactors[ depth-k ]; 01453 const int32_t m_a = depth - k + 1; 01454 const int32_t m_b = info.num_feat - p; 01455 const int32_t m = ( m_a < m_b ) ? m_a : m_b; 01456 // all proper k-substrings 01457 const int32_t offset0 = nofKmers * p; 01458 register int32_t i; 01459 register int32_t offset; 01460 offset = offset0; 01461 for( i = 0; i < m; ++i ) { 01462 const int32_t y = info.substrs[i+k+1]; 01463 info.C_k[ y + offset ] += margWeight; 01464 offset += nofKmers; 01465 } 01466 if( depth > k ) { 01467 // k-prefix 01468 const int32_t offsR = info.substrs[k+1] + offset0; 01469 info.R_k[offsR] += margWeight; 01470 // k-suffix 01471 if( p+depth-k < info.num_feat ) { 01472 const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k); 01473 info.L_k[offsL] += margWeight; 01474 } 01475 } 01476 // # N.x = substring represented by N 01477 // # N.d = length of N.x 01478 // # N.s = starting position of N.x 01479 // # N.w = weight for feature represented by N 01480 // if( N.d >= k ) 01481 // margContrib = w / 4^(N.d-k) 01482 // for i = 1 to (N.d-k+1) 01483 // y = N.x[i:(i+k-1)] # overlapped k-mer 01484 // C_k[ N.s+i-1, y ] += margContrib 01485 // end; 01486 // if( N.d > k ) 01487 // L_k[ N.s+N.d-k, N.x[N.d-k+(1:k)] ] += margContrib # j-suffix of N.x 01488 // R_k[ N.s, N.x[1:k] ] += margContrib # j-prefix of N.x 01489 // end; 01490 // end; 01491 } 01492 01493 template <class Trie> 01494 void CTrie<Trie>::add_to_trie( 01495 int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha, 01496 float64_t *weights, bool degree_times_position_weights) 01497 { 01498 int32_t tree = trees[i] ; 01499 //ASSERT(seq_offset==0) ; 01500 01501 int32_t max_depth = 0 ; 01502 float64_t* weights_column ; 01503 if (degree_times_position_weights) 01504 weights_column = &weights[(i+seq_offset)*degree] ; 01505 else 01506 weights_column = weights ; 01507 01508 if (weights_in_tree) 01509 { 01510 for (int32_t j=0; (j<degree) && (i+j<length); j++) 01511 if (CMath::abs(weights_column[j]*alpha)>0) 01512 max_depth = j+1 ; 01513 } 01514 else 01515 // don't use the weights 01516 max_depth=degree ; 01517 01518 for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++) 01519 { 01520 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ; 01521 if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD)) 01522 { 01523 if (TreeMem[tree].children[vec[i+j+seq_offset]]<0) 01524 { 01525 // special treatment of the next nodes 01526 TRIE_ASSERT(j >= degree-16) ; 01527 // get the right element 01528 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01529 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01530 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ; 01531 01532 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ; 01533 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ; 01534 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ; 01535 01536 // check whether the same string is stored 01537 int32_t mismatch_pos = -1 ; 01538 { 01539 int32_t k ; 01540 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++) 01541 { 01542 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ; 01543 // ### 01544 if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER)) 01545 fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ; 01546 TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ; 01547 TRIE_ASSERT(k<16) ; 01548 if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k]) 01549 { 01550 mismatch_pos=k ; 01551 break ; 01552 } 01553 } 01554 } 01555 01556 // what happens when the .seq sequence is longer than vec? should we branch??? 01557 01558 if (mismatch_pos==-1) 01559 // if so, then just increase the weight by alpha and stop 01560 TreeMem[node].weight+=alpha ; 01561 else 01562 // otherwise 01563 // 1. replace current node with new node 01564 // 2. create new nodes until mismatching positon 01565 // 2. add a branch with old string (old node) and the new string (new node) 01566 { 01567 // replace old node 01568 int32_t last_node=tree ; 01569 01570 // create new nodes until mismatch 01571 int32_t k ; 01572 for (k=0; k<mismatch_pos; k++) 01573 { 01574 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ; 01575 TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ; 01576 01577 int32_t tmp=get_node(); 01578 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ; 01579 last_node=tmp ; 01580 if (weights_in_tree) 01581 TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ; 01582 else 01583 TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ; 01584 TRIE_ASSERT(j+k!=degree-1) ; 01585 } 01586 if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER)) 01587 fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ; 01588 ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ; 01589 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ; 01590 01591 if (j+k==degree-1) 01592 { 01593 // init child weights with zero if after dropping out 01594 // of the k<mismatch_pos loop we are one level below degree 01595 // (keep this even after get_node() change!) 01596 for (int32_t q=0; q<4; q++) 01597 TreeMem[last_node].child_weights[q]=0.0 ; 01598 if (weights_in_tree) 01599 { 01600 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01601 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ; 01602 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ; 01603 } 01604 else 01605 { 01606 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01607 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ; 01608 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ; 01609 } 01610 01611 #ifdef TRIE_CHECK_EVERYTHING 01612 TreeMem[last_node].has_floats=true ; 01613 #endif 01614 } 01615 else 01616 { 01617 // the branch for the existing string 01618 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01619 { 01620 TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ; 01621 01622 // move string by mismatch_pos positions 01623 for (int32_t q=0; q<16; q++) 01624 { 01625 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length)) 01626 TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ; 01627 else 01628 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ; 01629 } 01630 #ifdef TRIE_CHECK_EVERYTHING 01631 TreeMem[node].has_seq=true ; 01632 #endif 01633 } 01634 01635 // the new branch 01636 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ; 01637 int32_t tmp = get_node() ; 01638 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ; 01639 last_node=tmp ; 01640 01641 TreeMem[last_node].weight = alpha ; 01642 #ifdef TRIE_CHECK_EVERYTHING 01643 TreeMem[last_node].has_seq = true ; 01644 #endif 01645 memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ; 01646 for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++) 01647 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ; 01648 } 01649 } 01650 break ; 01651 } 01652 else 01653 { 01654 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ; 01655 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ; 01656 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01657 if (weights_in_tree) 01658 TreeMem[tree].weight += alpha*weights_column[j]; 01659 else 01660 TreeMem[tree].weight += alpha ; 01661 } 01662 } 01663 else if (j==degree-1) 01664 { 01665 // special treatment of the last node 01666 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01667 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ; 01668 if (weights_in_tree) 01669 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ; 01670 else 01671 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha; 01672 01673 break; 01674 } 01675 else 01676 { 01677 bool use_seq = use_compact_terminal_nodes && (j>degree-16) ; 01678 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01679 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01680 01681 int32_t tmp = get_node((j==degree-2) && (!use_seq)); 01682 if (use_seq) 01683 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ; 01684 else 01685 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ; 01686 tree=tmp ; 01687 01688 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ; 01689 #ifdef TRIE_CHECK_EVERYTHING 01690 TreeMem[tree].has_seq = use_seq ; 01691 #endif 01692 if (use_seq) 01693 { 01694 TreeMem[tree].weight = alpha ; 01695 // important to have the terminal characters (see ###) 01696 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ; 01697 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++) 01698 { 01699 TRIE_ASSERT(q<16) ; 01700 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ; 01701 } 01702 break ; 01703 } 01704 else 01705 { 01706 if (weights_in_tree) 01707 TreeMem[tree].weight = alpha*weights_column[j] ; 01708 else 01709 TreeMem[tree].weight = alpha ; 01710 #ifdef TRIE_CHECK_EVERYTHING 01711 if (j==degree-2) 01712 TreeMem[tree].has_floats = true ; 01713 #endif 01714 } 01715 } 01716 } 01717 } 01718 01719 template <class Trie> 01720 float64_t CTrie<Trie>::compute_by_tree_helper( 01721 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 01722 int32_t weight_pos, float64_t* weights, 01723 bool degree_times_position_weights) 01724 { 01725 int32_t tree = trees[tree_pos] ; 01726 01727 if ((position_weights!=NULL) && (position_weights[weight_pos]==0)) 01728 return 0.0; 01729 01730 float64_t *weights_column=NULL ; 01731 if (degree_times_position_weights) 01732 weights_column=&weights[weight_pos*degree] ; 01733 else // weights is a vector (1 x degree) 01734 weights_column=weights ; 01735 01736 float64_t sum=0 ; 01737 for (int32_t j=0; seq_pos+j < len; j++) 01738 { 01739 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ; 01740 01741 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01742 { 01743 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01744 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01745 { 01746 tree = - TreeMem[tree].children[vec[seq_pos+j]]; 01747 TRIE_ASSERT(tree>=0) ; 01748 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01749 float64_t this_weight=0.0 ; 01750 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01751 { 01752 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ; 01753 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01754 break ; 01755 this_weight += weights_column[j+k] ; 01756 } 01757 sum += TreeMem[tree].weight * this_weight ; 01758 break ; 01759 } 01760 else 01761 { 01762 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01763 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01764 if (weights_in_tree) 01765 sum += TreeMem[tree].weight ; 01766 else 01767 sum += TreeMem[tree].weight * weights_column[j] ; 01768 } ; 01769 } 01770 else 01771 { 01772 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01773 if (j==degree-1) 01774 { 01775 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ; 01776 if (weights_in_tree) 01777 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01778 else 01779 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ; 01780 } 01781 else 01782 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ; 01783 01784 break; 01785 } 01786 } 01787 01788 if (position_weights!=NULL) 01789 return sum*position_weights[weight_pos] ; 01790 else 01791 return sum ; 01792 } 01793 01794 template <class Trie> 01795 void CTrie<Trie>::compute_by_tree_helper( 01796 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 01797 int32_t weight_pos, float64_t* LevelContrib, float64_t factor, 01798 int32_t mkl_stepsize, float64_t * weights, 01799 bool degree_times_position_weights) 01800 { 01801 int32_t tree = trees[tree_pos] ; 01802 if (factor==0) 01803 return ; 01804 01805 if (position_weights!=NULL) 01806 { 01807 factor *= position_weights[weight_pos] ; 01808 if (factor==0) 01809 return ; 01810 if (!degree_times_position_weights) // with position_weigths, weights is a vector (1 x degree) 01811 { 01812 for (int32_t j=0; seq_pos+j<len; j++) 01813 { 01814 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01815 { 01816 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01817 { 01818 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01819 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01820 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01821 { 01822 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01823 break ; 01824 if (weights_in_tree) 01825 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01826 else 01827 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ; 01828 } 01829 break ; 01830 } 01831 else 01832 { 01833 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01834 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01835 if (weights_in_tree) 01836 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01837 else 01838 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ; 01839 } 01840 } 01841 else 01842 { 01843 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01844 if (j==degree-1) 01845 { 01846 if (weights_in_tree) 01847 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01848 else 01849 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ; 01850 } 01851 } 01852 } 01853 } 01854 else // with position_weigths, weights is a matrix (len x degree) 01855 { 01856 for (int32_t j=0; seq_pos+j<len; j++) 01857 { 01858 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01859 { 01860 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01861 { 01862 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01863 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01864 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01865 { 01866 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01867 break ; 01868 if (weights_in_tree) 01869 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01870 else 01871 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ; 01872 } 01873 break ; 01874 } 01875 else 01876 { 01877 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01878 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01879 if (weights_in_tree) 01880 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01881 else 01882 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ; 01883 } 01884 } 01885 else 01886 { 01887 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01888 if (j==degree-1) 01889 { 01890 if (weights_in_tree) 01891 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01892 else 01893 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ; 01894 } 01895 break ; 01896 } 01897 } 01898 } 01899 } 01900 else if (!degree_times_position_weights) // no position_weigths, weights is a vector (1 x degree) 01901 { 01902 for (int32_t j=0; seq_pos+j<len; j++) 01903 { 01904 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01905 { 01906 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01907 { 01908 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01909 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01910 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01911 { 01912 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01913 break ; 01914 if (weights_in_tree) 01915 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ; 01916 else 01917 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ; 01918 } 01919 break ; 01920 } 01921 else 01922 { 01923 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01924 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01925 if (weights_in_tree) 01926 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ; 01927 else 01928 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ; 01929 } 01930 } 01931 else 01932 { 01933 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01934 if (j==degree-1) 01935 { 01936 if (weights_in_tree) 01937 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01938 else 01939 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ; 01940 } 01941 break ; 01942 } 01943 } 01944 } 01945 else // no position_weigths, weights is a matrix (len x degree) 01946 { 01947 /*if (!position_mask) 01948 { 01949 position_mask = SG_MALLOC(bool, len); 01950 for (int32_t i=0; i<len; i++) 01951 { 01952 position_mask[i]=false ; 01953 01954 for (int32_t j=0; j<degree; j++) 01955 if (weights[i*degree+j]!=0.0) 01956 { 01957 position_mask[i]=true ; 01958 break ; 01959 } 01960 } 01961 } 01962 if (position_mask[weight_pos]==0) 01963 return ;*/ 01964 01965 for (int32_t j=0; seq_pos+j<len; j++) 01966 { 01967 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01968 { 01969 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01970 { 01971 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01972 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ; 01973 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01974 { 01975 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01976 break ; 01977 if (weights_in_tree) 01978 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ; 01979 else 01980 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ; 01981 } 01982 break ; 01983 } 01984 else 01985 { 01986 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01987 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01988 if (weights_in_tree) 01989 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ; 01990 else 01991 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ; 01992 } 01993 } 01994 else 01995 { 01996 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ; 01997 if (j==degree-1) 01998 { 01999 if (weights_in_tree) 02000 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ; 02001 else 02002 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ; 02003 } 02004 break ; 02005 } 02006 } 02007 } 02008 } 02009 02010 template <class Trie> 02011 void CTrie<Trie>::fill_backtracking_table_recursion( 02012 Trie* tree, int32_t depth, uint64_t seq, float64_t value, 02013 DynArray<ConsensusEntry>* table, float64_t* weights) 02014 { 02015 float64_t w=1.0; 02016 02017 if (weights_in_tree || depth==0) 02018 value+=tree->weight; 02019 else 02020 { 02021 w=weights[degree-1]; 02022 value+=weights[depth-1]*tree->weight; 02023 } 02024 02025 if (degree-1==depth) 02026 { 02027 for (int32_t sym=0; sym<4; sym++) 02028 { 02029 float64_t v=w*tree->child_weights[sym]; 02030 if (v!=0.0) 02031 { 02032 ConsensusEntry entry; 02033 entry.bt=-1; 02034 entry.score=value+v; 02035 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1)); 02036 02037 table->append_element(entry); 02038 } 02039 } 02040 } 02041 else 02042 { 02043 for (int32_t sym=0; sym<4; sym++) 02044 { 02045 uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1)); 02046 if (tree->children[sym] != NO_CHILD) 02047 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights); 02048 } 02049 } 02050 } 02051 02052 template <class Trie> 02053 float64_t CTrie<Trie>::get_cumulative_score( 02054 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights) 02055 { 02056 float64_t result=0.0; 02057 02058 //SG_PRINT("pos:%i length:%i deg:%i seq:0x%0llx...\n", pos, length, deg, seq); 02059 02060 for (int32_t i=pos; i<pos+deg && i<length; i++) 02061 { 02062 //SG_PRINT("loop %d\n", i); 02063 Trie* tree = &TreeMem[trees[i]]; 02064 02065 for (int32_t d=0; d<deg-i+pos; d++) 02066 { 02067 //SG_PRINT("loop degree %d shit: %d\n", d, (2*(deg-1-d-i+pos))); 02068 ASSERT(d-1<degree); 02069 int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3); 02070 02071 float64_t w=1.0; 02072 if (!weights_in_tree) 02073 w=weights[d]; 02074 02075 ASSERT(tree->children[sym] != NO_CHILD); 02076 tree=&TreeMem[tree->children[sym]]; 02077 result+=w*tree->weight; 02078 } 02079 } 02080 //SG_PRINT("cum: %f\n", result); 02081 return result; 02082 } 02083 02084 template <class Trie> 02085 void CTrie<Trie>::fill_backtracking_table( 02086 int32_t pos, DynArray<ConsensusEntry>* prev, 02087 DynArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights) 02088 { 02089 ASSERT(pos>=0 && pos<length); 02090 ASSERT(!use_compact_terminal_nodes); 02091 02092 Trie* t = &TreeMem[trees[pos]]; 02093 02094 fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights); 02095 02096 02097 if (cumulative) 02098 { 02099 int32_t num_cur=cur->get_num_elements(); 02100 for (int32_t i=0; i<num_cur; i++) 02101 { 02102 ConsensusEntry entry=cur->get_element(i); 02103 entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights); 02104 cur->set_element(entry,i); 02105 //SG_PRINT("cum: str:0%0llx sc:%f bt:%d\n",entry.string,entry.score,entry.bt); 02106 } 02107 } 02108 02109 //if previous tree exists find maximum scoring path 02110 //for each element in cur and update bt table 02111 if (prev) 02112 { 02113 int32_t num_cur=cur->get_num_elements(); 02114 int32_t num_prev=prev->get_num_elements(); 02115 02116 for (int32_t i=0; i<num_cur; i++) 02117 { 02118 //uint64_t str_cur_old= cur->get_element(i).string; 02119 uint64_t str_cur= cur->get_element(i).string >> 2; 02120 //SG_PRINT("...cur:0x%0llx cur_noprfx:0x%0llx...\n", str_cur_old, str_cur); 02121 02122 int32_t bt=-1; 02123 float64_t max_score=0.0; 02124 02125 for (int32_t j=0; j<num_prev; j++) 02126 { 02127 //uint64_t str_prev_old= prev->get_element(j).string; 02128 uint64_t mask= 02129 ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1)))); 02130 uint64_t str_prev= mask & prev->get_element(j).string; 02131 //SG_PRINT("...prev:0x%0llx prev_nosfx:0x%0llx mask:%0llx...\n", str_prev_old, str_prev,mask); 02132 02133 if (str_cur == str_prev) 02134 { 02135 float64_t sc=prev->get_element(j).score+cur->get_element(i).score; 02136 if (bt==-1 || sc>max_score) 02137 { 02138 bt=j; 02139 max_score=sc; 02140 02141 //SG_PRINT("new_max[%i,%i] = %f\n", j,i, max_score); 02142 } 02143 } 02144 } 02145 02146 ASSERT(bt!=-1); 02147 ConsensusEntry entry; 02148 entry.bt=bt; 02149 entry.score=max_score; 02150 entry.string=cur->get_element(i).string; 02151 cur->set_element(entry, i); 02152 //SG_PRINT("entry[%d]: str:0%0llx sc:%f bt:%d\n",i, entry.string,entry.score,entry.bt); 02153 } 02154 } 02155 } 02156 } 02157 #endif // _TRIE_H___