SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Heiko Strathmann 00008 * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3 00009 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/kernel/DistantSegmentsKernel.h> 00013 #include <string> 00014 00015 using namespace shogun; 00016 00017 CDistantSegmentsKernel::CDistantSegmentsKernel() : CStringKernel<char>(), 00018 m_delta(0), m_theta(0) 00019 { 00020 init(); 00021 } 00022 00023 CDistantSegmentsKernel::CDistantSegmentsKernel(int32_t size, int32_t delta, 00024 int32_t theta) : CStringKernel<char>(), m_delta(delta), m_theta(theta) 00025 { 00026 init(); 00027 } 00028 00029 CDistantSegmentsKernel::CDistantSegmentsKernel(CStringFeatures<char>* l, 00030 CStringFeatures<char>* r, int32_t size, int32_t delta, int32_t theta) : 00031 CStringKernel<char>(), m_delta(delta), m_theta(theta) 00032 { 00033 init(); 00034 CStringKernel<char>::init(l, r); 00035 } 00036 00037 bool CDistantSegmentsKernel::init(CFeatures* l, CFeatures* r) 00038 { 00039 CKernel::init(l, r); 00040 return init_normalizer(); 00041 } 00042 00043 void CDistantSegmentsKernel::init() 00044 { 00045 SG_ADD(&m_delta, "delta", "Delta parameter of the DS-Kernel", MS_AVAILABLE); 00046 SG_ADD(&m_theta, "theta", "Theta parameter of the DS-Kernel", MS_AVAILABLE); 00047 } 00048 00049 float64_t CDistantSegmentsKernel::compute(int32_t idx_a, int32_t idx_b) 00050 { 00051 bool free_a, free_b; 00052 int32_t aLength=0, bLength=0; 00053 char* a=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, aLength, 00054 free_a); 00055 char* b=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, bLength, 00056 free_b); 00057 ASSERT(a && b); 00058 00059 if ((aLength<1)||(bLength<1)) 00060 SG_ERROR("Empty sequences"); 00061 00062 float64_t result=compute(a, aLength, b, bLength, m_delta, m_theta); 00063 00064 ((CStringFeatures<char>*) lhs)->free_feature_vector(a, idx_a, free_a); 00065 ((CStringFeatures<char>*) rhs)->free_feature_vector(b, idx_b, free_b); 00066 00067 return result; 00068 } 00069 00070 int32_t CDistantSegmentsKernel::bin(int32_t j, int32_t i) 00071 { 00072 if (i>j) 00073 return 0; 00074 if (i==3 && j>=3) 00075 { 00076 return j*(j-1)*(j-2)/6; 00077 } 00078 else if (i==2 && j>=2) 00079 { 00080 return j*(j-1)/2; 00081 } 00082 return 0; 00083 } 00084 00085 int32_t CDistantSegmentsKernel::compute(char* s, int32_t sLength, char* t, 00086 int32_t tLength, int32_t delta_m, int32_t theta_m) 00087 { 00088 int32_t c=0; 00089 int32_t* i_=SG_MALLOC(int32_t, delta_m+1); 00090 int32_t* l_=SG_MALLOC(int32_t, delta_m+1); 00091 for (int32_t j_s=0; j_s<=(int32_t) sLength-1; j_s++) 00092 { 00093 for (int32_t j_t=0; j_t<=(int32_t) tLength-1; j_t++) 00094 { 00095 if (s[j_s-1+1]==t[j_t-1+1]) 00096 { 00097 int32_t n=CMath::min(CMath::min(sLength-j_s, tLength-j_t), delta_m); 00098 int32_t k=-1; 00099 int32_t i=1; 00100 while (i<=n) 00101 { 00102 k++; 00103 i_[2*k]=i; 00104 i++; 00105 while (i<=n&&s[j_s-1+i]==t[j_t-1+i]) 00106 i++; 00107 i_[2*k+1]=i; 00108 l_[k]=i_[2*k+1]-i_[2*k]+1; 00109 i++; 00110 while (i<=n&&s[j_s-1+i]!=t[j_t-1+i]) 00111 i++; 00112 } 00113 c+=bin(l_[0], 3)-2*bin(l_[0]-theta_m, 3) 00114 +bin(l_[0]-2*theta_m, 3); 00115 int32_t c1=0; 00116 for (int32_t r=1; r<=k; r++) 00117 { 00118 c1+=bin(l_[r], 2)-bin(l_[r]-theta_m, 2); 00119 } 00120 c+=CMath::min(theta_m, i_[1]-i_[0])*c1; 00121 } 00122 } 00123 } 00124 delete l_; 00125 delete i_; 00126 return c; 00127 }