SHOGUN
v1.1.0
/*
   SVM with Quasi-Newton stochastic gradient
   Copyright (C) 2009- Antoine Bordes

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA

   Shogun adjustments (w) 2011 Siddharth Kherada
*/

#include <shogun/classifier/svm/SGDQN.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/loss/HingeLoss.h>

#include <string.h>

using namespace shogun;

CSGDQN::CSGDQN()
: CLinearMachine()
{
	init();
}

CSGDQN::CSGDQN(float64_t C)
: CLinearMachine()
{
	init();

	C1=C;
	C2=C;
}

CSGDQN::CSGDQN(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
	init();
	C1=C;
	C2=C;

	set_features(traindat);
	set_labels(trainlab);
}

CSGDQN::~CSGDQN()
{
	SG_UNREF(loss);
}

void CSGDQN::set_loss_function(CLossFunction* loss_func)
{
	if (loss)
		SG_UNREF(loss);
	loss=loss_func;
	SG_REF(loss);
}

/* Accumulate, per dimension, a secant-style estimate of the inverse curvature:
 * the ratio between the weight change and the corresponding change of the
 * regularized gradient. When a coordinate did not move, fall back to 1/lambda,
 * the inverse curvature of the regularizer alone. */
void CSGDQN::compute_ratio(float64_t* W, float64_t* W_1, float64_t* B, float64_t* dst, int32_t dim, float64_t lambda, float64_t loss_val)
{
	for (int32_t i=0; i<dim; i++)
	{
		float64_t diffw=W_1[i]-W[i];
		if (diffw)
			B[i]+=diffw/(lambda*diffw+loss_val*dst[i]);
		else
			B[i]+=1/lambda;
	}
}

/* Blend the running scaling vector Bc with the fresh estimate B using the
 * weights c1 and c2, then clip every entry to the interval [v1,v2]. */
void CSGDQN::combine_and_clip(float64_t* Bc, float64_t* B, int32_t dim, float64_t c1, float64_t c2, float64_t v1, float64_t v2)
{
	for (int32_t i=0; i<dim; i++)
	{
		if (B[i])
		{
			Bc[i]=Bc[i]*c1+B[i]*c2;
			Bc[i]=CMath::min(CMath::max(Bc[i],v1),v2);
		}
	}
}

bool CSGDQN::train(CFeatures* data)
{
	ASSERT(labels);

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");
		set_features((CDotFeatures*) data);
	}

	ASSERT(features);
	ASSERT(labels->is_two_class_labeling());

	int32_t num_train_labels=labels->get_num_labels();
	w_dim=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels);
	ASSERT(num_vec>0);

	SG_FREE(w);
	w=SG_MALLOC(float64_t, w_dim);
	memset(w, 0, w_dim*sizeof(float64_t));

	float64_t lambda=1.0/(C1*num_vec);

	// Shift t in order to have a
	// reasonable initial learning rate.
	// This assumes |x| \approx 1.
	float64_t maxw = 1.0 / sqrt(lambda);
	float64_t typw = sqrt(maxw);
	float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
	t = 1 / (eta0 * lambda);

	SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

	// Bc holds the per-dimension scaling of the learning rate; initializing it
	// to 1/lambda makes the first updates behave like plain SGD.
	float64_t* Bc=SG_MALLOC(float64_t, w_dim);
	CMath::fill_vector(Bc, w_dim, 1/lambda);

	float64_t* result=SG_MALLOC(float64_t, w_dim);
	float64_t* B=SG_MALLOC(float64_t, w_dim);
	float64_t* w_1=SG_MALLOC(float64_t, w_dim);

	// Estimate the sparsity-dependent interval between regularization updates.
	calibrate();

	SG_INFO("Training on %d vectors\n", num_vec);
	CSignal::clear_cancel();

	ELossType loss_type = loss->get_loss_type();
	bool is_log_loss = false;
	if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
		is_log_loss = true;

	for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
	{
		count = skip;
		bool updateB=false;
		for (int32_t i=0; i<num_vec; i++)
		{
			SGVector<float64_t> v = features->get_computed_dot_feature_vector(i);
			ASSERT(w_dim==v.vlen);
			float64_t eta = 1.0/t;
			float64_t y = labels->get_label(i);
			float64_t z = y * features->dense_dot(i, w, w_dim);
			if (updateB==true)
			{
				// Re-estimate the diagonal scaling from two consecutive
				// gradients evaluated on the same example.
				if (z < 1 || is_log_loss)
				{
					// Snapshot w before the update so compute_ratio() can
					// form the weight difference.
					memcpy(w_1, w, w_dim*sizeof(float64_t));
					float64_t loss_1=-loss->first_derivative(z,1);
					CMath::vector_multiply(result,Bc,v.vector,w_dim);
					CMath::add(w,eta*loss_1*y,result,1.0,w,w_dim);
					float64_t z2 = y * features->dense_dot(i, w, w_dim);
					float64_t diffloss = -loss->first_derivative(z2,1) - loss_1;
					if (diffloss)
					{
						compute_ratio(w,w_1,B,v.vector,w_dim,lambda,y*diffloss);
						if (t>skip)
							combine_and_clip(Bc,B,w_dim,(t-skip)/(t+skip),2*skip/(t+skip),1/(100*lambda),100/lambda);
						else
							combine_and_clip(Bc,B,w_dim,t/(t+skip),skip/(t+skip),1/(100*lambda),100/lambda);
					}
				}
				updateB=false;
			}
			else
			{
				// Apply the regularization term only every 'skip' examples
				// and schedule a scaling re-estimation for the next one.
				if (--count<=0)
				{
					CMath::vector_multiply(result,Bc,w,w_dim);
					CMath::add(w,-skip*lambda*eta,result,1.0,w,w_dim);
					count = skip;
					updateB=true;
				}

				// Scaled stochastic gradient step on the loss term.
				if (z < 1 || is_log_loss)
				{
					CMath::vector_multiply(result,Bc,v.vector,w_dim);
					CMath::add(w,eta*-loss->first_derivative(z,1)*y,result,1.0,w,w_dim);
				}
			}
			t++;

			v.free_vector();
		}
	}
	SG_FREE(result);
	SG_FREE(w_1);
	SG_FREE(B);
	SG_FREE(Bc);

	return true;
}

void CSGDQN::calibrate()
{
	ASSERT(features);
	int32_t num_vec=features->get_num_vectors();
	int32_t c_dim=features->get_dim_feature_space();

	ASSERT(num_vec>0);
	ASSERT(c_dim>0);

	SG_INFO("Estimating sparsity num_vec=%d num_feat=%d.\n", num_vec, c_dim);

	int32_t n = 0;
	float64_t r = 0;

	for (int32_t j=0; j<num_vec; j++, n++)
		r += features->get_nnz_features_for_vector(j);

	// compute weight decay skip
	skip = (int32_t) ((16 * n * c_dim) / r);
}

void CSGDQN::init()
{
	t=0;
	C1=1;
	C2=1;
	epochs=5;
	skip=1000;
	count=1000;

	loss=new CHingeLoss();
	SG_REF(loss);

	m_parameters->add(&C1, "C1", "Cost constant 1.");
	m_parameters->add(&C2, "C2", "Cost constant 2.");
	m_parameters->add(&epochs, "epochs", "epochs");
	m_parameters->add(&skip, "skip", "skip");
	m_parameters->add(&count,
"count", "count"); 00251 }