SHOGUN
v1.1.0
|
00001 /* 00002 * Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * 1. Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * 00012 * 2. Redistributions in binary form must reproduce the above copyright 00013 * notice, this list of conditions and the following disclaimer in the 00014 * documentation and/or other materials provided with the distribution. 00015 * 00016 * 3. Neither name of copyright holders nor the names of its contributors 00017 * may be used to endorse or promote products derived from this software 00018 * without specific prior written permission. 00019 * 00020 * 00021 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00022 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00023 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00024 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 00025 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00026 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00027 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 * 00033 * Shogun specific adjustments (w) 2006-2009 Soeren Sonnenburg 00034 */ 00035 00036 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00037 00038 #include <shogun/lib/memory.h> 00039 #include <shogun/classifier/svm/SVM_libsvm.h> 00040 #include <shogun/kernel/Kernel.h> 00041 #include <shogun/io/SGIO.h> 00042 #include <shogun/lib/Time.h> 00043 #include <shogun/lib/Signal.h> 00044 #include <shogun/lib/common.h> 00045 #include <shogun/mathematics/Math.h> 00046 00047 #include <stdio.h> 00048 #include <stdlib.h> 00049 #include <ctype.h> 00050 #include <float.h> 00051 #include <string.h> 00052 #include <stdarg.h> 00053 00054 #ifdef HAVE_PTHREAD 00055 #include <pthread.h> 00056 #endif 00057 00058 namespace shogun 00059 { 00060 00061 typedef KERNELCACHE_ELEM Qfloat; 00062 typedef float64_t schar; 00063 00064 template <class S, class T> inline void clone(T*& dst, S* src, int32_t n) 00065 { 00066 dst = SG_MALLOC(T, n); 00067 memcpy((void *)dst,(void *)src,sizeof(T)*n); 00068 } 00069 #define INF HUGE_VAL 00070 #define TAU 1e-12 00071 00072 class QMatrix; 00073 class SVC_QMC; 00074 00075 // 00076 // Kernel Cache 00077 // 00078 // l is the number of total data items 00079 // size is the cache size limit in bytes 00080 // 00081 class Cache 00082 { 00083 public: 00084 Cache(int32_t l, int64_t size); 00085 ~Cache(); 00086 00087 // request data [0,len) 00088 // return some position p where [p,len) need to be filled 00089 // (p >= len if nothing needs to be filled) 00090 int32_t get_data(const int32_t index, Qfloat **data, int32_t len); 00091 void swap_index(int32_t i, int32_t j); // future_option 00092 00093 private: 00094 int32_t l; 00095 int64_t size; 00096 struct head_t 00097 { 00098 head_t *prev, *next; // a circular list 00099 Qfloat *data; 00100 int32_t len; // data[0,len) is cached in this entry 00101 }; 00102 00103 head_t *head; 00104 head_t lru_head; 00105 void lru_delete(head_t *h); 00106 void lru_insert(head_t *h); 00107 }; 00108 00109 Cache::Cache(int32_t l_, int64_t size_):l(l_),size(size_) 00110 { 00111 head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0 00112 size /= sizeof(Qfloat); 00113 size -= l * sizeof(head_t) / sizeof(Qfloat); 00114 size = CMath::max(size, (int64_t) 2*l); // cache must be large enough for two columns 00115 lru_head.next = lru_head.prev = &lru_head; 00116 } 00117 00118 Cache::~Cache() 00119 { 00120 for(head_t *h = lru_head.next; h != &lru_head; h=h->next) 00121 SG_FREE(h->data); 00122 SG_FREE(head); 00123 } 00124 00125 void Cache::lru_delete(head_t *h) 00126 { 00127 // delete from current location 00128 h->prev->next = h->next; 00129 h->next->prev = h->prev; 00130 } 00131 00132 void Cache::lru_insert(head_t *h) 00133 { 00134 // insert to last position 00135 h->next = &lru_head; 00136 h->prev = lru_head.prev; 00137 h->prev->next = h; 00138 h->next->prev = h; 00139 } 00140 00141 int32_t Cache::get_data(const int32_t index, Qfloat **data, int32_t len) 00142 { 00143 head_t *h = &head[index]; 00144 if(h->len) lru_delete(h); 00145 int32_t more = len - h->len; 00146 00147 if(more > 0) 00148 { 00149 // free old space 00150 while(size < more) 00151 { 00152 head_t *old = lru_head.next; 00153 lru_delete(old); 00154 SG_FREE(old->data); 00155 size += old->len; 00156 old->data = 0; 00157 old->len = 0; 00158 } 00159 00160 // allocate new space 00161 h->data = SG_REALLOC(Qfloat, h->data, len); 00162 size -= more; 00163 CMath::swap(h->len,len); 00164 } 00165 00166 lru_insert(h); 00167 *data = h->data; 00168 return len; 00169 } 00170 00171 void Cache::swap_index(int32_t i, int32_t j) 00172 { 00173 if(i==j) return; 00174 00175 if(head[i].len) lru_delete(&head[i]); 00176 if(head[j].len) lru_delete(&head[j]); 00177 CMath::swap(head[i].data,head[j].data); 00178 CMath::swap(head[i].len,head[j].len); 00179 if(head[i].len) lru_insert(&head[i]); 00180 if(head[j].len) lru_insert(&head[j]); 00181 00182 if(i>j) CMath::swap(i,j); 00183 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next) 00184 { 00185 if(h->len > i) 00186 { 00187 if(h->len > j) 00188 CMath::swap(h->data[i],h->data[j]); 00189 else 00190 { 00191 // give up 00192 lru_delete(h); 00193 SG_FREE(h->data); 00194 size += h->len; 00195 h->data = 0; 00196 h->len = 0; 00197 } 00198 } 00199 } 00200 } 00201 00202 // 00203 // Kernel evaluation 00204 // 00205 // the static method k_function is for doing single kernel evaluation 00206 // the constructor of Kernel prepares to calculate the l*l kernel matrix 00207 // the member function get_Q is for getting one column from the Q Matrix 00208 // 00209 class QMatrix { 00210 public: 00211 virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0; 00212 virtual Qfloat *get_QD() const = 0; 00213 virtual void swap_index(int32_t i, int32_t j) const = 0; 00214 virtual ~QMatrix() {} 00215 00216 float64_t max_train_time; 00217 }; 00218 00219 class LibSVMKernel; 00220 00221 // helper struct for threaded processing 00222 struct Q_THREAD_PARAM 00223 { 00224 int32_t i; 00225 int32_t start; 00226 int32_t end; 00227 Qfloat* data; 00228 float64_t* y; 00229 const LibSVMKernel* q; 00230 }; 00231 00232 extern Parallel* sg_parallel; 00233 00234 class LibSVMKernel: public QMatrix { 00235 public: 00236 LibSVMKernel(int32_t l, svm_node * const * x, const svm_parameter& param); 00237 virtual ~LibSVMKernel(); 00238 00239 virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0; 00240 virtual Qfloat *get_QD() const = 0; 00241 virtual void swap_index(int32_t i, int32_t j) const // no so const... 00242 { 00243 CMath::swap(x[i],x[j]); 00244 if(x_square) CMath::swap(x_square[i],x_square[j]); 00245 } 00246 00247 static void* compute_Q_parallel_helper(void* p) 00248 { 00249 Q_THREAD_PARAM* params= (Q_THREAD_PARAM*) p; 00250 int32_t i=params->i; 00251 int32_t start=params->start; 00252 int32_t end=params->end; 00253 float64_t* y=params->y; 00254 Qfloat* data=params->data; 00255 const LibSVMKernel* q=params->q; 00256 00257 if (y) // two class 00258 { 00259 for(int32_t j=start;j<end;j++) 00260 data[j] = (Qfloat) y[i]*y[j]*q->kernel_function(i,j); 00261 } 00262 else // one class, eps svr 00263 { 00264 for(int32_t j=start;j<end;j++) 00265 data[j] = (Qfloat) q->kernel_function(i,j); 00266 } 00267 00268 return NULL; 00269 } 00270 00271 void compute_Q_parallel(Qfloat* data, float64_t* lab, int32_t i, int32_t start, int32_t len) const 00272 { 00273 int32_t num_threads=sg_parallel->get_num_threads(); 00274 if (num_threads < 2) 00275 { 00276 Q_THREAD_PARAM params; 00277 params.i=i; 00278 params.start=start; 00279 params.end=len; 00280 params.y=lab; 00281 params.data=data; 00282 params.q=this; 00283 compute_Q_parallel_helper((void*) ¶ms); 00284 } 00285 else 00286 { 00287 int32_t total_num=(len-start); 00288 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); 00289 Q_THREAD_PARAM* params = SG_MALLOC(Q_THREAD_PARAM, num_threads); 00290 int32_t step= total_num/num_threads; 00291 00292 int32_t t; 00293 00294 num_threads--; 00295 for (t=0; t<num_threads; t++) 00296 { 00297 params[t].i=i; 00298 params[t].start=t*step; 00299 params[t].end=(t+1)*step; 00300 params[t].y=lab; 00301 params[t].data=data; 00302 params[t].q=this; 00303 00304 int code=pthread_create(&threads[t], NULL, 00305 compute_Q_parallel_helper, (void*)¶ms[t]); 00306 00307 if (code != 0) 00308 { 00309 SG_SWARNING("Thread creation failed (thread %d of %d) " 00310 "with error:'%s'\n",t, num_threads, strerror(code)); 00311 num_threads=t; 00312 break; 00313 } 00314 } 00315 00316 params[t].i=i; 00317 params[t].start=t*step; 00318 params[t].end=len; 00319 params[t].y=lab; 00320 params[t].data=data; 00321 params[t].q=this; 00322 compute_Q_parallel_helper(¶ms[t]); 00323 00324 for (t=0; t<num_threads; t++) 00325 { 00326 if (pthread_join(threads[t], NULL) != 0) 00327 SG_SWARNING("pthread_join of thread %d/%d failed\n", t, num_threads); 00328 } 00329 00330 SG_FREE(params); 00331 SG_FREE(threads); 00332 } 00333 } 00334 00335 inline float64_t kernel_function(int32_t i, int32_t j) const 00336 { 00337 return kernel->kernel(x[i]->index,x[j]->index); 00338 } 00339 00340 private: 00341 CKernel* kernel; 00342 const svm_node **x; 00343 float64_t *x_square; 00344 00345 // svm_parameter 00346 const int32_t kernel_type; 00347 const int32_t degree; 00348 const float64_t gamma; 00349 const float64_t coef0; 00350 }; 00351 00352 LibSVMKernel::LibSVMKernel(int32_t l, svm_node * const * x_, const svm_parameter& param) 00353 : kernel_type(param.kernel_type), degree(param.degree), 00354 gamma(param.gamma), coef0(param.coef0) 00355 { 00356 clone(x,x_,l); 00357 x_square = 0; 00358 kernel=param.kernel; 00359 max_train_time=param.max_train_time; 00360 } 00361 00362 LibSVMKernel::~LibSVMKernel() 00363 { 00364 SG_FREE(x); 00365 SG_FREE(x_square); 00366 } 00367 00368 // Generalized SMO+SVMlight algorithm 00369 // Solves: 00370 // 00371 // min 0.5(\alpha^T Q \alpha) + b^T \alpha 00372 // 00373 // y^T \alpha = \delta 00374 // y_i = +1 or -1 00375 // 0 <= alpha_i <= Cp for y_i = 1 00376 // 0 <= alpha_i <= Cn for y_i = -1 00377 // 00378 // Given: 00379 // 00380 // Q, p, y, Cp, Cn, and an initial feasible point \alpha 00381 // l is the size of vectors and matrices 00382 // eps is the stopping tolerance 00383 // 00384 // solution will be put in \alpha, objective value will be put in obj 00385 // 00386 class Solver { 00387 public: 00388 Solver() {}; 00389 virtual ~Solver() {}; 00390 00391 struct SolutionInfo { 00392 float64_t obj; 00393 float64_t rho; 00394 float64_t upper_bound_p; 00395 float64_t upper_bound_n; 00396 float64_t r; // for Solver_NU 00397 }; 00398 00399 void Solve( 00400 int32_t l, const QMatrix& Q, const float64_t *p_, const schar *y_, 00401 float64_t *alpha_, float64_t Cp, float64_t Cn, float64_t eps, 00402 SolutionInfo* si, int32_t shrinking, bool use_bias); 00403 00404 protected: 00405 int32_t active_size; 00406 schar *y; 00407 float64_t *G; // gradient of objective function 00408 enum { LOWER_BOUND, UPPER_BOUND, FREE }; 00409 char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE 00410 float64_t *alpha; 00411 const QMatrix *Q; 00412 const Qfloat *QD; 00413 float64_t eps; 00414 float64_t Cp,Cn; 00415 float64_t *p; 00416 int32_t *active_set; 00417 float64_t *G_bar; // gradient, if we treat free variables as 0 00418 int32_t l; 00419 bool unshrink; // XXX 00420 00421 float64_t get_C(int32_t i) 00422 { 00423 return (y[i] > 0)? Cp : Cn; 00424 } 00425 void update_alpha_status(int32_t i) 00426 { 00427 if(alpha[i] >= get_C(i)) 00428 alpha_status[i] = UPPER_BOUND; 00429 else if(alpha[i] <= 0) 00430 alpha_status[i] = LOWER_BOUND; 00431 else alpha_status[i] = FREE; 00432 } 00433 bool is_upper_bound(int32_t i) { return alpha_status[i] == UPPER_BOUND; } 00434 bool is_lower_bound(int32_t i) { return alpha_status[i] == LOWER_BOUND; } 00435 bool is_free(int32_t i) { return alpha_status[i] == FREE; } 00436 void swap_index(int32_t i, int32_t j); 00437 void reconstruct_gradient(); 00438 virtual int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap); 00439 virtual float64_t calculate_rho(); 00440 virtual void do_shrinking(); 00441 private: 00442 bool be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2); 00443 }; 00444 00445 void Solver::swap_index(int32_t i, int32_t j) 00446 { 00447 Q->swap_index(i,j); 00448 CMath::swap(y[i],y[j]); 00449 CMath::swap(G[i],G[j]); 00450 CMath::swap(alpha_status[i],alpha_status[j]); 00451 CMath::swap(alpha[i],alpha[j]); 00452 CMath::swap(p[i],p[j]); 00453 CMath::swap(active_set[i],active_set[j]); 00454 CMath::swap(G_bar[i],G_bar[j]); 00455 } 00456 00457 void Solver::reconstruct_gradient() 00458 { 00459 // reconstruct inactive elements of G from G_bar and free variables 00460 00461 if(active_size == l) return; 00462 00463 int32_t i,j; 00464 int32_t nr_free = 0; 00465 00466 for(j=active_size;j<l;j++) 00467 G[j] = G_bar[j] + p[j]; 00468 00469 for(j=0;j<active_size;j++) 00470 if(is_free(j)) 00471 nr_free++; 00472 00473 if (nr_free*l > 2*active_size*(l-active_size)) 00474 { 00475 for(i=active_size;i<l;i++) 00476 { 00477 const Qfloat *Q_i = Q->get_Q(i,active_size); 00478 for(j=0;j<active_size;j++) 00479 if(is_free(j)) 00480 G[i] += alpha[j] * Q_i[j]; 00481 } 00482 } 00483 else 00484 { 00485 for(i=0;i<active_size;i++) 00486 if(is_free(i)) 00487 { 00488 const Qfloat *Q_i = Q->get_Q(i,l); 00489 float64_t alpha_i = alpha[i]; 00490 for(j=active_size;j<l;j++) 00491 G[j] += alpha_i * Q_i[j]; 00492 } 00493 } 00494 } 00495 00496 void Solver::Solve( 00497 int32_t p_l, const QMatrix& p_Q, const float64_t *p_p, 00498 const schar *p_y, float64_t *p_alpha, float64_t p_Cp, float64_t p_Cn, 00499 float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias) 00500 { 00501 this->l = p_l; 00502 this->Q = &p_Q; 00503 QD=Q->get_QD(); 00504 clone(p, p_p,l); 00505 clone(y, p_y,l); 00506 clone(alpha,p_alpha,l); 00507 this->Cp = p_Cp; 00508 this->Cn = p_Cn; 00509 this->eps = p_eps; 00510 unshrink = false; 00511 00512 // initialize alpha_status 00513 { 00514 alpha_status = SG_MALLOC(char, l); 00515 for(int32_t i=0;i<l;i++) 00516 update_alpha_status(i); 00517 } 00518 00519 // initialize active set (for shrinking) 00520 { 00521 active_set = SG_MALLOC(int32_t, l); 00522 for(int32_t i=0;i<l;i++) 00523 active_set[i] = i; 00524 active_size = l; 00525 } 00526 00527 // initialize gradient 00528 CSignal::clear_cancel(); 00529 CTime start_time; 00530 { 00531 G = SG_MALLOC(float64_t, l); 00532 G_bar = SG_MALLOC(float64_t, l); 00533 int32_t i; 00534 for(i=0;i<l;i++) 00535 { 00536 G[i] = p_p[i]; 00537 G_bar[i] = 0; 00538 } 00539 SG_SINFO("Computing gradient for initial set of non-zero alphas\n"); 00540 //CMath::display_vector(alpha, l, "alphas"); 00541 for(i=0;i<l && !CSignal::cancel_computations(); i++) 00542 { 00543 if(!is_lower_bound(i)) 00544 { 00545 const Qfloat *Q_i = Q->get_Q(i,l); 00546 float64_t alpha_i = alpha[i]; 00547 int32_t j; 00548 for(j=0;j<l;j++) 00549 G[j] += alpha_i*Q_i[j]; 00550 if(is_upper_bound(i)) 00551 for(j=0;j<l;j++) 00552 G_bar[j] += get_C(i) * Q_i[j]; 00553 } 00554 SG_SPROGRESS(i, 0, l); 00555 } 00556 SG_SDONE(); 00557 } 00558 00559 // optimization step 00560 00561 int32_t iter = 0; 00562 int32_t counter = CMath::min(l,1000)+1; 00563 00564 while (!CSignal::cancel_computations()) 00565 { 00566 if (Q->max_train_time > 0 && start_time.cur_time_diff() > Q->max_train_time) 00567 break; 00568 00569 // show progress and do shrinking 00570 00571 if(--counter == 0) 00572 { 00573 counter = CMath::min(l,1000); 00574 if(shrinking) do_shrinking(); 00575 //SG_SINFO("."); 00576 } 00577 00578 int32_t i,j; 00579 float64_t gap; 00580 if(select_working_set(i,j, gap)!=0) 00581 { 00582 // reconstruct the whole gradient 00583 reconstruct_gradient(); 00584 // reset active set size and check 00585 active_size = l; 00586 //SG_SINFO("*"); 00587 if(select_working_set(i,j, gap)!=0) 00588 break; 00589 else 00590 counter = 1; // do shrinking next iteration 00591 } 00592 00593 SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6); 00594 00595 ++iter; 00596 00597 // update alpha[i] and alpha[j], handle bounds carefully 00598 00599 const Qfloat *Q_i = Q->get_Q(i,active_size); 00600 const Qfloat *Q_j = Q->get_Q(j,active_size); 00601 00602 float64_t C_i = get_C(i); 00603 float64_t C_j = get_C(j); 00604 00605 float64_t old_alpha_i = alpha[i]; 00606 float64_t old_alpha_j = alpha[j]; 00607 00608 if (!use_bias) 00609 { 00610 double pi=G[i]-Q_i[i]*alpha[i]-Q_i[j]*alpha[j]; 00611 double pj=G[j]-Q_i[j]*alpha[i]-Q_j[j]*alpha[j]; 00612 double det=Q_i[i]*Q_j[j]-Q_i[j]*Q_i[j]; 00613 double alpha_i=-(Q_j[j]*pi-Q_i[j]*pj)/det; 00614 alpha_i=CMath::min(C_i,CMath::max(0.0,alpha_i)); 00615 double alpha_j=-(-Q_i[j]*pi+Q_i[i]*pj)/det; 00616 alpha_j=CMath::min(C_j,CMath::max(0.0,alpha_j)); 00617 00618 if (alpha_i==0 || alpha_i == C_i) 00619 alpha_j=CMath::min(C_j,CMath::max(0.0,-(pj+Q_i[j]*alpha_i)/Q_j[j])); 00620 if (alpha_j==0 || alpha_j == C_j) 00621 alpha_i=CMath::min(C_i,CMath::max(0.0,-(pi+Q_i[j]*alpha_j)/Q_i[i])); 00622 00623 alpha[i]=alpha_i; alpha[j]=alpha_j; 00624 } 00625 else 00626 { 00627 if(y[i]!=y[j]) 00628 { 00629 float64_t quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j]; 00630 if (quad_coef <= 0) 00631 quad_coef = TAU; 00632 float64_t delta = (-G[i]-G[j])/quad_coef; 00633 float64_t diff = alpha[i] - alpha[j]; 00634 alpha[i] += delta; 00635 alpha[j] += delta; 00636 00637 if(diff > 0) 00638 { 00639 if(alpha[j] < 0) 00640 { 00641 alpha[j] = 0; 00642 alpha[i] = diff; 00643 } 00644 } 00645 else 00646 { 00647 if(alpha[i] < 0) 00648 { 00649 alpha[i] = 0; 00650 alpha[j] = -diff; 00651 } 00652 } 00653 if(diff > C_i - C_j) 00654 { 00655 if(alpha[i] > C_i) 00656 { 00657 alpha[i] = C_i; 00658 alpha[j] = C_i - diff; 00659 } 00660 } 00661 else 00662 { 00663 if(alpha[j] > C_j) 00664 { 00665 alpha[j] = C_j; 00666 alpha[i] = C_j + diff; 00667 } 00668 } 00669 } 00670 else 00671 { 00672 float64_t quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j]; 00673 if (quad_coef <= 0) 00674 quad_coef = TAU; 00675 float64_t delta = (G[i]-G[j])/quad_coef; 00676 float64_t sum = alpha[i] + alpha[j]; 00677 alpha[i] -= delta; 00678 alpha[j] += delta; 00679 00680 if(sum > C_i) 00681 { 00682 if(alpha[i] > C_i) 00683 { 00684 alpha[i] = C_i; 00685 alpha[j] = sum - C_i; 00686 } 00687 } 00688 else 00689 { 00690 if(alpha[j] < 0) 00691 { 00692 alpha[j] = 0; 00693 alpha[i] = sum; 00694 } 00695 } 00696 if(sum > C_j) 00697 { 00698 if(alpha[j] > C_j) 00699 { 00700 alpha[j] = C_j; 00701 alpha[i] = sum - C_j; 00702 } 00703 } 00704 else 00705 { 00706 if(alpha[i] < 0) 00707 { 00708 alpha[i] = 0; 00709 alpha[j] = sum; 00710 } 00711 } 00712 } 00713 } 00714 00715 // update G 00716 00717 float64_t delta_alpha_i = alpha[i] - old_alpha_i; 00718 float64_t delta_alpha_j = alpha[j] - old_alpha_j; 00719 00720 for(int32_t k=0;k<active_size;k++) 00721 { 00722 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j; 00723 } 00724 00725 // update alpha_status and G_bar 00726 00727 { 00728 bool ui = is_upper_bound(i); 00729 bool uj = is_upper_bound(j); 00730 update_alpha_status(i); 00731 update_alpha_status(j); 00732 int32_t k; 00733 if(ui != is_upper_bound(i)) 00734 { 00735 Q_i = Q->get_Q(i,l); 00736 if(ui) 00737 for(k=0;k<l;k++) 00738 G_bar[k] -= C_i * Q_i[k]; 00739 else 00740 for(k=0;k<l;k++) 00741 G_bar[k] += C_i * Q_i[k]; 00742 } 00743 00744 if(uj != is_upper_bound(j)) 00745 { 00746 Q_j = Q->get_Q(j,l); 00747 if(uj) 00748 for(k=0;k<l;k++) 00749 G_bar[k] -= C_j * Q_j[k]; 00750 else 00751 for(k=0;k<l;k++) 00752 G_bar[k] += C_j * Q_j[k]; 00753 } 00754 } 00755 00756 #ifdef MCSVM_DEBUG 00757 // calculate objective value 00758 { 00759 float64_t v = 0; 00760 for(i=0;i<l;i++) 00761 v += alpha[i] * (G[i] + p[i]); 00762 00763 p_si->obj = v/2; 00764 00765 float64_t primal=0; 00766 //float64_t gap=100000; 00767 SG_SPRINT("dual obj=%f primal obf=%f gap=%f\n", v/2, primal, gap); 00768 } 00769 #endif 00770 } 00771 00772 // calculate rho 00773 00774 if (!use_bias) 00775 p_si->rho = 0; 00776 else 00777 p_si->rho = calculate_rho(); 00778 00779 // calculate objective value 00780 { 00781 float64_t v = 0; 00782 int32_t i; 00783 for(i=0;i<l;i++) 00784 v += alpha[i] * (G[i] + p[i]); 00785 00786 p_si->obj = v/2; 00787 } 00788 00789 // put back the solution 00790 { 00791 for(int32_t i=0;i<l;i++) 00792 p_alpha[active_set[i]] = alpha[i]; 00793 } 00794 00795 p_si->upper_bound_p = Cp; 00796 p_si->upper_bound_n = Cn; 00797 00798 SG_SINFO("\noptimization finished, #iter = %d\n",iter); 00799 00800 SG_FREE(p); 00801 SG_FREE(y); 00802 SG_FREE(alpha); 00803 SG_FREE(alpha_status); 00804 SG_FREE(active_set); 00805 SG_FREE(G); 00806 SG_FREE(G_bar); 00807 } 00808 00809 // return 1 if already optimal, return 0 otherwise 00810 int32_t Solver::select_working_set( 00811 int32_t &out_i, int32_t &out_j, float64_t &gap) 00812 { 00813 // return i,j such that 00814 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) 00815 // j: minimizes the decrease of obj value 00816 // (if quadratic coefficient <= 0, replace it with tau) 00817 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 00818 00819 float64_t Gmax = -INF; 00820 float64_t Gmax2 = -INF; 00821 int32_t Gmax_idx = -1; 00822 int32_t Gmin_idx = -1; 00823 float64_t obj_diff_min = INF; 00824 00825 for(int32_t t=0;t<active_size;t++) 00826 if(y[t]==+1) 00827 { 00828 if(!is_upper_bound(t)) 00829 if(-G[t] >= Gmax) 00830 { 00831 Gmax = -G[t]; 00832 Gmax_idx = t; 00833 } 00834 } 00835 else 00836 { 00837 if(!is_lower_bound(t)) 00838 if(G[t] >= Gmax) 00839 { 00840 Gmax = G[t]; 00841 Gmax_idx = t; 00842 } 00843 } 00844 00845 int32_t i = Gmax_idx; 00846 const Qfloat *Q_i = NULL; 00847 if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1 00848 Q_i = Q->get_Q(i,active_size); 00849 00850 for(int32_t j=0;j<active_size;j++) 00851 { 00852 if(y[j]==+1) 00853 { 00854 if (!is_lower_bound(j)) 00855 { 00856 float64_t grad_diff=Gmax+G[j]; 00857 if (G[j] >= Gmax2) 00858 Gmax2 = G[j]; 00859 if (grad_diff > 0) 00860 { 00861 float64_t obj_diff; 00862 float64_t quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j]; 00863 if (quad_coef > 0) 00864 obj_diff = -(grad_diff*grad_diff)/quad_coef; 00865 else 00866 obj_diff = -(grad_diff*grad_diff)/TAU; 00867 00868 if (obj_diff <= obj_diff_min) 00869 { 00870 Gmin_idx=j; 00871 obj_diff_min = obj_diff; 00872 } 00873 } 00874 } 00875 } 00876 else 00877 { 00878 if (!is_upper_bound(j)) 00879 { 00880 float64_t grad_diff= Gmax-G[j]; 00881 if (-G[j] >= Gmax2) 00882 Gmax2 = -G[j]; 00883 if (grad_diff > 0) 00884 { 00885 float64_t obj_diff; 00886 float64_t quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j]; 00887 if (quad_coef > 0) 00888 obj_diff = -(grad_diff*grad_diff)/quad_coef; 00889 else 00890 obj_diff = -(grad_diff*grad_diff)/TAU; 00891 00892 if (obj_diff <= obj_diff_min) 00893 { 00894 Gmin_idx=j; 00895 obj_diff_min = obj_diff; 00896 } 00897 } 00898 } 00899 } 00900 } 00901 00902 gap=Gmax+Gmax2; 00903 if(gap < eps) 00904 return 1; 00905 00906 out_i = Gmax_idx; 00907 out_j = Gmin_idx; 00908 return 0; 00909 } 00910 00911 bool Solver::be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2) 00912 { 00913 if(is_upper_bound(i)) 00914 { 00915 if(y[i]==+1) 00916 return(-G[i] > Gmax1); 00917 else 00918 return(-G[i] > Gmax2); 00919 } 00920 else if(is_lower_bound(i)) 00921 { 00922 if(y[i]==+1) 00923 return(G[i] > Gmax2); 00924 else 00925 return(G[i] > Gmax1); 00926 } 00927 else 00928 return(false); 00929 } 00930 00931 00932 void Solver::do_shrinking() 00933 { 00934 int32_t i; 00935 float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) } 00936 float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) } 00937 00938 // find maximal violating pair first 00939 for(i=0;i<active_size;i++) 00940 { 00941 if(y[i]==+1) 00942 { 00943 if(!is_upper_bound(i)) 00944 { 00945 if(-G[i] >= Gmax1) 00946 Gmax1 = -G[i]; 00947 } 00948 if(!is_lower_bound(i)) 00949 { 00950 if(G[i] >= Gmax2) 00951 Gmax2 = G[i]; 00952 } 00953 } 00954 else 00955 { 00956 if(!is_upper_bound(i)) 00957 { 00958 if(-G[i] >= Gmax2) 00959 Gmax2 = -G[i]; 00960 } 00961 if(!is_lower_bound(i)) 00962 { 00963 if(G[i] >= Gmax1) 00964 Gmax1 = G[i]; 00965 } 00966 } 00967 } 00968 00969 if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 00970 { 00971 unshrink = true; 00972 reconstruct_gradient(); 00973 active_size = l; 00974 } 00975 00976 for(i=0;i<active_size;i++) 00977 if (be_shrunk(i, Gmax1, Gmax2)) 00978 { 00979 active_size--; 00980 while (active_size > i) 00981 { 00982 if (!be_shrunk(active_size, Gmax1, Gmax2)) 00983 { 00984 swap_index(i,active_size); 00985 break; 00986 } 00987 active_size--; 00988 } 00989 } 00990 } 00991 00992 float64_t Solver::calculate_rho() 00993 { 00994 float64_t r; 00995 int32_t nr_free = 0; 00996 float64_t ub = INF, lb = -INF, sum_free = 0; 00997 for(int32_t i=0;i<active_size;i++) 00998 { 00999 float64_t yG = y[i]*G[i]; 01000 01001 if(is_upper_bound(i)) 01002 { 01003 if(y[i]==-1) 01004 ub = CMath::min(ub,yG); 01005 else 01006 lb = CMath::max(lb,yG); 01007 } 01008 else if(is_lower_bound(i)) 01009 { 01010 if(y[i]==+1) 01011 ub = CMath::min(ub,yG); 01012 else 01013 lb = CMath::max(lb,yG); 01014 } 01015 else 01016 { 01017 ++nr_free; 01018 sum_free += yG; 01019 } 01020 } 01021 01022 if(nr_free>0) 01023 r = sum_free/nr_free; 01024 else 01025 r = (ub+lb)/2; 01026 01027 return r; 01028 } 01029 01030 01031 // 01032 //Solve with individually weighted examples 01033 // 01034 class WeightedSolver : public Solver 01035 { 01036 01037 public: 01038 01039 WeightedSolver(float64_t* cost_vec) 01040 { 01041 01042 this->Cs = cost_vec; 01043 01044 } 01045 01046 virtual float64_t get_C(int32_t i) 01047 { 01048 01049 return Cs[i]; 01050 } 01051 01052 protected: 01053 01054 float64_t* Cs; 01055 01056 }; 01057 01058 01059 // 01060 // Solver for nu-svm classification and regression 01061 // 01062 // additional constraint: e^T \alpha = constant 01063 // 01064 class Solver_NU : public Solver 01065 { 01066 public: 01067 Solver_NU() {} 01068 void Solve( 01069 int32_t p_l, const QMatrix& p_Q, const float64_t *p_p, 01070 const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn, 01071 float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias) 01072 { 01073 this->si = p_si; 01074 Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si, 01075 shrinking,use_bias); 01076 } 01077 private: 01078 SolutionInfo *si; 01079 int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap); 01080 float64_t calculate_rho(); 01081 bool be_shrunk( 01082 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01083 float64_t Gmax4); 01084 void do_shrinking(); 01085 }; 01086 01087 // return 1 if already optimal, return 0 otherwise 01088 int32_t Solver_NU::select_working_set( 01089 int32_t &out_i, int32_t &out_j, float64_t &gap) 01090 { 01091 // return i,j such that y_i = y_j and 01092 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) 01093 // j: minimizes the decrease of obj value 01094 // (if quadratic coefficient <= 0, replace it with tau) 01095 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 01096 01097 float64_t Gmaxp = -INF; 01098 float64_t Gmaxp2 = -INF; 01099 int32_t Gmaxp_idx = -1; 01100 01101 float64_t Gmaxn = -INF; 01102 float64_t Gmaxn2 = -INF; 01103 int32_t Gmaxn_idx = -1; 01104 01105 int32_t Gmin_idx = -1; 01106 float64_t obj_diff_min = INF; 01107 01108 for(int32_t t=0;t<active_size;t++) 01109 if(y[t]==+1) 01110 { 01111 if(!is_upper_bound(t)) 01112 if(-G[t] >= Gmaxp) 01113 { 01114 Gmaxp = -G[t]; 01115 Gmaxp_idx = t; 01116 } 01117 } 01118 else 01119 { 01120 if(!is_lower_bound(t)) 01121 if(G[t] >= Gmaxn) 01122 { 01123 Gmaxn = G[t]; 01124 Gmaxn_idx = t; 01125 } 01126 } 01127 01128 int32_t ip = Gmaxp_idx; 01129 int32_t in = Gmaxn_idx; 01130 const Qfloat *Q_ip = NULL; 01131 const Qfloat *Q_in = NULL; 01132 if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 01133 Q_ip = Q->get_Q(ip,active_size); 01134 if(in != -1) 01135 Q_in = Q->get_Q(in,active_size); 01136 01137 for(int32_t j=0;j<active_size;j++) 01138 { 01139 if(y[j]==+1) 01140 { 01141 if (!is_lower_bound(j)) 01142 { 01143 float64_t grad_diff=Gmaxp+G[j]; 01144 if (G[j] >= Gmaxp2) 01145 Gmaxp2 = G[j]; 01146 if (grad_diff > 0) 01147 { 01148 float64_t obj_diff; 01149 float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j]; 01150 if (quad_coef > 0) 01151 obj_diff = -(grad_diff*grad_diff)/quad_coef; 01152 else 01153 obj_diff = -(grad_diff*grad_diff)/TAU; 01154 01155 if (obj_diff <= obj_diff_min) 01156 { 01157 Gmin_idx=j; 01158 obj_diff_min = obj_diff; 01159 } 01160 } 01161 } 01162 } 01163 else 01164 { 01165 if (!is_upper_bound(j)) 01166 { 01167 float64_t grad_diff=Gmaxn-G[j]; 01168 if (-G[j] >= Gmaxn2) 01169 Gmaxn2 = -G[j]; 01170 if (grad_diff > 0) 01171 { 01172 float64_t obj_diff; 01173 float64_t quad_coef = Q_in[in]+QD[j]-2*Q_in[j]; 01174 if (quad_coef > 0) 01175 obj_diff = -(grad_diff*grad_diff)/quad_coef; 01176 else 01177 obj_diff = -(grad_diff*grad_diff)/TAU; 01178 01179 if (obj_diff <= obj_diff_min) 01180 { 01181 Gmin_idx=j; 01182 obj_diff_min = obj_diff; 01183 } 01184 } 01185 } 01186 } 01187 } 01188 01189 gap=CMath::max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2); 01190 if(gap < eps) 01191 return 1; 01192 01193 if (y[Gmin_idx] == +1) 01194 out_i = Gmaxp_idx; 01195 else 01196 out_i = Gmaxn_idx; 01197 out_j = Gmin_idx; 01198 01199 return 0; 01200 } 01201 01202 bool Solver_NU::be_shrunk( 01203 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01204 float64_t Gmax4) 01205 { 01206 if(is_upper_bound(i)) 01207 { 01208 if(y[i]==+1) 01209 return(-G[i] > Gmax1); 01210 else 01211 return(-G[i] > Gmax4); 01212 } 01213 else if(is_lower_bound(i)) 01214 { 01215 if(y[i]==+1) 01216 return(G[i] > Gmax2); 01217 else 01218 return(G[i] > Gmax3); 01219 } 01220 else 01221 return(false); 01222 } 01223 01224 void Solver_NU::do_shrinking() 01225 { 01226 float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } 01227 float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } 01228 float64_t Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) } 01229 float64_t Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) } 01230 01231 // find maximal violating pair first 01232 int32_t i; 01233 for(i=0;i<active_size;i++) 01234 { 01235 if(!is_upper_bound(i)) 01236 { 01237 if(y[i]==+1) 01238 { 01239 if(-G[i] > Gmax1) Gmax1 = -G[i]; 01240 } 01241 else if(-G[i] > Gmax4) Gmax4 = -G[i]; 01242 } 01243 if(!is_lower_bound(i)) 01244 { 01245 if(y[i]==+1) 01246 { 01247 if(G[i] > Gmax2) Gmax2 = G[i]; 01248 } 01249 else if(G[i] > Gmax3) Gmax3 = G[i]; 01250 } 01251 } 01252 01253 if(unshrink == false && CMath::max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 01254 { 01255 unshrink = true; 01256 reconstruct_gradient(); 01257 active_size = l; 01258 } 01259 01260 for(i=0;i<active_size;i++) 01261 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4)) 01262 { 01263 active_size--; 01264 while (active_size > i) 01265 { 01266 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4)) 01267 { 01268 swap_index(i,active_size); 01269 break; 01270 } 01271 active_size--; 01272 } 01273 } 01274 } 01275 01276 float64_t Solver_NU::calculate_rho() 01277 { 01278 int32_t nr_free1 = 0,nr_free2 = 0; 01279 float64_t ub1 = INF, ub2 = INF; 01280 float64_t lb1 = -INF, lb2 = -INF; 01281 float64_t sum_free1 = 0, sum_free2 = 0; 01282 01283 for(int32_t i=0; i<active_size; i++) 01284 { 01285 if(y[i]==+1) 01286 { 01287 if(is_upper_bound(i)) 01288 lb1 = CMath::max(lb1,G[i]); 01289 else if(is_lower_bound(i)) 01290 ub1 = CMath::min(ub1,G[i]); 01291 else 01292 { 01293 ++nr_free1; 01294 sum_free1 += G[i]; 01295 } 01296 } 01297 else 01298 { 01299 if(is_upper_bound(i)) 01300 lb2 = CMath::max(lb2,G[i]); 01301 else if(is_lower_bound(i)) 01302 ub2 = CMath::min(ub2,G[i]); 01303 else 01304 { 01305 ++nr_free2; 01306 sum_free2 += G[i]; 01307 } 01308 } 01309 } 01310 01311 float64_t r1,r2; 01312 if(nr_free1 > 0) 01313 r1 = sum_free1/nr_free1; 01314 else 01315 r1 = (ub1+lb1)/2; 01316 01317 if(nr_free2 > 0) 01318 r2 = sum_free2/nr_free2; 01319 else 01320 r2 = (ub2+lb2)/2; 01321 01322 si->r = (r1+r2)/2; 01323 return (r1-r2)/2; 01324 } 01325 01326 class SVC_QMC: public LibSVMKernel 01327 { 01328 public: 01329 SVC_QMC(const svm_problem& prob, const svm_parameter& param, const schar *y_, int32_t n_class, float64_t fac) 01330 :LibSVMKernel(prob.l, prob.x, param) 01331 { 01332 nr_class=n_class; 01333 factor=fac; 01334 clone(y,y_,prob.l); 01335 cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20))); 01336 QD = SG_MALLOC(Qfloat, prob.l); 01337 for(int32_t i=0;i<prob.l;i++) 01338 { 01339 QD[i]= factor*(nr_class-1)*kernel_function(i,i); 01340 } 01341 } 01342 01343 Qfloat *get_Q(int32_t i, int32_t len) const 01344 { 01345 Qfloat *data; 01346 int32_t start; 01347 if((start = cache->get_data(i,&data,len)) < len) 01348 { 01349 compute_Q_parallel(data, NULL, i, start, len); 01350 01351 for(int32_t j=start;j<len;j++) 01352 { 01353 if (y[i]==y[j]) 01354 data[j] *= (factor*(nr_class-1)); 01355 else 01356 data[j] *= (-factor); 01357 } 01358 } 01359 return data; 01360 } 01361 01362 inline Qfloat get_orig_Qij(Qfloat Q, int32_t i, int32_t j) 01363 { 01364 if (y[i]==y[j]) 01365 return Q/(nr_class-1); 01366 else 01367 return -Q; 01368 } 01369 01370 Qfloat *get_QD() const 01371 { 01372 return QD; 01373 } 01374 01375 void swap_index(int32_t i, int32_t j) const 01376 { 01377 cache->swap_index(i,j); 01378 LibSVMKernel::swap_index(i,j); 01379 CMath::swap(y[i],y[j]); 01380 CMath::swap(QD[i],QD[j]); 01381 } 01382 01383 ~SVC_QMC() 01384 { 01385 SG_FREE(y); 01386 delete cache; 01387 SG_FREE(QD); 01388 } 01389 private: 01390 float64_t factor; 01391 float64_t nr_class; 01392 schar *y; 01393 Cache *cache; 01394 Qfloat *QD; 01395 }; 01396 01397 // 01398 // Solver for nu-svm classification and regression 01399 // 01400 // additional constraint: e^T \alpha = constant 01401 // 01402 class Solver_NUMC : public Solver 01403 { 01404 public: 01405 Solver_NUMC(int32_t n_class, float64_t svm_nu) 01406 { 01407 nr_class=n_class; 01408 nu=svm_nu; 01409 } 01410 01411 void Solve( 01412 int32_t p_l, const QMatrix& p_Q, const float64_t *p_p, 01413 const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn, 01414 float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias) 01415 { 01416 this->si = p_si; 01417 Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,shrinking, use_bias); 01418 } 01419 float64_t compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases,float64_t* normwcw); 01420 01421 private: 01422 SolutionInfo *si; 01423 int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap); 01424 float64_t calculate_rho(); 01425 bool be_shrunk( 01426 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01427 float64_t Gmax4); 01428 void do_shrinking(); 01429 01430 private: 01431 int32_t nr_class; 01432 float64_t nu; 01433 }; 01434 01435 float64_t Solver_NUMC::compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases, float64_t* normwcw) 01436 { 01437 clone(y, p_y,l); 01438 clone(alpha,p_alpha,l); 01439 01440 alpha_status = SG_MALLOC(char, l); 01441 for(int32_t i=0;i<l;i++) 01442 update_alpha_status(i); 01443 01444 float64_t* class_count = SG_MALLOC(float64_t, nr_class); 01445 float64_t* outputs = SG_MALLOC(float64_t, l); 01446 01447 for (int32_t i=0; i<nr_class; i++) 01448 { 01449 class_count[i]=0; 01450 biases[i+1]=0; 01451 } 01452 01453 for (int32_t i=0; i<active_size; i++) 01454 { 01455 update_alpha_status(i); 01456 if(!is_upper_bound(i) && !is_lower_bound(i)) 01457 class_count[(int32_t) y[i]]++; 01458 } 01459 01460 //CMath::display_vector(class_count, nr_class, "class_count"); 01461 01462 float64_t mu=((float64_t) nr_class)/(nu*l); 01463 //SG_SPRINT("nr_class=%d, l=%d, active_size=%d, nu=%f, mu=%f\n", nr_class, l, active_size, nu, mu); 01464 01465 float64_t rho=0; 01466 float64_t quad=0; 01467 float64_t* zero_counts = SG_MALLOC(float64_t, nr_class); 01468 float64_t normwc_const = 0; 01469 01470 for (int32_t i=0; i<nr_class; i++) 01471 { 01472 zero_counts[i]=-INF; 01473 normwcw[i]=0; 01474 } 01475 01476 for (int32_t i=0; i<active_size; i++) 01477 { 01478 float64_t sum_free=0; 01479 float64_t sum_atbound=0; 01480 float64_t sum_zero_count=0; 01481 01482 Qfloat* Q_i = Q->get_Q(i,active_size); 01483 outputs[i]=0; 01484 01485 for (int j=0; j<active_size; j++) 01486 { 01487 quad+= alpha[i]*alpha[j]*Q_i[j]; 01488 float64_t tmp= alpha[j]*Q_i[j]/mu; 01489 01490 if(!is_upper_bound(i) && !is_lower_bound(i)) 01491 sum_free+=tmp; 01492 else 01493 sum_atbound+=tmp; 01494 01495 if (class_count[(int32_t) y[i]] == 0 && y[j]==y[i]) 01496 sum_zero_count+= tmp; 01497 01498 SVC_QMC* QMC=(SVC_QMC*) Q; 01499 float64_t norm_tmp=alpha[i]*alpha[j]*QMC->get_orig_Qij(Q_i[j], i, j); 01500 if (y[i]==y[j]) 01501 normwcw[(int32_t) y[i]]+=norm_tmp; 01502 01503 normwcw[(int32_t) y[i]]-=2.0/nr_class*norm_tmp; 01504 normwc_const+=norm_tmp; 01505 } 01506 01507 if (class_count[(int32_t) y[i]] == 0) 01508 { 01509 if (zero_counts[(int32_t) y[i]]<sum_zero_count) 01510 zero_counts[(int32_t) y[i]]=sum_zero_count; 01511 } 01512 01513 biases[(int32_t) y[i]+1]-=sum_free; 01514 if (class_count[(int32_t) y[i]] != 0.0) 01515 rho+=sum_free/class_count[(int32_t) y[i]]; 01516 outputs[i]+=sum_free+sum_atbound; 01517 } 01518 01519 for (int32_t i=0; i<nr_class; i++) 01520 { 01521 if (class_count[i] == 0.0) 01522 rho+=zero_counts[i]; 01523 01524 normwcw[i]+=normwc_const/CMath::sq(nr_class); 01525 normwcw[i]=CMath::sqrt(normwcw[i]); 01526 } 01527 01528 CMath::display_vector(normwcw, nr_class, "normwcw"); 01529 01530 rho/=nr_class; 01531 01532 SG_SPRINT("rho=%f\n", rho); 01533 01534 float64_t sumb=0; 01535 for (int32_t i=0; i<nr_class; i++) 01536 { 01537 if (class_count[i] != 0.0) 01538 biases[i+1]=biases[i+1]/class_count[i]+rho; 01539 else 01540 biases[i+1]+=rho-zero_counts[i]; 01541 01542 SG_SPRINT("biases=%f\n", biases[i+1]); 01543 01544 sumb+=biases[i+1]; 01545 } 01546 SG_SPRINT("sumb=%f\n", sumb); 01547 01548 SG_FREE(zero_counts); 01549 01550 for (int32_t i=0; i<l; i++) 01551 outputs[i]+=biases[(int32_t) y[i]+1]; 01552 01553 biases[0]=rho; 01554 01555 //CMath::display_vector(outputs, l, "outputs"); 01556 01557 01558 float64_t xi=0; 01559 for (int32_t i=0; i<active_size; i++) 01560 { 01561 if (is_lower_bound(i)) 01562 continue; 01563 xi+=rho-outputs[i]; 01564 } 01565 01566 //SG_SPRINT("xi=%f\n", xi); 01567 01568 //SG_SPRINT("quad=%f Cp=%f xi*mu=%f\n", quad, nr_class*rho, xi*mu); 01569 01570 float64_t primal=0.5*quad- nr_class*rho+xi*mu; 01571 01572 //SG_SPRINT("primal=%10.10f\n", primal); 01573 01574 SG_FREE(y); 01575 SG_FREE(alpha); 01576 SG_FREE(alpha_status); 01577 01578 return primal; 01579 } 01580 01581 01582 // return 1 if already optimal, return 0 otherwise 01583 int32_t Solver_NUMC::select_working_set( 01584 int32_t &out_i, int32_t &out_j, float64_t &gap) 01585 { 01586 // return i,j such that y_i = y_j and 01587 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) 01588 // j: minimizes the decrease of obj value 01589 // (if quadratic coefficient <= 0, replace it with tau) 01590 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 01591 01592 int32_t retval=0; 01593 float64_t best_gap=0; 01594 int32_t best_out_i=-1; 01595 int32_t best_out_j=-1; 01596 01597 float64_t* Gmaxp = SG_MALLOC(float64_t, nr_class); 01598 float64_t* Gmaxp2 = SG_MALLOC(float64_t, nr_class); 01599 int32_t* Gmaxp_idx = SG_MALLOC(int32_t, nr_class); 01600 01601 int32_t* Gmin_idx = SG_MALLOC(int32_t, nr_class); 01602 float64_t* obj_diff_min = SG_MALLOC(float64_t, nr_class); 01603 01604 for (int32_t i=0; i<nr_class; i++) 01605 { 01606 Gmaxp[i]=-INF; 01607 Gmaxp2[i]=-INF; 01608 Gmaxp_idx[i]=-1; 01609 Gmin_idx[i]=-1; 01610 obj_diff_min[i]=INF; 01611 } 01612 01613 for(int32_t t=0;t<active_size;t++) 01614 { 01615 int32_t cidx=y[t]; 01616 if(!is_upper_bound(t)) 01617 { 01618 if(-G[t] >= Gmaxp[cidx]) 01619 { 01620 Gmaxp[cidx] = -G[t]; 01621 Gmaxp_idx[cidx] = t; 01622 } 01623 } 01624 } 01625 01626 for(int32_t j=0;j<active_size;j++) 01627 { 01628 int32_t cidx=y[j]; 01629 int32_t ip = Gmaxp_idx[cidx]; 01630 const Qfloat *Q_ip = NULL; 01631 if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 01632 Q_ip = Q->get_Q(ip,active_size); 01633 01634 if (!is_lower_bound(j)) 01635 { 01636 float64_t grad_diff=Gmaxp[cidx]+G[j]; 01637 if (G[j] >= Gmaxp2[cidx]) 01638 Gmaxp2[cidx] = G[j]; 01639 if (grad_diff > 0) 01640 { 01641 float64_t obj_diff; 01642 float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j]; 01643 if (quad_coef > 0) 01644 obj_diff = -(grad_diff*grad_diff)/quad_coef; 01645 else 01646 obj_diff = -(grad_diff*grad_diff)/TAU; 01647 01648 if (obj_diff <= obj_diff_min[cidx]) 01649 { 01650 Gmin_idx[cidx]=j; 01651 obj_diff_min[cidx] = obj_diff; 01652 } 01653 } 01654 } 01655 01656 gap=Gmaxp[cidx]+Gmaxp2[cidx]; 01657 if (gap>=best_gap && Gmin_idx[cidx]>=0 && 01658 Gmaxp_idx[cidx]>=0 && Gmin_idx[cidx]<active_size) 01659 { 01660 out_i = Gmaxp_idx[cidx]; 01661 out_j = Gmin_idx[cidx]; 01662 01663 best_gap=gap; 01664 best_out_i=out_i; 01665 best_out_j=out_j; 01666 } 01667 } 01668 01669 gap=best_gap; 01670 out_i=best_out_i; 01671 out_j=best_out_j; 01672 01673 SG_SDEBUG("i=%d j=%d best_gap=%f y_i=%f y_j=%f\n", out_i, out_j, gap, y[out_i], y[out_j]); 01674 01675 01676 if(gap < eps) 01677 retval=1; 01678 01679 SG_FREE(Gmaxp); 01680 SG_FREE(Gmaxp2); 01681 SG_FREE(Gmaxp_idx); 01682 SG_FREE(Gmin_idx); 01683 SG_FREE(obj_diff_min); 01684 01685 return retval; 01686 } 01687 01688 bool Solver_NUMC::be_shrunk( 01689 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01690 float64_t Gmax4) 01691 { 01692 return false; 01693 } 01694 01695 void Solver_NUMC::do_shrinking() 01696 { 01697 } 01698 01699 float64_t Solver_NUMC::calculate_rho() 01700 { 01701 return 0; 01702 } 01703 01704 01705 // 01706 // Q matrices for various formulations 01707 // 01708 class SVC_Q: public LibSVMKernel 01709 { 01710 public: 01711 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_) 01712 :LibSVMKernel(prob.l, prob.x, param) 01713 { 01714 clone(y,y_,prob.l); 01715 cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20))); 01716 QD = SG_MALLOC(Qfloat, prob.l); 01717 for(int32_t i=0;i<prob.l;i++) 01718 QD[i]= (Qfloat)kernel_function(i,i); 01719 } 01720 01721 Qfloat *get_Q(int32_t i, int32_t len) const 01722 { 01723 Qfloat *data; 01724 int32_t start; 01725 if((start = cache->get_data(i,&data,len)) < len) 01726 compute_Q_parallel(data, y, i, start, len); 01727 01728 return data; 01729 } 01730 01731 Qfloat *get_QD() const 01732 { 01733 return QD; 01734 } 01735 01736 void swap_index(int32_t i, int32_t j) const 01737 { 01738 cache->swap_index(i,j); 01739 LibSVMKernel::swap_index(i,j); 01740 CMath::swap(y[i],y[j]); 01741 CMath::swap(QD[i],QD[j]); 01742 } 01743 01744 ~SVC_Q() 01745 { 01746 SG_FREE(y); 01747 delete cache; 01748 SG_FREE(QD); 01749 } 01750 private: 01751 schar *y; 01752 Cache *cache; 01753 Qfloat *QD; 01754 }; 01755 01756 01757 class ONE_CLASS_Q: public LibSVMKernel 01758 { 01759 public: 01760 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param) 01761 :LibSVMKernel(prob.l, prob.x, param) 01762 { 01763 cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20))); 01764 QD = SG_MALLOC(Qfloat, prob.l); 01765 for(int32_t i=0;i<prob.l;i++) 01766 QD[i]= (Qfloat)kernel_function(i,i); 01767 } 01768 01769 Qfloat *get_Q(int32_t i, int32_t len) const 01770 { 01771 Qfloat *data; 01772 int32_t start; 01773 if((start = cache->get_data(i,&data,len)) < len) 01774 compute_Q_parallel(data, NULL, i, start, len); 01775 01776 return data; 01777 } 01778 01779 Qfloat *get_QD() const 01780 { 01781 return QD; 01782 } 01783 01784 void swap_index(int32_t i, int32_t j) const 01785 { 01786 cache->swap_index(i,j); 01787 LibSVMKernel::swap_index(i,j); 01788 CMath::swap(QD[i],QD[j]); 01789 } 01790 01791 ~ONE_CLASS_Q() 01792 { 01793 delete cache; 01794 SG_FREE(QD); 01795 } 01796 private: 01797 Cache *cache; 01798 Qfloat *QD; 01799 }; 01800 01801 class SVR_Q: public LibSVMKernel 01802 { 01803 public: 01804 SVR_Q(const svm_problem& prob, const svm_parameter& param) 01805 :LibSVMKernel(prob.l, prob.x, param) 01806 { 01807 l = prob.l; 01808 cache = new Cache(l,(int64_t)(param.cache_size*(1l<<20))); 01809 QD = SG_MALLOC(Qfloat, 2*l); 01810 sign = SG_MALLOC(schar, 2*l); 01811 index = SG_MALLOC(int32_t, 2*l); 01812 for(int32_t k=0;k<l;k++) 01813 { 01814 sign[k] = 1; 01815 sign[k+l] = -1; 01816 index[k] = k; 01817 index[k+l] = k; 01818 QD[k]= (Qfloat)kernel_function(k,k); 01819 QD[k+l]=QD[k]; 01820 } 01821 buffer[0] = SG_MALLOC(Qfloat, 2*l); 01822 buffer[1] = SG_MALLOC(Qfloat, 2*l); 01823 next_buffer = 0; 01824 } 01825 01826 void swap_index(int32_t i, int32_t j) const 01827 { 01828 CMath::swap(sign[i],sign[j]); 01829 CMath::swap(index[i],index[j]); 01830 CMath::swap(QD[i],QD[j]); 01831 } 01832 01833 Qfloat *get_Q(int32_t i, int32_t len) const 01834 { 01835 Qfloat *data; 01836 int32_t real_i = index[i]; 01837 if(cache->get_data(real_i,&data,l) < l) 01838 compute_Q_parallel(data, NULL, real_i, 0, l); 01839 01840 // reorder and copy 01841 Qfloat *buf = buffer[next_buffer]; 01842 next_buffer = 1 - next_buffer; 01843 schar si = sign[i]; 01844 for(int32_t j=0;j<len;j++) 01845 buf[j] = si * sign[j] * data[index[j]]; 01846 return buf; 01847 } 01848 01849 Qfloat *get_QD() const 01850 { 01851 return QD; 01852 } 01853 01854 ~SVR_Q() 01855 { 01856 delete cache; 01857 SG_FREE(sign); 01858 SG_FREE(index); 01859 SG_FREE(buffer[0]); 01860 SG_FREE(buffer[1]); 01861 SG_FREE(QD); 01862 } 01863 01864 private: 01865 int32_t l; 01866 Cache *cache; 01867 schar *sign; 01868 int32_t *index; 01869 mutable int32_t next_buffer; 01870 Qfloat *buffer[2]; 01871 Qfloat *QD; 01872 }; 01873 01874 // 01875 // construct and solve various formulations 01876 // 01877 static void solve_c_svc( 01878 const svm_problem *prob, const svm_parameter* param, 01879 float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn) 01880 { 01881 int32_t l = prob->l; 01882 schar *y = SG_MALLOC(schar, l); 01883 01884 int32_t i; 01885 01886 for(i=0;i<l;i++) 01887 { 01888 alpha[i] = 0; 01889 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1; 01890 } 01891 01892 Solver s; 01893 s.Solve(l, SVC_Q(*prob,*param,y), prob->pv, y, 01894 alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias); 01895 01896 float64_t sum_alpha=0; 01897 for(i=0;i<l;i++) 01898 sum_alpha += alpha[i]; 01899 01900 if (Cp==Cn) 01901 SG_SINFO("nu = %f\n", sum_alpha/(param->C*prob->l)); 01902 01903 for(i=0;i<l;i++) 01904 alpha[i] *= y[i]; 01905 01906 SG_FREE(y); 01907 } 01908 01909 01910 //two weighted datasets 01911 void solve_c_svc_weighted( 01912 const svm_problem *prob, const svm_parameter* param, 01913 float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn) 01914 { 01915 int l = prob->l; 01916 float64_t *minus_ones = SG_MALLOC(float64_t, l); 01917 schar *y = SG_MALLOC(schar, l); 01918 01919 int i; 01920 01921 for(i=0;i<l;i++) 01922 { 01923 alpha[i] = 0; 01924 minus_ones[i] = -1; 01925 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1; 01926 } 01927 01928 WeightedSolver s = WeightedSolver(prob->C); 01929 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, 01930 alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias); 01931 01932 float64_t sum_alpha=0; 01933 for(i=0;i<l;i++) 01934 sum_alpha += alpha[i]; 01935 01936 //if (Cp==Cn) 01937 // SG_SINFO("nu = %f\n", sum_alpha/(prob->C*prob->l)); 01938 01939 for(i=0;i<l;i++) 01940 alpha[i] *= y[i]; 01941 01942 SG_FREE(minus_ones); 01943 SG_FREE(y); 01944 } 01945 01946 static void solve_nu_svc( 01947 const svm_problem *prob, const svm_parameter *param, 01948 float64_t *alpha, Solver::SolutionInfo* si) 01949 { 01950 int32_t i; 01951 int32_t l = prob->l; 01952 float64_t nu = param->nu; 01953 01954 schar *y = SG_MALLOC(schar, l); 01955 01956 for(i=0;i<l;i++) 01957 if(prob->y[i]>0) 01958 y[i] = +1; 01959 else 01960 y[i] = -1; 01961 01962 float64_t sum_pos = nu*l/2; 01963 float64_t sum_neg = nu*l/2; 01964 01965 for(i=0;i<l;i++) 01966 if(y[i] == +1) 01967 { 01968 alpha[i] = CMath::min(1.0,sum_pos); 01969 sum_pos -= alpha[i]; 01970 } 01971 else 01972 { 01973 alpha[i] = CMath::min(1.0,sum_neg); 01974 sum_neg -= alpha[i]; 01975 } 01976 01977 float64_t *zeros = SG_MALLOC(float64_t, l); 01978 01979 for(i=0;i<l;i++) 01980 zeros[i] = 0; 01981 01982 Solver_NU s; 01983 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y, 01984 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias); 01985 float64_t r = si->r; 01986 01987 SG_SINFO("C = %f\n",1/r); 01988 01989 for(i=0;i<l;i++) 01990 alpha[i] *= y[i]/r; 01991 01992 si->rho /= r; 01993 si->obj /= (r*r); 01994 si->upper_bound_p = 1/r; 01995 si->upper_bound_n = 1/r; 01996 01997 SG_FREE(y); 01998 SG_FREE(zeros); 01999 } 02000 02001 static void solve_nu_multiclass_svc(const svm_problem *prob, 02002 const svm_parameter *param, Solver::SolutionInfo* si, svm_model* model) 02003 { 02004 int32_t l = prob->l; 02005 float64_t nu = param->nu; 02006 02007 float64_t *alpha = SG_MALLOC(float64_t, prob->l); 02008 schar *y = SG_MALLOC(schar, l); 02009 02010 for(int32_t i=0;i<l;i++) 02011 { 02012 alpha[i] = 0; 02013 y[i]=prob->y[i]; 02014 } 02015 02016 int32_t nr_class=param->nr_class; 02017 float64_t* sum_class = SG_MALLOC(float64_t, nr_class); 02018 02019 for (int32_t j=0; j<nr_class; j++) 02020 sum_class[j] = nu*l/nr_class; 02021 02022 for(int32_t i=0;i<l;i++) 02023 { 02024 alpha[i] = CMath::min(1.0,sum_class[int32_t(y[i])]); 02025 sum_class[int32_t(y[i])] -= alpha[i]; 02026 } 02027 SG_FREE(sum_class); 02028 02029 02030 float64_t *zeros = SG_MALLOC(float64_t, l); 02031 02032 for (int32_t i=0;i<l;i++) 02033 zeros[i] = 0; 02034 02035 Solver_NUMC s(nr_class, nu); 02036 SVC_QMC Q(*prob,*param,y, nr_class, ((float64_t) nr_class)/CMath::sq(nu*l)); 02037 02038 s.Solve(l, Q, zeros, y, 02039 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias); 02040 02041 02042 int32_t* class_sv_count=SG_MALLOC(int32_t, nr_class); 02043 02044 for (int32_t i=0; i<nr_class; i++) 02045 class_sv_count[i]=0; 02046 02047 for (int32_t i=0; i<l; i++) 02048 { 02049 if (CMath::abs(alpha[i]) > 0) 02050 class_sv_count[(int32_t) y[i]]++; 02051 } 02052 02053 model->l=l; 02054 // rho[0]= rho in mcsvm paper, rho[1]...rho[nr_class] is bias in mcsvm paper 02055 model->rho = SG_MALLOC(float64_t, nr_class+1); 02056 model->nr_class = nr_class; 02057 model->label = NULL; 02058 model->SV = SG_MALLOC(svm_node*,nr_class); 02059 model->nSV = SG_MALLOC(int32_t, nr_class); 02060 model->sv_coef = SG_MALLOC(float64_t *,nr_class); 02061 model->normwcw = SG_MALLOC(float64_t,nr_class); 02062 02063 for (int32_t i=0; i<nr_class+1; i++) 02064 model->rho[i]=0; 02065 02066 model->objective = si->obj; 02067 02068 if (param->use_bias) 02069 { 02070 SG_SDEBUG("Computing biases and primal objective\n"); 02071 float64_t primal = s.compute_primal(y, alpha, model->rho, model->normwcw); 02072 SG_SINFO("Primal = %10.10f\n", primal); 02073 } 02074 02075 for (int32_t i=0; i<nr_class; i++) 02076 { 02077 model->nSV[i]=class_sv_count[i]; 02078 model->SV[i] = SG_MALLOC(svm_node,class_sv_count[i]); 02079 model->sv_coef[i] = SG_MALLOC(float64_t,class_sv_count[i]); 02080 class_sv_count[i]=0; 02081 } 02082 02083 for (int32_t i=0;i<prob->l;i++) 02084 { 02085 if(fabs(alpha[i]) > 0) 02086 { 02087 model->SV[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]].index = prob->x[i]->index; 02088 model->sv_coef[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]] = alpha[i]; 02089 class_sv_count[(int32_t) y[i]]++; 02090 } 02091 } 02092 02093 SG_FREE(y); 02094 SG_FREE(zeros); 02095 SG_FREE(alpha); 02096 } 02097 02098 static void solve_one_class( 02099 const svm_problem *prob, const svm_parameter *param, 02100 float64_t *alpha, Solver::SolutionInfo* si) 02101 { 02102 int32_t l = prob->l; 02103 float64_t *zeros = SG_MALLOC(float64_t, l); 02104 schar *ones = SG_MALLOC(schar, l); 02105 int32_t i; 02106 02107 int32_t n = (int32_t)(param->nu*prob->l); // # of alpha's at upper bound 02108 02109 for(i=0;i<n;i++) 02110 alpha[i] = 1; 02111 if(n<prob->l) 02112 alpha[n] = param->nu * prob->l - n; 02113 for(i=n+1;i<l;i++) 02114 alpha[i] = 0; 02115 02116 for(i=0;i<l;i++) 02117 { 02118 zeros[i] = 0; 02119 ones[i] = 1; 02120 } 02121 02122 Solver s; 02123 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones, 02124 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias); 02125 02126 SG_FREE(zeros); 02127 SG_FREE(ones); 02128 } 02129 02130 static void solve_epsilon_svr( 02131 const svm_problem *prob, const svm_parameter *param, 02132 float64_t *alpha, Solver::SolutionInfo* si) 02133 { 02134 int32_t l = prob->l; 02135 float64_t *alpha2 = SG_MALLOC(float64_t, 2*l); 02136 float64_t *linear_term = SG_MALLOC(float64_t, 2*l); 02137 schar *y = SG_MALLOC(schar, 2*l); 02138 int32_t i; 02139 02140 for(i=0;i<l;i++) 02141 { 02142 alpha2[i] = 0; 02143 linear_term[i] = param->p - prob->y[i]; 02144 y[i] = 1; 02145 02146 alpha2[i+l] = 0; 02147 linear_term[i+l] = param->p + prob->y[i]; 02148 y[i+l] = -1; 02149 } 02150 02151 Solver s; 02152 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, 02153 alpha2, param->C, param->C, param->eps, si, param->shrinking, param->use_bias); 02154 02155 float64_t sum_alpha = 0; 02156 for(i=0;i<l;i++) 02157 { 02158 alpha[i] = alpha2[i] - alpha2[i+l]; 02159 sum_alpha += fabs(alpha[i]); 02160 } 02161 SG_SINFO("nu = %f\n",sum_alpha/(param->C*l)); 02162 02163 SG_FREE(alpha2); 02164 SG_FREE(linear_term); 02165 SG_FREE(y); 02166 } 02167 02168 static void solve_nu_svr( 02169 const svm_problem *prob, const svm_parameter *param, 02170 float64_t *alpha, Solver::SolutionInfo* si) 02171 { 02172 int32_t l = prob->l; 02173 float64_t C = param->C; 02174 float64_t *alpha2 = SG_MALLOC(float64_t, 2*l); 02175 float64_t *linear_term = SG_MALLOC(float64_t, 2*l); 02176 schar *y = SG_MALLOC(schar, 2*l); 02177 int32_t i; 02178 02179 float64_t sum = C * param->nu * l / 2; 02180 for(i=0;i<l;i++) 02181 { 02182 alpha2[i] = alpha2[i+l] = CMath::min(sum,C); 02183 sum -= alpha2[i]; 02184 02185 linear_term[i] = - prob->y[i]; 02186 y[i] = 1; 02187 02188 linear_term[i+l] = prob->y[i]; 02189 y[i+l] = -1; 02190 } 02191 02192 Solver_NU s; 02193 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, 02194 alpha2, C, C, param->eps, si, param->shrinking, param->use_bias); 02195 02196 SG_SINFO("epsilon = %f\n",-si->r); 02197 02198 for(i=0;i<l;i++) 02199 alpha[i] = alpha2[i] - alpha2[i+l]; 02200 02201 SG_FREE(alpha2); 02202 SG_FREE(linear_term); 02203 SG_FREE(y); 02204 } 02205 02206 // 02207 // decision_function 02208 // 02209 struct decision_function 02210 { 02211 float64_t *alpha; 02212 float64_t rho; 02213 float64_t objective; 02214 }; 02215 02216 decision_function svm_train_one( 02217 const svm_problem *prob, const svm_parameter *param, 02218 float64_t Cp, float64_t Cn) 02219 { 02220 float64_t *alpha = SG_MALLOC(float64_t, prob->l); 02221 Solver::SolutionInfo si; 02222 switch(param->svm_type) 02223 { 02224 case C_SVC: 02225 solve_c_svc(prob,param,alpha,&si,Cp,Cn); 02226 break; 02227 case NU_SVC: 02228 solve_nu_svc(prob,param,alpha,&si); 02229 break; 02230 case ONE_CLASS: 02231 solve_one_class(prob,param,alpha,&si); 02232 break; 02233 case EPSILON_SVR: 02234 solve_epsilon_svr(prob,param,alpha,&si); 02235 break; 02236 case NU_SVR: 02237 solve_nu_svr(prob,param,alpha,&si); 02238 break; 02239 } 02240 02241 SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho); 02242 02243 // output SVs 02244 if (param->svm_type != ONE_CLASS) 02245 { 02246 int32_t nSV = 0; 02247 int32_t nBSV = 0; 02248 for(int32_t i=0;i<prob->l;i++) 02249 { 02250 if(fabs(alpha[i]) > 0) 02251 { 02252 ++nSV; 02253 if(prob->y[i] > 0) 02254 { 02255 if(fabs(alpha[i]) >= si.upper_bound_p) 02256 ++nBSV; 02257 } 02258 else 02259 { 02260 if(fabs(alpha[i]) >= si.upper_bound_n) 02261 ++nBSV; 02262 } 02263 } 02264 } 02265 SG_SINFO("nSV = %d, nBSV = %d\n",nSV,nBSV); 02266 } 02267 02268 decision_function f; 02269 f.alpha = alpha; 02270 f.rho = si.rho; 02271 f.objective=si.obj; 02272 return f; 02273 } 02274 02275 // 02276 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data 02277 // perm, length l, must be allocated before calling this subroutine 02278 void svm_group_classes( 02279 const svm_problem *prob, int32_t *nr_class_ret, int32_t **label_ret, 02280 int32_t **start_ret, int32_t **count_ret, int32_t *perm) 02281 { 02282 int32_t l = prob->l; 02283 int32_t max_nr_class = 16; 02284 int32_t nr_class = 0; 02285 int32_t *label = SG_MALLOC(int32_t, max_nr_class); 02286 int32_t *count = SG_MALLOC(int32_t, max_nr_class); 02287 int32_t *data_label = SG_MALLOC(int32_t, l); 02288 int32_t i; 02289 02290 for(i=0;i<l;i++) 02291 { 02292 int32_t this_label=(int32_t) prob->y[i]; 02293 int32_t j; 02294 for(j=0;j<nr_class;j++) 02295 { 02296 if(this_label == label[j]) 02297 { 02298 ++count[j]; 02299 break; 02300 } 02301 } 02302 data_label[i] = j; 02303 if(j == nr_class) 02304 { 02305 if(nr_class == max_nr_class) 02306 { 02307 max_nr_class *= 2; 02308 label=SG_REALLOC(int32_t, label,max_nr_class); 02309 count=SG_REALLOC(int32_t, count,max_nr_class); 02310 } 02311 label[nr_class] = this_label; 02312 count[nr_class] = 1; 02313 ++nr_class; 02314 } 02315 } 02316 02317 int32_t *start = SG_MALLOC(int32_t, nr_class); 02318 start[0] = 0; 02319 for(i=1;i<nr_class;i++) 02320 start[i] = start[i-1]+count[i-1]; 02321 for(i=0;i<l;i++) 02322 { 02323 perm[start[data_label[i]]] = i; 02324 ++start[data_label[i]]; 02325 } 02326 start[0] = 0; 02327 for(i=1;i<nr_class;i++) 02328 start[i] = start[i-1]+count[i-1]; 02329 02330 *nr_class_ret = nr_class; 02331 *label_ret = label; 02332 *start_ret = start; 02333 *count_ret = count; 02334 SG_FREE(data_label); 02335 } 02336 02337 // 02338 // Interface functions 02339 // 02340 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param) 02341 { 02342 svm_model *model = SG_MALLOC(svm_model,1); 02343 model->param = *param; 02344 model->free_sv = 0; // XXX 02345 02346 if(param->svm_type == ONE_CLASS || 02347 param->svm_type == EPSILON_SVR || 02348 param->svm_type == NU_SVR) 02349 { 02350 SG_SINFO("training one class svm or doing epsilon sv regression\n"); 02351 02352 // regression or one-class-svm 02353 model->nr_class = 2; 02354 model->label = NULL; 02355 model->nSV = NULL; 02356 model->sv_coef = SG_MALLOC(float64_t *,1); 02357 decision_function f = svm_train_one(prob,param,0,0); 02358 model->rho = SG_MALLOC(float64_t, 1); 02359 model->rho[0] = f.rho; 02360 model->objective = f.objective; 02361 02362 int32_t nSV = 0; 02363 int32_t i; 02364 for(i=0;i<prob->l;i++) 02365 if(fabs(f.alpha[i]) > 0) ++nSV; 02366 model->l = nSV; 02367 model->SV = SG_MALLOC(svm_node *,nSV); 02368 model->sv_coef[0] = SG_MALLOC(float64_t, nSV); 02369 int32_t j = 0; 02370 for(i=0;i<prob->l;i++) 02371 if(fabs(f.alpha[i]) > 0) 02372 { 02373 model->SV[j] = prob->x[i]; 02374 model->sv_coef[0][j] = f.alpha[i]; 02375 ++j; 02376 } 02377 02378 SG_FREE(f.alpha); 02379 } 02380 else if(param->svm_type == NU_MULTICLASS_SVC) 02381 { 02382 Solver::SolutionInfo si; 02383 solve_nu_multiclass_svc(prob,param,&si,model); 02384 SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho); 02385 } 02386 else 02387 { 02388 // classification 02389 int32_t l = prob->l; 02390 int32_t nr_class; 02391 int32_t *label = NULL; 02392 int32_t *start = NULL; 02393 int32_t *count = NULL; 02394 int32_t *perm = SG_MALLOC(int32_t, l); 02395 02396 // group training data of the same class 02397 svm_group_classes(prob,&nr_class,&label,&start,&count,perm); 02398 svm_node **x = SG_MALLOC(svm_node *,l); 02399 float64_t *C = SG_MALLOC(float64_t,l); 02400 float64_t *pv = SG_MALLOC(float64_t,l); 02401 02402 02403 int32_t i; 02404 for(i=0;i<l;i++) { 02405 x[i] = prob->x[perm[i]]; 02406 C[i] = prob->C[perm[i]]; 02407 02408 if (prob->pv) 02409 { 02410 pv[i] = prob->pv[perm[i]]; 02411 } 02412 else 02413 { 02414 //no custom linear term is set 02415 pv[i] = -1.0; 02416 } 02417 02418 } 02419 02420 02421 // calculate weighted C 02422 float64_t *weighted_C = SG_MALLOC(float64_t, nr_class); 02423 for(i=0;i<nr_class;i++) 02424 weighted_C[i] = param->C; 02425 for(i=0;i<param->nr_weight;i++) 02426 { 02427 int32_t j; 02428 for(j=0;j<nr_class;j++) 02429 if(param->weight_label[i] == label[j]) 02430 break; 02431 if(j == nr_class) 02432 SG_SWARNING("warning: class label %d specified in weight is not found\n", param->weight_label[i]); 02433 else 02434 weighted_C[j] *= param->weight[i]; 02435 } 02436 02437 // train k*(k-1)/2 models 02438 02439 bool *nonzero = SG_MALLOC(bool,l); 02440 for(i=0;i<l;i++) 02441 nonzero[i] = false; 02442 decision_function *f = SG_MALLOC(decision_function,nr_class*(nr_class-1)/2); 02443 02444 int32_t p = 0; 02445 for(i=0;i<nr_class;i++) 02446 for(int32_t j=i+1;j<nr_class;j++) 02447 { 02448 svm_problem sub_prob; 02449 int32_t si = start[i], sj = start[j]; 02450 int32_t ci = count[i], cj = count[j]; 02451 sub_prob.l = ci+cj; 02452 sub_prob.x = SG_MALLOC(svm_node *,sub_prob.l); 02453 sub_prob.y = SG_MALLOC(float64_t,sub_prob.l+1); //dirty hack to surpress valgrind err 02454 sub_prob.C = SG_MALLOC(float64_t,sub_prob.l+1); 02455 sub_prob.pv = SG_MALLOC(float64_t,sub_prob.l+1); 02456 02457 int32_t k; 02458 for(k=0;k<ci;k++) 02459 { 02460 sub_prob.x[k] = x[si+k]; 02461 sub_prob.y[k] = +1; 02462 sub_prob.C[k] = C[si+k]; 02463 sub_prob.pv[k] = pv[si+k]; 02464 02465 } 02466 for(k=0;k<cj;k++) 02467 { 02468 sub_prob.x[ci+k] = x[sj+k]; 02469 sub_prob.y[ci+k] = -1; 02470 sub_prob.C[ci+k] = C[sj+k]; 02471 sub_prob.pv[ci+k] = pv[sj+k]; 02472 } 02473 sub_prob.y[sub_prob.l]=-1; //dirty hack to surpress valgrind err 02474 sub_prob.C[sub_prob.l]=-1; 02475 sub_prob.pv[sub_prob.l]=-1; 02476 02477 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]); 02478 for(k=0;k<ci;k++) 02479 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0) 02480 nonzero[si+k] = true; 02481 for(k=0;k<cj;k++) 02482 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0) 02483 nonzero[sj+k] = true; 02484 SG_FREE(sub_prob.x); 02485 SG_FREE(sub_prob.y); 02486 SG_FREE(sub_prob.C); 02487 SG_FREE(sub_prob.pv); 02488 ++p; 02489 } 02490 02491 // build output 02492 02493 model->objective = f[0].objective; 02494 model->nr_class = nr_class; 02495 02496 model->label = SG_MALLOC(int32_t, nr_class); 02497 for(i=0;i<nr_class;i++) 02498 model->label[i] = label[i]; 02499 02500 model->rho = SG_MALLOC(float64_t, nr_class*(nr_class-1)/2); 02501 for(i=0;i<nr_class*(nr_class-1)/2;i++) 02502 model->rho[i] = f[i].rho; 02503 02504 int32_t total_sv = 0; 02505 int32_t *nz_count = SG_MALLOC(int32_t, nr_class); 02506 model->nSV = SG_MALLOC(int32_t, nr_class); 02507 for(i=0;i<nr_class;i++) 02508 { 02509 int32_t nSV = 0; 02510 for(int32_t j=0;j<count[i];j++) 02511 if(nonzero[start[i]+j]) 02512 { 02513 ++nSV; 02514 ++total_sv; 02515 } 02516 model->nSV[i] = nSV; 02517 nz_count[i] = nSV; 02518 } 02519 02520 SG_SINFO("Total nSV = %d\n",total_sv); 02521 02522 model->l = total_sv; 02523 model->SV = SG_MALLOC(svm_node *,total_sv); 02524 p = 0; 02525 for(i=0;i<l;i++) 02526 if(nonzero[i]) model->SV[p++] = x[i]; 02527 02528 int32_t *nz_start = SG_MALLOC(int32_t, nr_class); 02529 nz_start[0] = 0; 02530 for(i=1;i<nr_class;i++) 02531 nz_start[i] = nz_start[i-1]+nz_count[i-1]; 02532 02533 model->sv_coef = SG_MALLOC(float64_t *,nr_class-1); 02534 for(i=0;i<nr_class-1;i++) 02535 model->sv_coef[i] = SG_MALLOC(float64_t, total_sv); 02536 02537 p = 0; 02538 for(i=0;i<nr_class;i++) 02539 for(int32_t j=i+1;j<nr_class;j++) 02540 { 02541 // classifier (i,j): coefficients with 02542 // i are in sv_coef[j-1][nz_start[i]...], 02543 // j are in sv_coef[i][nz_start[j]...] 02544 02545 int32_t si = start[i]; 02546 int32_t sj = start[j]; 02547 int32_t ci = count[i]; 02548 int32_t cj = count[j]; 02549 02550 int32_t q = nz_start[i]; 02551 int32_t k; 02552 for(k=0;k<ci;k++) 02553 if(nonzero[si+k]) 02554 model->sv_coef[j-1][q++] = f[p].alpha[k]; 02555 q = nz_start[j]; 02556 for(k=0;k<cj;k++) 02557 if(nonzero[sj+k]) 02558 model->sv_coef[i][q++] = f[p].alpha[ci+k]; 02559 ++p; 02560 } 02561 02562 SG_FREE(label); 02563 SG_FREE(count); 02564 SG_FREE(perm); 02565 SG_FREE(start); 02566 SG_FREE(x); 02567 SG_FREE(C); 02568 SG_FREE(pv); 02569 SG_FREE(weighted_C); 02570 SG_FREE(nonzero); 02571 for(i=0;i<nr_class*(nr_class-1)/2;i++) 02572 SG_FREE(f[i].alpha); 02573 SG_FREE(f); 02574 SG_FREE(nz_count); 02575 SG_FREE(nz_start); 02576 } 02577 return model; 02578 } 02579 02580 void svm_destroy_model(svm_model* model) 02581 { 02582 if(model->free_sv && model->l > 0) 02583 SG_FREE((void *)(model->SV[0])); 02584 for(int32_t i=0;i<model->nr_class-1;i++) 02585 SG_FREE(model->sv_coef[i]); 02586 SG_FREE(model->SV); 02587 SG_FREE(model->sv_coef); 02588 SG_FREE(model->rho); 02589 SG_FREE(model->label); 02590 SG_FREE(model->nSV); 02591 SG_FREE(model); 02592 } 02593 02594 void svm_destroy_param(svm_parameter* param) 02595 { 02596 SG_FREE(param->weight_label); 02597 SG_FREE(param->weight); 02598 } 02599 02600 const char *svm_check_parameter( 02601 const svm_problem *prob, const svm_parameter *param) 02602 { 02603 // svm_type 02604 02605 int32_t svm_type = param->svm_type; 02606 if(svm_type != C_SVC && 02607 svm_type != NU_SVC && 02608 svm_type != ONE_CLASS && 02609 svm_type != EPSILON_SVR && 02610 svm_type != NU_SVR && 02611 svm_type != NU_MULTICLASS_SVC) 02612 return "unknown svm type"; 02613 02614 // kernel_type, degree 02615 02616 int32_t kernel_type = param->kernel_type; 02617 if(kernel_type != LINEAR && 02618 kernel_type != POLY && 02619 kernel_type != RBF && 02620 kernel_type != SIGMOID && 02621 kernel_type != PRECOMPUTED) 02622 return "unknown kernel type"; 02623 02624 if(param->degree < 0) 02625 return "degree of polynomial kernel < 0"; 02626 02627 // cache_size,eps,C,nu,p,shrinking 02628 02629 if(param->cache_size <= 0) 02630 return "cache_size <= 0"; 02631 02632 if(param->eps <= 0) 02633 return "eps <= 0"; 02634 02635 if(svm_type == C_SVC || 02636 svm_type == EPSILON_SVR || 02637 svm_type == NU_SVR) 02638 if(param->C <= 0) 02639 return "C <= 0"; 02640 02641 if(svm_type == NU_SVC || 02642 svm_type == ONE_CLASS || 02643 svm_type == NU_SVR) 02644 if(param->nu <= 0 || param->nu > 1) 02645 return "nu <= 0 or nu > 1"; 02646 02647 if(svm_type == EPSILON_SVR) 02648 if(param->p < 0) 02649 return "p < 0"; 02650 02651 if(param->shrinking != 0 && 02652 param->shrinking != 1) 02653 return "shrinking != 0 and shrinking != 1"; 02654 02655 02656 // check whether nu-svc is feasible 02657 02658 if(svm_type == NU_SVC) 02659 { 02660 int32_t l = prob->l; 02661 int32_t max_nr_class = 16; 02662 int32_t nr_class = 0; 02663 int32_t *label = SG_MALLOC(int32_t, max_nr_class); 02664 int32_t *count = SG_MALLOC(int32_t, max_nr_class); 02665 02666 int32_t i; 02667 for(i=0;i<l;i++) 02668 { 02669 int32_t this_label = (int32_t) prob->y[i]; 02670 int32_t j; 02671 for(j=0;j<nr_class;j++) 02672 if(this_label == label[j]) 02673 { 02674 ++count[j]; 02675 break; 02676 } 02677 if(j == nr_class) 02678 { 02679 if(nr_class == max_nr_class) 02680 { 02681 max_nr_class *= 2; 02682 label=SG_REALLOC(int32_t, label, max_nr_class); 02683 count=SG_REALLOC(int32_t, count, max_nr_class); 02684 } 02685 label[nr_class] = this_label; 02686 count[nr_class] = 1; 02687 ++nr_class; 02688 } 02689 } 02690 02691 for(i=0;i<nr_class;i++) 02692 { 02693 int32_t n1 = count[i]; 02694 for(int32_t j=i+1;j<nr_class;j++) 02695 { 02696 int32_t n2 = count[j]; 02697 if(param->nu*(n1+n2)/2 > CMath::min(n1,n2)) 02698 { 02699 SG_FREE(label); 02700 SG_FREE(count); 02701 return "specified nu is infeasible"; 02702 } 02703 } 02704 } 02705 SG_FREE(label); 02706 SG_FREE(count); 02707 } 02708 02709 return NULL; 02710 } 02711 } 02712 #endif // DOXYGEN_SHOULD_SKIP_THIS