SHOGUN v1.1.0
// -*- C++ -*-
// Main functions of the LaRank algorithm for solving Multiclass SVM
// Copyright (C) 2008- Antoine Bordes
// Shogun specific adjustments (w) 2009 Soeren Sonnenburg

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
//
/***********************************************************************
 *
 *  LUSH Lisp Universal Shell
 *    Copyright (C) 2002 Leon Bottou, Yann Le Cun, AT&T Corp, NECI.
 *  Includes parts of TL3:
 *    Copyright (C) 1987-1999 Leon Bottou and Neuristique.
 *  Includes selected parts of SN3.2:
 *    Copyright (C) 1991-2001 AT&T Corp.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 *
 ***********************************************************************/

/***********************************************************************
 * $Id: kcache.c,v 1.9 2007/01/25 22:42:09 leonb Exp $
 **********************************************************************/

#include <vector>
#include <algorithm>

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/Math.h>
#include <shogun/classifier/svm/LaRank.h>
#include <shogun/kernel/Kernel.h>

using namespace shogun;

namespace shogun
{
	static larank_kcache_t* larank_kcache_create (CKernel* kernelfunc)
	{
		larank_kcache_t *self;
		self = SG_CALLOC (larank_kcache_t, 1);
		self->l = 0;
		self->maxrowlen = 0;
		self->func = kernelfunc;
		self->prevbuddy = self;
		self->nextbuddy = self;
		self->cursize = sizeof (larank_kcache_t);
		self->maxsize = 256 * 1024 * 1024;
		self->qprev = SG_MALLOC(int32_t, 1);
		self->qnext = SG_MALLOC(int32_t, 1);
		self->rnext = self->qnext + 1;
		self->rprev = self->qprev + 1;
		self->rprev[-1] = -1;
		self->rnext[-1] = -1;
		return self;
	}

	static void xtruncate (larank_kcache_t * self, int32_t k, int32_t nlen)
	{
		int32_t olen = self->rsize[k];
		if (nlen < olen)
		{
			float32_t *ndata;
			float32_t *odata = self->rdata[k];
			if (nlen > 0)
			{
				ndata = SG_MALLOC(float32_t, nlen);
				memcpy (ndata, odata, nlen * sizeof (float32_t));
			}
			else
			{
				ndata = 0;
				self->rnext[self->rprev[k]] = self->rnext[k];
				self->rprev[self->rnext[k]] = self->rprev[k];
				self->rnext[k] = self->rprev[k] = k;
			}
			SG_FREE (odata);
			self->rdata[k] = ndata;
			self->rsize[k] = nlen;
			self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
		}
	}

	static void xpurge (larank_kcache_t * self)
	{
		if (self->cursize > self->maxsize)
		{
			int32_t k = self->rprev[-1];
			while (self->cursize > self->maxsize && k != self->rnext[-1])
			{
				int32_t pk = self->rprev[k];
				xtruncate (self, k, 0);
				k = pk;
			}
		}
	}

	static void larank_kcache_set_maximum_size (larank_kcache_t * self, int64_t entries)
	{
		ASSERT (self);
		ASSERT (entries > 0);
		self->maxsize = entries;
		xpurge (self);
	}

	static void larank_kcache_destroy (larank_kcache_t * self)
	{
		if (self)
		{
			int32_t i;
			larank_kcache_t *nb = self->nextbuddy;
			larank_kcache_t *pb = self->prevbuddy;
			pb->nextbuddy = nb;
			nb->prevbuddy = pb;
			/* delete */
			if (self->i2r)
				SG_FREE (self->i2r);
			if (self->r2i)
				SG_FREE (self->r2i);
			if (self->rdata)
				for (i = 0; i < self->l; i++)
					if (self->rdata[i])
						SG_FREE (self->rdata[i]);
			if (self->rdata)
				SG_FREE (self->rdata);
			if (self->rsize)
				SG_FREE (self->rsize);
			if (self->rdiag)
				SG_FREE (self->rdiag);
			if (self->qnext)
				SG_FREE (self->qnext);
			if (self->qprev)
				SG_FREE (self->qprev);
			memset (self, 0, sizeof (larank_kcache_t));
			SG_FREE (self);
		}
	}
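
	/* Note on the cache layout (summarizing the code above): cached rows are
	 * kept in a circular doubly-linked LRU list threaded through rnext/rprev,
	 * with a sentinel at index -1 (hence rnext = qnext + 1, so that rnext[-1]
	 * aliases the valid slot qnext[0]).  xpurge() walks the list from the
	 * least recently used end (rprev[-1]) and frees rows via xtruncate()
	 * until cursize drops back under maxsize. */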
	static void xminsize (larank_kcache_t * self, int32_t n)
	{
		int32_t ol = self->l;
		if (n > ol)
		{
			int32_t i;
			int32_t nl = CMath::max (256, ol);
			while (nl < n)
				nl = nl + nl;
			self->i2r = SG_REALLOC (int32_t, self->i2r, nl);
			self->r2i = SG_REALLOC (int32_t, self->r2i, nl);
			self->rsize = SG_REALLOC (int32_t, self->rsize, nl);
			self->qnext = SG_REALLOC (int32_t, self->qnext, (1 + nl));
			self->qprev = SG_REALLOC (int32_t, self->qprev, (1 + nl));
			self->rdiag = SG_REALLOC (float32_t, self->rdiag, nl);
			self->rdata = SG_REALLOC (float32_t*, self->rdata, nl);
			self->rnext = self->qnext + 1;
			self->rprev = self->qprev + 1;
			for (i = ol; i < nl; i++)
			{
				self->i2r[i] = i;
				self->r2i[i] = i;
				self->rsize[i] = -1;
				self->rnext[i] = i;
				self->rprev[i] = i;
				self->rdata[i] = 0;
			}
			self->l = nl;
		}
	}

	static int32_t* larank_kcache_r2i (larank_kcache_t * self, int32_t n)
	{
		xminsize (self, n);
		return self->r2i;
	}

	static void xextend (larank_kcache_t * self, int32_t k, int32_t nlen)
	{
		int32_t olen = self->rsize[k];
		if (nlen > olen)
		{
			float32_t *ndata = SG_MALLOC(float32_t, nlen);
			if (olen > 0)
			{
				float32_t *odata = self->rdata[k];
				memcpy (ndata, odata, olen * sizeof (float32_t));
				SG_FREE (odata);
			}
			self->rdata[k] = ndata;
			self->rsize[k] = nlen;
			self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
			self->maxrowlen = CMath::max (self->maxrowlen, nlen);
		}
	}

	static void xswap (larank_kcache_t * self, int32_t i1, int32_t i2, int32_t r1, int32_t r2)
	{
		/* swap row data */
		if (r1 < self->maxrowlen || r2 < self->maxrowlen)
		{
			int32_t mrl = 0;
			int32_t k = self->rnext[-1];
			while (k >= 0)
			{
				int32_t nk = self->rnext[k];
				int32_t n = self->rsize[k];
				int32_t rr = self->i2r[k];
				float32_t *d = self->rdata[k];
				if (r1 < n)
				{
					if (r2 < n)
					{
						float32_t t1 = d[r1];
						float32_t t2 = d[r2];
						d[r1] = t2;
						d[r2] = t1;
					}
					else if (rr == r2)
					{
						d[r1] = self->rdiag[k];
					}
					else
					{
						int32_t arsize = self->rsize[i2];
						if (rr < arsize && rr != r1)
							d[r1] = self->rdata[i2][rr];
						else
							xtruncate (self, k, r1);
					}
				}
				else if (r2 < n)
				{
					if (rr == r1)
					{
						d[r2] = self->rdiag[k];
					}
					else
					{
						int32_t arsize = self->rsize[i1];
						if (rr < arsize && rr != r2)
							d[r2] = self->rdata[i1][rr];
						else
							xtruncate (self, k, r2);
					}
				}
				mrl = CMath::max (mrl, self->rsize[k]);
				k = nk;
			}
			self->maxrowlen = mrl;
		}
		/* swap r2i and i2r */
		self->r2i[r1] = i2;
		self->r2i[r2] = i1;
		self->i2r[i1] = r2;
		self->i2r[i2] = r1;
	}

	static void larank_kcache_swap_rr (larank_kcache_t * self, int32_t r1, int32_t r2)
	{
		xminsize (self, 1 + CMath::max(r1, r2));
		xswap (self, self->r2i[r1], self->r2i[r2], r1, r2);
	}

	static void larank_kcache_swap_ri (larank_kcache_t * self, int32_t r1, int32_t i2)
	{
		xminsize (self, 1 + CMath::max (r1, i2));
		xswap (self, self->r2i[r1], i2, r1, self->i2r[i2]);
	}
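
	/* Note: i2r/r2i maintain a permutation between example indices i and
	 * cache row slots r, with the invariant r2i[i2r[i]] == i.  xswap()
	 * exchanges two examples in that permutation and patches every cached
	 * row so stored kernel values keep referring to the right examples;
	 * entries that cannot be recovered cheaply are truncated rather than
	 * recomputed. */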
	static float64_t xquery (larank_kcache_t * self, int32_t i, int32_t j)
	{
		/* search buddies */
		larank_kcache_t *cache = self->nextbuddy;
		do
		{
			int32_t l = cache->l;
			if (i < l && j < l)
			{
				int32_t s = cache->rsize[i];
				int32_t p = cache->i2r[j];
				if (p < s)
					return cache->rdata[i][p];
				if (i == j && s >= 0)
					return cache->rdiag[i];
				p = cache->i2r[i];
				s = cache->rsize[j];
				if (p < s)
					return cache->rdata[j][p];
			}
			cache = cache->nextbuddy;
		}
		while (cache != self);
		/* compute */
		return self->func->kernel(i, j);
	}

	static float64_t larank_kcache_query (larank_kcache_t * self, int32_t i, int32_t j)
	{
		ASSERT (self);
		ASSERT (i >= 0);
		ASSERT (j >= 0);
		return xquery (self, i, j);
	}

	static void larank_kcache_set_buddy (larank_kcache_t * self, larank_kcache_t * buddy)
	{
		larank_kcache_t *p = self;
		larank_kcache_t *selflast = self->prevbuddy;
		larank_kcache_t *buddylast = buddy->prevbuddy;
		/* check functions are identical */
		ASSERT (self->func == buddy->func);
		/* make sure we are not already buddies */
		do
		{
			if (p == buddy)
				return;
			p = p->nextbuddy;
		}
		while (p != self);
		/* link */
		selflast->nextbuddy = buddy;
		buddy->prevbuddy = selflast;
		buddylast->nextbuddy = self;
		self->prevbuddy = buddylast;
	}

	static float32_t* larank_kcache_query_row (larank_kcache_t * self, int32_t i, int32_t len)
	{
		ASSERT (i >= 0);
		if (i < self->l && len <= self->rsize[i])
		{
			self->rnext[self->rprev[i]] = self->rnext[i];
			self->rprev[self->rnext[i]] = self->rprev[i];
		}
		else
		{
			int32_t olen, p;
			float32_t *d;
			if (i >= self->l || len >= self->l)
				xminsize (self, CMath::max (1 + i, len));
			olen = self->rsize[i];
			if (olen < len)
			{
				if (olen < 0)
				{
					self->rdiag[i] = self->func->kernel(i, i);
					olen = self->rsize[i] = 0;
				}
				xextend (self, i, len);
				d = self->rdata[i];
				self->rsize[i] = olen;
				for (p = olen; p < len; p++)
					d[p] = larank_kcache_query (self, self->r2i[p], i);
				self->rsize[i] = len;
			}
			self->rnext[self->rprev[i]] = self->rnext[i];
			self->rprev[self->rnext[i]] = self->rprev[i];
			xpurge (self);
		}
		self->rprev[i] = -1;
		self->rnext[i] = self->rnext[-1];
		self->rnext[self->rprev[i]] = i;
		self->rprev[self->rnext[i]] = i;
		return self->rdata[i];
	}
}

// Initializing an output class (basically creating a kernel cache for it)
void LaRankOutput::initialize (CKernel* kfunc, int64_t cache)
{
	kernel = larank_kcache_create (kfunc);
	larank_kcache_set_maximum_size (kernel, cache * 1024 * 1024);
	beta = SG_MALLOC(float32_t, 1);
	g = SG_MALLOC(float32_t, 1);
	*beta=0;
	*g=0;
	l = 0;
}

// Destroying an output class (basically destroying the kernel cache)
void LaRankOutput::destroy ()
{
	larank_kcache_destroy (kernel);
	kernel=NULL;
	SG_FREE(beta);
	SG_FREE(g);
	beta=NULL;
	g=NULL;
}
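
// Note on buddy caches: all caches linked via larank_kcache_set_buddy share
// the same kernel function, and xquery() scans the whole buddy ring for an
// already-computed K(i,j) before falling back to a fresh kernel evaluation.
// CLaRank::add() below links every per-class cache to the cache of the first
// class (y0), so a kernel value computed for one output is reused by all.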
// !Important! Computing the score of a given input vector for the actual output
float64_t LaRankOutput::computeScore (int32_t x_id)
{
	if (l == 0)
		return 0;
	else
	{
		float32_t *row = larank_kcache_query_row (kernel, x_id, l);
		return CMath::dot (beta, row, l);
	}
}

// !Important! Computing the gradient of a given input vector for the actual output
float64_t LaRankOutput::computeGradient (int32_t xi_id, int32_t yi, int32_t ythis)
{
	return (yi == ythis ? 1 : 0) - computeScore (xi_id);
}

// Updating the solution in the actual output
void LaRankOutput::update (int32_t x_id, float64_t lambda, float64_t gp)
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int64_t xr = l + 1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}

	// updates the cache order and the beta coefficient
	if (xr < l)
	{
		beta[xr]+=lambda;
	}
	else
	{
		larank_kcache_swap_ri (kernel, l, x_id);
		CMath::resize(g, l, l+1);
		CMath::resize(beta, l, l+1);
		g[l]=gp;
		beta[l]=lambda;
		l++;
	}

	// update stored gradients
	float32_t *row = larank_kcache_query_row (kernel, x_id, l);
	for (int32_t r = 0; r < l; r++)
	{
		float64_t oldg = g[r];
		g[r]=oldg - lambda * row[r];
	}
}

// Linking the cache of this output to the cache of another "buddy" output
// so that if a requested value is not found in this cache, you can ask your buddy if it has it.
void LaRankOutput::set_kernel_buddy (larank_kcache_t * bud)
{
	larank_kcache_set_buddy (bud, kernel);
}

// Removing useless support vectors (for which beta=0)
int32_t LaRankOutput::cleanup ()
{
	int32_t count = 0;
	std::vector < int32_t >idx;
	for (int32_t x = 0; x < l; x++)
	{
		if ((beta[x] < FLT_EPSILON) && (beta[x] > -FLT_EPSILON))
		{
			idx.push_back (x);
			count++;
		}
	}
	int32_t new_l = l - count;
	for (int32_t xx = 0; xx < count; xx++)
	{
		int32_t i = idx[xx] - xx;
		for (int32_t r = i; r < (l - 1); r++)
		{
			larank_kcache_swap_rr (kernel, r, int64_t(r) + 1);
			beta[r]=beta[r + 1];
			g[r]=g[r + 1];
		}
	}
	CMath::resize(beta, l, new_l+1);
	CMath::resize(g, l, new_l+1);
	beta[new_l]=0;
	g[new_l]=0;
	l = new_l;
	return count;
}

// --- Below are information or "get" functions --- //
//
float64_t LaRankOutput::getW2 ()
{
	float64_t sum = 0;
	int32_t *r2i = larank_kcache_r2i (kernel, l + 1);
	for (int32_t r = 0; r < l; r++)
	{
		float32_t *row_r = larank_kcache_query_row (kernel, r2i[r], l);
		sum += beta[r] * CMath::dot (beta, row_r, l);
	}
	return sum;
}

float64_t LaRankOutput::getKii (int32_t x_id)
{
	return larank_kcache_query (kernel, x_id, x_id);
}

//
float64_t LaRankOutput::getBeta (int32_t x_id)
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int32_t xr = -1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}
	return (xr < 0 ? 0 : beta[xr]);
}
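
// Note on the functions above: with beta the coefficient vector over the l
// stored support vectors, computeScore(x) evaluates
//   f_y(x) = sum_r beta[r] * K(x_{r2i[r]}, x),
// and getW2() evaluates
//   |w_y|^2 = sum_r sum_s beta[r] * beta[s] * K(x_r, x_s)
// by dotting each cached kernel row against beta.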
//
float64_t LaRankOutput::getGradient (int32_t x_id)
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int32_t xr = -1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}
	return (xr < 0 ? 0 : g[xr]);
}

bool LaRankOutput::isSupportVector (int32_t x_id) const
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int32_t xr = -1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}
	return (xr >= 0);
}

//
int32_t LaRankOutput::getSV (float32_t* &sv) const
{
	sv=SG_MALLOC(float32_t, l);
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	for (int32_t r = 0; r < l; r++)
		sv[r]=r2i[r];
	return l;
}

CLaRank::CLaRank (): CMultiClassSVM(ONE_VS_REST),
	nb_seen_examples (0), nb_removed (0),
	n_pro (0), n_rep (0), n_opt (0),
	w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
	batch_mode(true), step(0)
{
}

CLaRank::CLaRank (float64_t C, CKernel* k, CLabels* lab):
	CMultiClassSVM(ONE_VS_REST, C, k, lab),
	nb_seen_examples (0), nb_removed (0),
	n_pro (0), n_rep (0), n_opt (0),
	w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
	batch_mode(true), step(0)
{
}

CLaRank::~CLaRank ()
{
	destroy();
}

bool CLaRank::train_machine(CFeatures* data)
{
	tau = 0.0001;

	ASSERT(kernel);
	ASSERT(labels && labels->get_num_labels());

	CSignal::clear_cancel();

	if (data)
	{
		if (data->get_num_vectors() != labels->get_num_labels())
		{
			SG_ERROR("Number of vectors (%d) does not match number of labels (%d)\n",
					data->get_num_vectors(), labels->get_num_labels());
		}
		kernel->init(data, data);
	}

	ASSERT(kernel->get_num_vec_lhs() && kernel->get_num_vec_rhs());

	nb_train=labels->get_num_labels();
	cache = kernel->get_cache_size();

	int32_t n_it = 1;
	float64_t gap = DBL_MAX;

	SG_INFO("Training on %d examples\n", nb_train);
	while (gap > C1 && (!CSignal::cancel_computations())) // stopping criteria
	{
		float64_t tr_err = 0;
		int32_t ind = step;
		for (int32_t i = 0; i < nb_train; i++)
		{
			int32_t y=labels->get_label(i);
			if (add (i, y) != y) // call the add function
				tr_err++;

			if (ind && i / ind)
			{
				SG_DEBUG("Done: %d %% Train error (online): %f%%\n",
						(int32_t) (((float64_t) i) / nb_train * 100), (tr_err / ((float64_t) i + 1)) * 100);
				ind += step;
			}
		}

		SG_DEBUG("End of iteration %d\n", n_it++);
		SG_DEBUG("Train error (online): %f%%\n", (tr_err / nb_train) * 100);
		gap = computeGap ();
		SG_ABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(DBL_MAX), -CMath::log10(C1), 6);

		if (!batch_mode) // skip stopping criteria if online mode
			gap = 0;
	}
	SG_DONE();
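
	// Note on the loop above: in batch mode, passes over the training set
	// continue until the duality gap returned by computeGap() drops below C1,
	// certifying approximate optimality; in online mode the gap is forced to
	// zero so that every example is processed exactly once.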
	int32_t num_classes = outputs.size();
	create_multiclass_svm(num_classes);
	SG_DEBUG("%d classes\n", num_classes);

	// Used for saving a model file
	int32_t i=0;
	for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
	{
		const LaRankOutput* o=&(it->second);

		larank_kcache_t* k=o->getKernel();
		int32_t l=o->get_l();
		float32_t* beta=o->getBetas();
		int32_t *r2i = larank_kcache_r2i (k, l);

		ASSERT(l>0);
		SG_DEBUG("svm[%d] has %d sv, b=%f\n", i, l, 0.0);

		CSVM* svm=new CSVM(l);

		for (int32_t j=0; j<l; j++)
		{
			svm->set_alpha(j, beta[j]);
			svm->set_support_vector(j, r2i[j]);
		}

		svm->set_bias(0);
		set_svm(i, svm);
		i++;
	}
	destroy();

	return true;
}

// LEARNING FUNCTION: add new patterns and run optimization steps selected with an adaptive schedule
int32_t CLaRank::add (int32_t x_id, int32_t yi)
{
	++nb_seen_examples;
	// create a new output object if this one has never been seen before
	if (!getOutput (yi))
	{
		outputs.insert (std::make_pair (yi, LaRankOutput ()));
		LaRankOutput *cur = getOutput (yi);
		cur->initialize (kernel, cache);
		if (outputs.size () == 1)
			y0 = outputs.begin ()->first;
		// link the cache of this new output to a buddy
		if (outputs.size () > 1)
		{
			LaRankOutput *out0 = getOutput (y0);
			cur->set_kernel_buddy (out0->getKernel ());
		}
	}

	LaRankPattern tpattern (x_id, yi);
	LaRankPattern & pattern = (patterns.isPattern (x_id)) ? patterns.getPattern (x_id) : tpattern;

	// ProcessNew with the "fresh" pattern
	float64_t time1 = CTime::get_curtime();
	process_return_t pro_ret = process (pattern, processNew);
	float64_t dual_increase = pro_ret.dual_increase;
	float64_t duration = (CTime::get_curtime() - time1);
	float64_t coeff = dual_increase / (0.00001 + duration);
	dual += dual_increase;
	n_pro++;
	w_pro = 0.05 * coeff + (1 - 0.05) * w_pro;

	// ProcessOld & Optimize until ready for a new processnew
	// (Adaptive schedule here)
	for (;;)
	{
		float64_t w_sum = w_pro + w_rep + w_opt;
		float64_t prop_min = w_sum / 20;
		if (w_pro < prop_min)
			w_pro = prop_min;
		if (w_rep < prop_min)
			w_rep = prop_min;
		if (w_opt < prop_min)
			w_opt = prop_min;
		w_sum = w_pro + w_rep + w_opt;
		float64_t r = CMath::random(0.0, w_sum);
		if (r <= w_pro)
		{
			break;
		}
		else if ((r > w_pro) && (r <= w_pro + w_rep)) // ProcessOld here
		{
			float64_t ltime1 = CTime::get_curtime ();
			float64_t ldual_increase = reprocess ();
			float64_t lduration = (CTime::get_curtime () - ltime1);
			float64_t lcoeff = ldual_increase / (0.00001 + lduration);
			dual += ldual_increase;
			n_rep++;
			w_rep = 0.05 * lcoeff + (1 - 0.05) * w_rep;
		}
		else // Optimize here
		{
			float64_t ltime1 = CTime::get_curtime ();
			float64_t ldual_increase = optimize ();
			float64_t lduration = (CTime::get_curtime () - ltime1);
			float64_t lcoeff = ldual_increase / (0.00001 + lduration);
			dual += ldual_increase;
			n_opt++;
			w_opt = 0.05 * lcoeff + (1 - 0.05) * w_opt;
		}
	}
	if (nb_seen_examples % 100 == 0) // Cleanup useless Support Vectors/Patterns sometimes
		nb_removed += cleanup ();
	return pro_ret.ypred;
}
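
// Note on the adaptive schedule in add() above: each operation type
// (ProcessNew/ProcessOld/Optimize) keeps a usefulness estimate, updated as an
// exponential moving average of dual increase per second,
//   w <- 0.05 * (dual_increase / duration) + 0.95 * w,
// and an operation is drawn with probability proportional to its weight
// (floored at 5% of the total, since prop_min = w_sum / 20). The solver thus
// spends its time where the dual objective is currently improving fastest.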
// PREDICTION FUNCTION: main function in la_rank_classify
int32_t CLaRank::predict (int32_t x_id)
{
	int32_t res = -1;
	float64_t score_max = -DBL_MAX;
	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
	{
		float64_t score = it->second.computeScore (x_id);
		if (score > score_max)
		{
			score_max = score;
			res = it->first;
		}
	}
	return res;
}

void CLaRank::destroy ()
{
	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
		it->second.destroy ();
}

// Compute duality gap (costly but used in stopping criteria in batch mode)
float64_t CLaRank::computeGap ()
{
	float64_t sum_sl = 0;
	float64_t sum_bi = 0;
	for (uint32_t i = 0; i < patterns.maxcount (); ++i)
	{
		const LaRankPattern & p = patterns[i];
		if (!p.exists ())
			continue;
		LaRankOutput *out = getOutput (p.y);
		if (!out)
			continue;
		sum_bi += out->getBeta (p.x_id);
		float64_t gi = out->computeGradient (p.x_id, p.y, p.y);
		float64_t gmin = DBL_MAX;
		for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
		{
			if (it->first != p.y && it->second.isSupportVector (p.x_id))
			{
				float64_t g =
					it->second.computeGradient (p.x_id, p.y, it->first);
				if (g < gmin)
					gmin = g;
			}
		}
		sum_sl += CMath::max (0.0, gi - gmin);
	}
	return CMath::max (0.0, computeW2 () + C1 * sum_sl - sum_bi);
}

// Number of classes so far
uint32_t CLaRank::getNumOutputs () const
{
	return outputs.size ();
}

// Number of Support Vectors
int32_t CLaRank::getNSV ()
{
	int32_t res = 0;
	for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
	{
		float32_t* sv=NULL;
		res += it->second.getSV (sv);
		SG_FREE(sv);
	}
	return res;
}

// Norm of the parameters vector
float64_t CLaRank::computeW2 ()
{
	float64_t res = 0;
	for (uint32_t i = 0; i < patterns.maxcount (); ++i)
	{
		const LaRankPattern & p = patterns[i];
		if (!p.exists ())
			continue;
		for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
			if (it->second.getBeta (p.x_id))
				res += it->second.getBeta (p.x_id) * it->second.computeScore (p.x_id);
	}
	return res;
}

// Compute dual objective value
float64_t CLaRank::getDual ()
{
	float64_t res = 0;
	for (uint32_t i = 0; i < patterns.maxcount (); ++i)
	{
		const LaRankPattern & p = patterns[i];
		if (!p.exists ())
			continue;
		LaRankOutput *out = getOutput (p.y);
		if (!out)
			continue;
		res += out->getBeta (p.x_id);
	}
	return res - computeW2 () / 2;
}

LaRankOutput *CLaRank::getOutput (int32_t index)
{
	outputhash_t::iterator it = outputs.find (index);
	return it == outputs.end ()? NULL : &it->second;
}
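
// Note: getDual() above computes the dual objective
//   D(beta) = sum_i beta_{y_i}(x_i) - |w|^2 / 2,
// and computeGap() bounds the distance to the optimum via the duality gap
//   gap = max(0, |w|^2 + C1 * sum_i max(0, g_i - min_y g_y(x_i)) - sum_i beta_{y_i}(x_i)),
// which is the quantity tested against C1 as the batch-mode stopping criterion.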
// IMPORTANT Main SMO optimization step
CLaRank::process_return_t CLaRank::process (const LaRankPattern & pattern, process_type ptype)
{
	process_return_t pro_ret = process_return_t (0, 0);

	/*
	** compute gradient and sort
	*/
	std::vector < outputgradient_t > outputgradients;
	outputgradients.reserve (getNumOutputs ());

	std::vector < outputgradient_t > outputscores;
	outputscores.reserve (getNumOutputs ());

	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
	{
		if (ptype != processOptimize
				|| it->second.isSupportVector (pattern.x_id))
		{
			float64_t g =
				it->second.computeGradient (pattern.x_id, pattern.y, it->first);
			outputgradients.push_back (outputgradient_t (it->first, g));
			if (it->first == pattern.y)
				outputscores.push_back (outputgradient_t (it->first, (1 - g)));
			else
				outputscores.push_back (outputgradient_t (it->first, -g));
		}
	}

	std::sort (outputgradients.begin (), outputgradients.end ());

	/*
	** determine the prediction
	*/
	std::sort (outputscores.begin (), outputscores.end ());
	pro_ret.ypred = outputscores[0].output;

	/*
	** Find yp (1st part of the pair)
	*/
	outputgradient_t ygp;
	LaRankOutput *outp = NULL;
	uint32_t p;
	for (p = 0; p < outputgradients.size (); ++p)
	{
		outputgradient_t & current = outputgradients[p];
		LaRankOutput *output = getOutput (current.output);
		bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
		bool goodclass = current.output == pattern.y;
		if ((!support && goodclass) ||
				(support && output->getBeta (pattern.x_id) < (goodclass ? C1 : 0)))
		{
			ygp = current;
			outp = output;
			break;
		}
	}
	if (p == outputgradients.size ())
		return pro_ret;
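
	/*
	** Note: yp (above) and ym (below) form a tau-violating pair: yp is the
	** feasible class with the largest gradient whose beta can still increase,
	** ym the one with the smallest gradient whose beta can still decrease.
	** The step size computed further down,
	**   lambda = (g_p - g_m) / (2 * k_ii),
	** maximizes the dual gain lambda * (g_p - g_m - lambda * k_ii) before
	** being clipped to the box constraints (beta_y <= C1 for the true class,
	** beta_y <= 0 otherwise).
	*/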
	/*
	** Find ym (2nd part of the pair)
	*/
	outputgradient_t ygm;
	LaRankOutput *outm = NULL;
	int32_t m;
	for (m = outputgradients.size () - 1; m >= 0; --m)
	{
		outputgradient_t & current = outputgradients[m];
		LaRankOutput *output = getOutput (current.output);
		bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
		bool goodclass = current.output == pattern.y;
		if (!goodclass || (support && output->getBeta (pattern.x_id) > 0))
		{
			ygm = current;
			outm = output;
			break;
		}
	}
	if (m < 0)
		return pro_ret;

	/*
	** Throw or Insert pattern
	*/
	if ((ygp.gradient - ygm.gradient) < tau)
		return pro_ret;
	if (ptype == processNew)
		patterns.insert (pattern);

	/*
	** compute lambda and clip it
	*/
	float64_t kii = outp->getKii (pattern.x_id);
	float64_t lambda = (ygp.gradient - ygm.gradient) / (2 * kii);
	if (ptype == processOptimize || outp->isSupportVector (pattern.x_id))
	{
		float64_t beta = outp->getBeta (pattern.x_id);
		if (ygp.output == pattern.y)
			lambda = CMath::min (lambda, C1 - beta);
		else
			lambda = CMath::min (lambda, fabs (beta));
	}
	else
		lambda = CMath::min (lambda, C1);

	/*
	** update the solution
	*/
	outp->update (pattern.x_id, lambda, ygp.gradient);
	outm->update (pattern.x_id, -lambda, ygm.gradient);

	pro_ret.dual_increase = lambda * ((ygp.gradient - ygm.gradient) - lambda * kii);
	return pro_ret;
}

// ProcessOld
float64_t CLaRank::reprocess ()
{
	if (patterns.size ())
	{
		for (int32_t n = 0; n < 10; ++n)
		{
			process_return_t pro_ret = process (patterns.sample (), processOld);
			if (pro_ret.dual_increase)
				return pro_ret.dual_increase;
		}
	}
	return 0;
}

// Optimize
float64_t CLaRank::optimize ()
{
	float64_t dual_increase = 0;
	if (patterns.size ())
	{
		for (int32_t n = 0; n < 10; ++n)
		{
			process_return_t pro_ret =
				process (patterns.sample(), processOptimize);
			dual_increase += pro_ret.dual_increase;
		}
	}
	return dual_increase;
}

// remove patterns and return the number of patterns that were removed
uint32_t CLaRank::cleanup ()
{
	/*
	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
		it->second.cleanup ();

	uint32_t res = 0;
	for (uint32_t i = 0; i < patterns.size (); ++i)
	{
		LaRankPattern & p = patterns[i];
		if (p.exists () && !outputs[p.y].isSupportVector (p.x_id))
		{
			patterns.remove (i);
			++res;
		}
	}
	return res;
	*/
	return 0;
}
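
// A minimal usage sketch (not part of this file): training CLaRank on dense
// features with a Gaussian kernel. This is a sketch against the Shogun 1.x
// C++ API; the exact headers, the CLabels constructor and the apply() call
// are assumptions and may differ between releases.
//
//   #include <shogun/features/SimpleFeatures.h>
//   #include <shogun/kernel/GaussianKernel.h>
//   #include <shogun/classifier/svm/LaRank.h>
//
//   using namespace shogun;
//
//   void train_larank_example (SGMatrix<float64_t> X, SGVector<float64_t> y)
//   {
//       CSimpleFeatures<float64_t>* feats = new CSimpleFeatures<float64_t>(X);
//       CLabels* labels = new CLabels(y);                   // class ids 0..K-1
//       CKernel* kern = new CGaussianKernel(feats, feats, 2.0); // width=2.0
//       CLaRank* machine = new CLaRank(1.0, kern, labels);  // C=1.0
//       machine->train();                                   // runs train_machine() above
//       CLabels* pred = machine->apply(feats);              // argmax over per-class scores
//       SG_UNREF(pred);
//       SG_UNREF(machine);                                  // releases kernel and labels too
//   }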