presage  0.8.7
recencyPredictor.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************
00003  *  Presage, an extensible predictive text entry system
00004  *  ---------------------------------------------------
00005  *
00006  *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012 
00013     This program is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016     GNU General Public License for more details.
00017 
00018     You should have received a copy of the GNU General Public License along
00019     with this program; if not, write to the Free Software Foundation, Inc.,
00020     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00021                                                                              *
00022                                                                 **********(*)*/
00023 
00024 
00025 #include "recencyPredictor.h"
00026 
00027 #include <math.h>  // for exp()
00028 
00029 // RecencyPredictor config variables
00030 const char* RecencyPredictor::LOGGER           = "Presage.Predictors.RecencyPredictor.LOGGER";
00031 const char* RecencyPredictor::LAMBDA           = "Presage.Predictors.RecencyPredictor.LAMBDA";
00032 const char* RecencyPredictor::N_0              = "Presage.Predictors.RecencyPredictor.N_0";
00033 const char* RecencyPredictor::CUTOFF_THRESHOLD = "Presage.Predictors.RecencyPredictor.CUTOFF_THRESHOLD";
00034 
00035 RecencyPredictor::RecencyPredictor(Configuration* config, ContextTracker* ct)
00036     : Predictor(config,
00037                 ct,
00038                 "RecencyPredictor",
00039                 "RecencyPredictor, a statistical recency promotion predictor",
00040                 "RecencyPredictor, based on a recency promotion principle, generates predictions by assigning exponentially decaying probability values to previously encountered tokens. Tokens are assigned a probability value that decays exponentially with their distance from the current token, thereby promoting context recency." ),
00041       dispatcher (this)
00042 {
00043     // init default values
00044     lambda = 1;
00045     n_0 = 1;
00046     cutoff_threshold = 20;
00047 
00048     dispatcher.map(config->find (LOGGER),            &RecencyPredictor::set_logger);
00049     dispatcher.map(config->find (LAMBDA),            &RecencyPredictor::set_lambda);
00050     dispatcher.map(config->find (N_0),               &RecencyPredictor::set_n_0);
00051     dispatcher.map(config->find (CUTOFF_THRESHOLD),  &RecencyPredictor::set_cutoff_threshold);
00052 }
00053 
00054 RecencyPredictor::~RecencyPredictor()
00055 {
00056     // complete
00057 }
00058 
00059 void RecencyPredictor::set_lambda (const std::string& value)
00060 {
00061     lambda = Utility::toDouble(value);
00062     logger << INFO << "LAMBDA: " << value << endl;
00063 }
00064 
00065 void RecencyPredictor::set_n_0 (const std::string& value)
00066 {
00067     n_0 = Utility::toDouble (value);
00068     logger << INFO << "N_0: " << value << endl;
00069 }
00070 
00071 
00072 void RecencyPredictor::set_cutoff_threshold (const std::string& value)
00073 {
00074     cutoff_threshold = Utility::toInt (value);
00075     logger << INFO << "CUTOFF_THRESHOLD: " << value << endl;
00076 }
00077 
00078 
00079 Prediction RecencyPredictor::predict(const size_t max, const char** filter) const
00080 {
00081     Prediction result;
00082 
00083     std::string prefix = contextTracker->getPrefix();
00084     logger << INFO << "prefix: " << prefix << endl;
00085     if (!prefix.empty()) {
00086         // Only build recency prediction if prefix is not empty: when
00087         // prefix is empty, all previosly seen tokens are candidates
00088         // for prediction. This is not desirable, because it means
00089         // that recency prediction reduces to repetion of max previous
00090         // tokens (i.e. the prediction would contain the most recent
00091         // tokens in reverse order).
00092         //
00093         Suggestion  suggestion;
00094         size_t      index = 1;
00095         std::string token = contextTracker->getToken(index);
00096         double      prob = 0;
00097         while (!token.empty()                // context history exhausted
00098                && result.size() < max        // need only max suggestions
00099                && index <= cutoff_threshold  // look back only as far as cutoff
00100             ) {
00101             logger << INFO << "token: " << token << endl;
00102 
00103             if (token.find(prefix) == 0) { // if token starts with prefix
00104                 // compute probability according to exponential decay
00105                 // formula
00106                 //
00107                 prob = n_0 * exp(-(lambda * (index - 1)));
00108                 logger << INFO << "probability: " << prob << endl;
00109                 suggestion.setWord(token);
00110                 suggestion.setProbability(prob);
00111                 result.addSuggestion(suggestion);
00112             }
00113 
00114             index++;
00115             token = contextTracker->getToken(index);
00116         }
00117     }
00118 
00119     return result;
00120 }
00121 
00122 void RecencyPredictor::learn(const std::vector<std::string>& change)
00123 {}
00124 
00125 void RecencyPredictor::update (const Observable* var)
00126 {
00127     logger << DEBUG << "About to invoke dispatcher: " << var->get_name () << " - " << var->get_value() << endl;
00128     dispatcher.dispatch (var);
00129 }