presage
0.8.7
/******************************************************
 *  Presage, an extensible predictive text entry system
 *  ---------------------------------------------------
 *
 *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
                                                                             *
                                                                **********(*)*/


#include "recencyPredictor.h"

#include <math.h>  // for exp()

// RecencyPredictor config variables
const char* RecencyPredictor::LOGGER = "Presage.Predictors.RecencyPredictor.LOGGER";
const char* RecencyPredictor::LAMBDA = "Presage.Predictors.RecencyPredictor.LAMBDA";
const char* RecencyPredictor::N_0 = "Presage.Predictors.RecencyPredictor.N_0";
const char* RecencyPredictor::CUTOFF_THRESHOLD = "Presage.Predictors.RecencyPredictor.CUTOFF_THRESHOLD";

RecencyPredictor::RecencyPredictor(Configuration* config, ContextTracker* ct)
    : Predictor(config,
                ct,
                "RecencyPredictor",
                "RecencyPredictor, a statistical recency promotion predictor",
                "RecencyPredictor, based on a recency promotion principle, generates predictions by assigning exponentially decaying probability values to previously encountered tokens. Tokens are assigned a probability value that decays exponentially with their distance from the current token, thereby promoting context recency."
      ),
      dispatcher (this)
{
    // init default values
    lambda = 1;
    n_0 = 1;
    cutoff_threshold = 20;

    dispatcher.map(config->find (LOGGER), &RecencyPredictor::set_logger);
    dispatcher.map(config->find (LAMBDA), &RecencyPredictor::set_lambda);
    dispatcher.map(config->find (N_0), &RecencyPredictor::set_n_0);
    dispatcher.map(config->find (CUTOFF_THRESHOLD), &RecencyPredictor::set_cutoff_threshold);
}

RecencyPredictor::~RecencyPredictor()
{
    // complete
}

void RecencyPredictor::set_lambda (const std::string& value)
{
    lambda = Utility::toDouble(value);
    logger << INFO << "LAMBDA: " << value << endl;
}

void RecencyPredictor::set_n_0 (const std::string& value)
{
    n_0 = Utility::toDouble (value);
    logger << INFO << "N_0: " << value << endl;
}


void RecencyPredictor::set_cutoff_threshold (const std::string& value)
{
    cutoff_threshold = Utility::toInt (value);
    logger << INFO << "CUTOFF_THRESHOLD: " << value << endl;
}


Prediction RecencyPredictor::predict(const size_t max, const char** filter) const
{
    Prediction result;

    std::string prefix = contextTracker->getPrefix();
    logger << INFO << "prefix: " << prefix << endl;
    if (!prefix.empty()) {
        // Only build recency prediction if prefix is not empty: when
        // prefix is empty, all previously seen tokens are candidates
        // for prediction. This is not desirable, because it means
        // that recency prediction reduces to repetition of max previous
        // tokens (i.e. the prediction would contain the most recent
        // tokens in reverse order).
        //
        Suggestion  suggestion;
        size_t      index = 1;
        std::string token = contextTracker->getToken(index);
        double      prob = 0;
        while (!token.empty()                    // context history exhausted
               && result.size() < max            // need only max suggestions
               && index <= cutoff_threshold      // look back only as far as cutoff
              ) {
            logger << INFO << "token: " << token << endl;

            if (token.find(prefix) == 0) {  // if token starts with prefix
                // compute probability according to exponential decay
                // formula
                //
                prob = n_0 * exp(-(lambda * (index - 1)));
                logger << INFO << "probability: " << prob << endl;
                suggestion.setWord(token);
                suggestion.setProbability(prob);
                result.addSuggestion(suggestion);
            }

            index++;
            token = contextTracker->getToken(index);
        }
    }

    return result;
}

void RecencyPredictor::learn(const std::vector<std::string>& change)
{}

void RecencyPredictor::update (const Observable* var)
{
    logger << DEBUG << "About to invoke dispatcher: " << var->get_name () << " - " << var->get_value() << endl;
    dispatcher.dispatch (var);
}
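
The exponential decay used in predict() can be illustrated in isolation. The following is a minimal standalone sketch, not part of Presage: it evaluates the same formula, prob = n_0 * exp(-(lambda * (index - 1))), with the default values set in the constructor (n_0 = 1, lambda = 1, cutoff_threshold = 20), to show how candidate probabilities fall off with distance from the current token.

// Standalone sketch (not Presage code): evaluates the recency decay formula
// from RecencyPredictor::predict() with the constructor's default parameters.
#include <cmath>
#include <cstdio>

int main()
{
    const double n_0 = 1.0;           // default N_0
    const double lambda = 1.0;        // default LAMBDA
    const int cutoff_threshold = 20;  // default CUTOFF_THRESHOLD

    for (int index = 1; index <= cutoff_threshold; ++index) {
        // same formula as in predict(): the most recent token (index 1) gets n_0
        double prob = n_0 * std::exp(-(lambda * (index - 1)));
        std::printf("index %2d -> probability %.6g\n", index, prob);
    }
    return 0;
}

With these defaults the most recent matching token (index 1) receives probability 1, the next about 0.37, and by the cutoff at index 20 the value has decayed below 6e-9; increasing LAMBDA makes the predictor forget history faster, while decreasing it flattens the distribution across the look-back window.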