presage
0.8.7
|
00001 00002 /****************************************************** 00003 * Presage, an extensible predictive text entry system 00004 * --------------------------------------------------- 00005 * 00006 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk> 00007 00008 This program is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 2 of the License, or 00011 (at your option) any later version. 00012 00013 This program is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License along 00019 with this program; if not, write to the Free Software Foundation, Inc., 00020 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 00021 * 00022 **********(*)*/ 00023 00024 00025 #include "contextTracker.h" 00026 #include "../utility.h" 00027 #include "../predictorRegistry.h" 00028 #include "../tokenizer/forwardTokenizer.h" 00029 00030 #include <stdlib.h> // for atoi() 00031 00032 const char* ContextTracker::LOGGER = "Presage.ContextTracker.LOGGER"; 00033 const char* ContextTracker::SLIDING_WINDOW_SIZE = "Presage.ContextTracker.SLIDING_WINDOW_SIZE"; 00034 00035 ContextTracker::ContextTracker(Configuration* config, 00036 PredictorRegistry* registry, 00037 PresageCallback* callback, 00038 const char wChars[], 00039 const char tChars[], 00040 const char bChars[], 00041 const char cChars[]) 00042 : wordChars (wChars), 00043 separatorChars (tChars), 00044 blankspaceChars(bChars), 00045 controlChars (cChars), 00046 predictorRegistry (registry), 00047 logger ("ContextTracker", std::cerr), 00048 //tokenizer (pastStream, blankspaceChars, separatorChars), 00049 dispatcher (this) 00050 { 00051 if (callback) { 00052 context_tracker_callback = callback; 00053 } else { 00054 throw new PresageException(PRESAGE_INVALID_CALLBACK_ERROR, "Invalid callback object"); 00055 } 00056 00057 contextChangeDetector = new ContextChangeDetector(wordChars, 00058 separatorChars, 00059 blankspaceChars, 00060 controlChars); 00061 00062 // set pointer to this context tracker in predictor registry so that 00063 // predictors can be constructed when next iterator is requested 00064 // 00065 if (predictorRegistry) { 00066 predictorRegistry->setContextTracker(this); 00067 } 00068 00069 // build dispatch map 00070 dispatcher.map (config->find (LOGGER), & ContextTracker::set_logger); 00071 dispatcher.map (config->find (SLIDING_WINDOW_SIZE), & ContextTracker::set_sliding_window_size); 00072 00073 } 00074 00075 ContextTracker::~ContextTracker() 00076 { 00077 delete contextChangeDetector; 00078 } 00079 00080 void ContextTracker::set_logger (const std::string& value) 00081 { 00082 logger << setlevel (value); 00083 logger << INFO << "LOGGER: " << value << endl; 00084 } 00085 00086 void ContextTracker::set_sliding_window_size (const std::string& value) 00087 { 00088 contextChangeDetector->set_sliding_window_size (value); 00089 logger << INFO << "SLIDING_WINDOWS_SIZE: " << value << endl; 00090 } 00091 00092 const PresageCallback* ContextTracker::callback(const PresageCallback* new_callback) 00093 { 00094 const PresageCallback* result = context_tracker_callback; 00095 if (new_callback) { 00096 context_tracker_callback = new_callback; 00097 } 00098 return result; 00099 } 00100 00104 bool ContextTracker::contextChange() 00105 { 00106 return contextChangeDetector->context_change(getPastStream()); 00107 } 00108 00109 void ContextTracker::update() 00110 { 00111 std::stringstream change; 00112 00113 // prepend partially entered token to change if exists, need to 00114 // look into sliding_window to get previously partially entered 00115 // token if it exists 00116 std::stringstream sliding_window_stream; 00117 sliding_window_stream << contextChangeDetector->get_sliding_window(); 00118 ReverseTokenizer rTok(sliding_window_stream, 00119 blankspaceChars, 00120 separatorChars); 00121 std::string first_token = rTok.nextToken(); 00122 if (!first_token.empty()) { 00123 change << first_token; 00124 } 00125 00126 logger << DEBUG << "update(): getPastStream(): " << getPastStream() << endl; 00127 00128 // append change detected by context change detector 00129 change << contextChangeDetector->change(getPastStream()); 00130 00131 logger << INFO << "update(): change: " << change.str() << endl; 00132 00133 // split change up into tokens 00134 std::vector<std::string> change_tokens; 00135 ForwardTokenizer tok(change, 00136 blankspaceChars, 00137 separatorChars); 00138 logger << INFO << "update(): tokenized change: "; 00139 while (tok.hasMoreTokens()) { 00140 std::string token = tok.nextToken(); 00141 change_tokens.push_back(token); 00142 logger << INFO << token << ':'; 00143 } 00144 logger << INFO << endl; 00145 00146 if (! change_tokens.empty()) { 00147 // remove prefix (partially entered token or empty token) 00148 change_tokens.pop_back(); 00149 } 00150 00151 logger << INFO << "update(): change tokens: "; 00152 for (std::vector<std::string>::const_iterator it = change_tokens.begin(); 00153 it != change_tokens.end(); 00154 it++) { 00155 logger << INFO << *it << ':'; 00156 } 00157 logger << INFO << endl; 00158 00159 // time to learn 00160 PredictorRegistry::Iterator it = predictorRegistry->iterator(); 00161 Predictor* predictor = 0; 00162 00163 while (it.hasNext()) { 00164 predictor = it.next(); 00165 predictor->learn(change_tokens); 00166 } 00167 00168 // update sliding window 00169 contextChangeDetector->update_sliding_window(getPastStream()); 00170 } 00171 00172 std::string ContextTracker::getPrefix() const 00173 { 00174 return getToken(0); 00175 } 00176 00177 std::string ContextTracker::getToken(const int index) const 00178 { 00179 std::stringstream pastStringStream(context_tracker_callback->get_past_stream()); 00180 ReverseTokenizer tokenizer(pastStringStream, blankspaceChars, separatorChars); 00181 00182 std::string token; 00183 int i = 0; 00184 while (tokenizer.hasMoreTokens() && i <= index) { 00185 token = tokenizer.nextToken(); 00186 i++; 00187 } 00188 if (i <= index) { 00189 // in case the index points too far back 00190 token = ""; 00191 } 00192 return token; 00193 00195 // "a b c" 00196 // 2 1 0 00197 // 0 1 2 00198 // 1 2 3 00199 // 00200 // ForwardTokenizer tokenizer(pastStream, blankspaceChars, separatorChars); 00201 // std::string result; 00202 // int tokens = tokenizer.countTokens(); 00203 // // why oh why is this clear() required to get it to work??? 00204 // pastStream.clear(); 00205 // int j = 0; 00206 // while (tokenizer.hasMoreTokens() && j < tokens - index) { 00207 // result = tokenizer.nextToken(); 00208 // j++; 00209 // 00210 // std::cerr << "ContextTracker::getToken() current token: " << result << std::endl; 00211 // } 00212 // return result; 00213 } 00214 00215 std::string ContextTracker::getSlidingWindowToken(const int index) const 00216 { 00217 std::stringstream slidingWindowStream(contextChangeDetector->get_sliding_window()); 00218 ReverseTokenizer tokenizer(slidingWindowStream, blankspaceChars, separatorChars); 00219 00220 std::string token; 00221 int i = 0; 00222 while (tokenizer.hasMoreTokens() && i <= index) { 00223 token = tokenizer.nextToken(); 00224 i++; 00225 } 00226 if (i <= index) { 00227 // in case the index points too far back 00228 token = ""; 00229 } 00230 return token; 00231 } 00232 00233 std::string ContextTracker::getFutureStream() const 00234 { 00235 return context_tracker_callback->get_future_stream(); 00236 } 00237 00238 std::string ContextTracker::getPastStream() const 00239 { 00240 std::string result = context_tracker_callback->get_past_stream(); 00241 return result; 00242 } 00243 00244 bool ContextTracker::isCompletionValid(const std::string& completion) const 00245 { 00246 bool result = false; 00247 00248 std::string prefix = getPrefix(); 00249 prefix = Utility::strtolower(prefix); // no need to be case sensitive 00250 if (completion.find(prefix) == 0) { 00251 result = true; 00252 } 00253 00254 return result; 00255 } 00256 00257 bool ContextTracker::isWordChar(const char c) const 00258 { 00259 if(wordChars.find(c, 0) != std::string::npos) 00260 return true; 00261 else 00262 return false; 00263 } 00264 00265 bool ContextTracker::isSeparatorChar(const char c) const 00266 { 00267 if(separatorChars.find(c, 0) != std::string::npos) 00268 return true; 00269 else 00270 return false; 00271 } 00272 00273 bool ContextTracker::isBlankspaceChar(const char c) const 00274 { 00275 if(blankspaceChars.find(c, 0) != std::string::npos) 00276 return true; 00277 else 00278 return false; 00279 } 00280 00281 bool ContextTracker::isControlChar(const char c) const 00282 { 00283 if(controlChars.find(c, 0) != std::string::npos) 00284 return true; 00285 else 00286 return false; 00287 } 00288 00289 std::string ContextTracker::getWordChars() const 00290 { 00291 return wordChars; 00292 } 00293 00294 std::string ContextTracker::getSeparatorChars() const 00295 { 00296 return separatorChars; 00297 } 00298 00299 std::string ContextTracker::getBlankspaceChars() const 00300 { 00301 return blankspaceChars; 00302 } 00303 00304 std::string ContextTracker::getControlChars() const 00305 { 00306 return controlChars; 00307 } 00308 00309 std::string ContextTracker::toString() const 00310 { 00311 return context_tracker_callback->get_past_stream() + "<|>" + context_tracker_callback->get_future_stream() + "\n"; 00312 } 00313 00314 void ContextTracker::update (const Observable* variable) 00315 { 00316 logger << DEBUG << "Notification received: " 00317 << variable->get_name () << " - " << variable->get_value () << endl; 00318 00319 dispatcher.dispatch (variable); 00320 }