presage  0.8.7
contextTracker.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************
00003  *  Presage, an extensible predictive text entry system
00004  *  ---------------------------------------------------
00005  *
00006  *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012 
00013     This program is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016     GNU General Public License for more details.
00017 
00018     You should have received a copy of the GNU General Public License along
00019     with this program; if not, write to the Free Software Foundation, Inc.,
00020     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00021     *
00022     **********(*)*/
00023 
00024 
00025 #include "contextTracker.h"
00026 #include "../utility.h"
00027 #include "../predictorRegistry.h"
00028 #include "../tokenizer/forwardTokenizer.h"
00029 
00030 #include <stdlib.h>  // for atoi()
00031 
00032 const char* ContextTracker::LOGGER = "Presage.ContextTracker.LOGGER";
00033 const char* ContextTracker::SLIDING_WINDOW_SIZE = "Presage.ContextTracker.SLIDING_WINDOW_SIZE";
00034 
00035 ContextTracker::ContextTracker(Configuration* config,
00036                                PredictorRegistry* registry,
00037                                PresageCallback* callback,
00038                                const char wChars[],
00039                                const char tChars[],
00040                                const char bChars[],
00041                                const char cChars[])
00042     : wordChars      (wChars),
00043       separatorChars (tChars),
00044       blankspaceChars(bChars),
00045       controlChars   (cChars),
00046       predictorRegistry (registry),
00047       logger         ("ContextTracker", std::cerr),
00048       //tokenizer      (pastStream, blankspaceChars, separatorChars),
00049       dispatcher     (this)
00050 {
00051     if (callback) {
00052         context_tracker_callback = callback;
00053     } else {
00054         throw new PresageException(PRESAGE_INVALID_CALLBACK_ERROR, "Invalid callback object");
00055     }
00056 
00057     contextChangeDetector = new ContextChangeDetector(wordChars,
00058                                                       separatorChars,
00059                                                       blankspaceChars,
00060                                                       controlChars);
00061 
00062     // set pointer to this context tracker in predictor registry so that
00063     // predictors can be constructed when next iterator is requested
00064     //
00065     if (predictorRegistry) {
00066         predictorRegistry->setContextTracker(this);
00067     }
00068 
00069     // build dispatch map
00070     dispatcher.map (config->find (LOGGER), & ContextTracker::set_logger);
00071     dispatcher.map (config->find (SLIDING_WINDOW_SIZE), & ContextTracker::set_sliding_window_size);
00072 
00073 }
00074 
00075 ContextTracker::~ContextTracker()
00076 {
00077     delete contextChangeDetector;
00078 }
00079 
00080 void ContextTracker::set_logger (const std::string& value)
00081 {
00082     logger << setlevel (value);
00083     logger << INFO << "LOGGER: " << value << endl;
00084 }
00085 
00086 void ContextTracker::set_sliding_window_size (const std::string& value)
00087 {
00088     contextChangeDetector->set_sliding_window_size (value);
00089     logger << INFO << "SLIDING_WINDOWS_SIZE: " << value << endl;
00090 }
00091 
00092 const PresageCallback* ContextTracker::callback(const PresageCallback* new_callback)
00093 {
00094     const PresageCallback* result = context_tracker_callback;
00095     if (new_callback) {
00096         context_tracker_callback = new_callback;
00097     }
00098     return result;
00099 }
00100 
00104 bool ContextTracker::contextChange()
00105 {
00106     return contextChangeDetector->context_change(getPastStream());
00107 }
00108 
00109 void ContextTracker::update()
00110 {
00111     std::stringstream change;
00112 
00113     // prepend partially entered token to change if exists, need to
00114     // look into sliding_window to get previously partially entered
00115     // token if it exists
00116     std::stringstream sliding_window_stream;
00117     sliding_window_stream << contextChangeDetector->get_sliding_window();
00118     ReverseTokenizer rTok(sliding_window_stream,
00119                           blankspaceChars,
00120                           separatorChars);
00121     std::string first_token = rTok.nextToken();
00122     if (!first_token.empty()) {
00123         change << first_token;
00124     }
00125 
00126     logger << DEBUG << "update(): getPastStream(): " << getPastStream() << endl;
00127 
00128     // append change detected by context change detector
00129     change << contextChangeDetector->change(getPastStream());
00130 
00131     logger << INFO << "update(): change: " << change.str() << endl;
00132 
00133     // split change up into tokens
00134     std::vector<std::string> change_tokens;
00135     ForwardTokenizer tok(change,
00136                          blankspaceChars,
00137                          separatorChars);
00138     logger << INFO << "update(): tokenized change: ";
00139     while (tok.hasMoreTokens()) {
00140         std::string token = tok.nextToken();
00141         change_tokens.push_back(token);
00142         logger << INFO << token << ':';
00143     }
00144     logger << INFO << endl;
00145 
00146     if (! change_tokens.empty()) {
00147         // remove prefix (partially entered token or empty token)
00148         change_tokens.pop_back();
00149     }
00150 
00151     logger << INFO << "update(): change tokens: ";
00152     for (std::vector<std::string>::const_iterator it = change_tokens.begin();
00153          it != change_tokens.end();
00154          it++) {
00155         logger << INFO << *it << ':';
00156     }
00157     logger << INFO << endl;
00158 
00159     // time to learn
00160     PredictorRegistry::Iterator it = predictorRegistry->iterator();
00161     Predictor* predictor = 0;
00162 
00163     while (it.hasNext()) {
00164         predictor = it.next();
00165         predictor->learn(change_tokens);
00166     }
00167 
00168     // update sliding window
00169     contextChangeDetector->update_sliding_window(getPastStream());
00170 }
00171 
00172 std::string ContextTracker::getPrefix() const
00173 {
00174     return getToken(0);
00175 }
00176 
00177 std::string ContextTracker::getToken(const int index) const
00178 {
00179     std::stringstream pastStringStream(context_tracker_callback->get_past_stream());
00180     ReverseTokenizer tokenizer(pastStringStream, blankspaceChars, separatorChars);
00181 
00182     std::string token;
00183     int i = 0;
00184     while (tokenizer.hasMoreTokens() && i <= index) {
00185         token = tokenizer.nextToken();
00186         i++;
00187     }
00188     if (i <= index) {
00189         // in case the index points too far back
00190         token = "";
00191     }
00192     return token;
00193 
00195 //    "a b c"
00196 //     2 1 0
00197 //     0 1 2
00198 //     1 2 3
00199 //
00200 //    ForwardTokenizer tokenizer(pastStream, blankspaceChars, separatorChars);
00201 //    std::string result;
00202 //    int tokens = tokenizer.countTokens();
00203 //    // why oh why is this clear() required to get it to work???
00204 //    pastStream.clear();
00205 //    int j = 0;
00206 //    while (tokenizer.hasMoreTokens() && j < tokens - index) {
00207 //      result = tokenizer.nextToken();
00208 //      j++;
00209 //
00210 //      std::cerr << "ContextTracker::getToken() current token: " << result << std::endl;
00211 //    }
00212 //    return result;
00213 }
00214 
00215 std::string ContextTracker::getSlidingWindowToken(const int index) const
00216 {
00217     std::stringstream slidingWindowStream(contextChangeDetector->get_sliding_window());
00218     ReverseTokenizer tokenizer(slidingWindowStream, blankspaceChars, separatorChars);
00219 
00220     std::string token;
00221     int i = 0;
00222     while (tokenizer.hasMoreTokens() && i <= index) {
00223         token = tokenizer.nextToken();
00224         i++;
00225     }
00226     if (i <= index) {
00227         // in case the index points too far back
00228         token = "";
00229     }
00230     return token;
00231 }
00232 
00233 std::string ContextTracker::getFutureStream() const
00234 {
00235     return context_tracker_callback->get_future_stream();
00236 }
00237 
00238 std::string ContextTracker::getPastStream() const
00239 {
00240     std::string result = context_tracker_callback->get_past_stream();
00241     return result;
00242 }
00243 
00244 bool ContextTracker::isCompletionValid(const std::string& completion) const
00245 {
00246     bool result = false;
00247 
00248     std::string prefix = getPrefix();
00249     prefix = Utility::strtolower(prefix);  // no need to be case sensitive
00250     if (completion.find(prefix) == 0) {
00251         result = true;
00252     }
00253 
00254     return result;
00255 }
00256 
00257 bool ContextTracker::isWordChar(const char c) const
00258 {
00259     if(wordChars.find(c, 0) != std::string::npos)
00260         return true;
00261     else
00262         return false;
00263 }
00264 
00265 bool ContextTracker::isSeparatorChar(const char c) const
00266 {
00267     if(separatorChars.find(c, 0) != std::string::npos)
00268         return true;
00269     else
00270         return false;
00271 }
00272 
00273 bool ContextTracker::isBlankspaceChar(const char c) const
00274 {
00275     if(blankspaceChars.find(c, 0) != std::string::npos)
00276         return true;
00277     else
00278         return false;
00279 }
00280 
00281 bool ContextTracker::isControlChar(const char c) const
00282 {
00283     if(controlChars.find(c, 0) != std::string::npos)
00284         return true;
00285     else
00286         return false;
00287 }
00288 
00289 std::string ContextTracker::getWordChars() const
00290 {
00291     return wordChars;
00292 }
00293 
00294 std::string ContextTracker::getSeparatorChars() const
00295 {
00296     return separatorChars;
00297 }
00298 
00299 std::string ContextTracker::getBlankspaceChars() const
00300 {
00301     return blankspaceChars;
00302 }
00303 
00304 std::string ContextTracker::getControlChars() const
00305 {
00306     return controlChars;
00307 }
00308 
00309 std::string ContextTracker::toString() const
00310 {
00311     return context_tracker_callback->get_past_stream() + "<|>" + context_tracker_callback->get_future_stream() + "\n";
00312 }
00313 
00314 void ContextTracker::update (const Observable* variable)
00315 {
00316     logger << DEBUG << "Notification received: "
00317            << variable->get_name () << " - " << variable->get_value () << endl;
00318 
00319     dispatcher.dispatch (variable);
00320 }