presage  0.8.7
abbreviationExpansionPredictor.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************
00003  *  Presage, an extensible predictive text entry system
00004  *  ---------------------------------------------------
00005  *
00006  *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012 
00013     This program is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016     GNU General Public License for more details.
00017 
00018     You should have received a copy of the GNU General Public License along
00019     with this program; if not, write to the Free Software Foundation, Inc.,
00020     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00021                                                                              *
00022                                                                              **********(*)*/
00023 
00024 
00025 #include "abbreviationExpansionPredictor.h"
00026 
00027 #include <fstream>
00028 
00029 
00030 const char* AbbreviationExpansionPredictor::LOGGER        = "Presage.Predictors.AbbreviationExpansionPredictor.LOGGER";
00031 const char* AbbreviationExpansionPredictor::ABBREVIATIONS = "Presage.Predictors.AbbreviationExpansionPredictor.ABBREVIATIONS";
00032 
00033 AbbreviationExpansionPredictor::AbbreviationExpansionPredictor(Configuration* config, ContextTracker* ct)
00034     : Predictor(config,
00035                 ct,
00036                 "AbbreviationExpansionPredictor",
00037                 "AbbreviationExpansionPredictor, maps abbreviations to the corresponding fully expanded token.",
00038                 "AbbreviationExpansionPredictor maps abbreviations to the corresponding fully expanded token (i.e. word or phrase).\n\nThe mapping between abbreviations and expansions is stored in the file specified by the predictor configuration section.\n\nThe format for the abbreviation-expansion database is a simple tab separated text file format, with each abbreviation-expansion pair per line."
00039         ),
00040       dispatcher (this)
00041 {
00042     // build notification dispatch map
00043     dispatcher.map (config->find (LOGGER), & AbbreviationExpansionPredictor::set_logger);
00044     dispatcher.map (config->find (ABBREVIATIONS), & AbbreviationExpansionPredictor::set_abbreviations);
00045 }
00046 
00047 AbbreviationExpansionPredictor::~AbbreviationExpansionPredictor()
00048 {
00049     // complete
00050 }
00051 
00052 
00053 void AbbreviationExpansionPredictor::set_abbreviations (const std::string& filename)
00054 {
00055     abbreviations = filename;
00056     logger << INFO << "ABBREVIATIONS:" << abbreviations << endl;
00057 
00058     cacheAbbreviationsExpansions();
00059 }
00060 
00061 
00062 Prediction AbbreviationExpansionPredictor::predict(const size_t max_partial_predictions_size, const char** filter) const
00063 {
00064     Prediction result;
00065 
00066     std::map< std::string, std::string >::const_iterator it = 
00067         cache.find(contextTracker->getPrefix());
00068 
00069     if (it != cache.end()){
00070         //result.addSuggestion(Suggestion(it->second, 1.0));
00071 
00072         // prepend expansion with enough backspaces to erase
00073         // abbreviation
00074         std::string expansion(contextTracker->getPrefix().size(), '\b');
00075 
00076         // concatenate actual expansion
00077         expansion += it->second;
00078 
00079         result.addSuggestion(Suggestion(expansion, 1.0));
00080 
00081     } else {
00082         logger << NOTICE << "Could not find expansion for abbreviation: " << contextTracker->getPrefix() << endl;
00083     }
00084 
00085     return result;
00086 }
00087 
00088 void AbbreviationExpansionPredictor::learn(const std::vector<std::string>& change)
00089 {}
00090 
00091 void AbbreviationExpansionPredictor::cacheAbbreviationsExpansions()
00092 {
00093     cache.clear();
00094 
00095     std::ifstream abbr_file(abbreviations.c_str());
00096     if (!abbr_file) {
00097         logger << ERROR << "Could not open abbreviations file: " << abbreviations << endl;
00098         // TODO: throw exception here
00099         //
00100 
00101     } else {
00102         logger << INFO << "Caching abbreviations/expansions from file: " << abbreviations << endl;
00103     
00104         std::string buffer;
00105         std::string abbreviation;
00106         std::string expansion;
00107         std::string::size_type tab_pos;
00108         while (getline(abbr_file, buffer)) {
00109             tab_pos = buffer.find_first_of('\t');
00110             if (tab_pos == std::string::npos) {
00111                 logger << ERROR << "Error reading abbreviations/expansions from file: " << abbreviations << endl;
00112             } else {
00113                 abbreviation = buffer.substr(0, tab_pos);
00114                 expansion    = buffer.substr(tab_pos + 1, std::string::npos);
00115 
00116                 logger << INFO << "Caching abbreviation: " << abbreviation << " - expansion: " << expansion << endl;
00117                 cache[abbreviation] = expansion;
00118             }
00119         }
00120         
00121         abbr_file.close();
00122     }
00123 }
00124 
00125 void AbbreviationExpansionPredictor::update (const Observable* var)
00126 {
00127     logger << DEBUG << "About to invoke dispatcher: " << var->get_name () << " - " << var->get_value() << endl;
00128     dispatcher.dispatch (var);
00129 }
00130