presage
0.8.7
|
00001 00002 /****************************************************** 00003 * Presage, an extensible predictive text entry system 00004 * --------------------------------------------------- 00005 * 00006 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk> 00007 00008 This program is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 2 of the License, or 00011 (at your option) any later version. 00012 00013 This program is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License along 00019 with this program; if not, write to the Free Software Foundation, Inc., 00020 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 00021 * 00022 **********(*)*/ 00023 00024 00025 #include "abbreviationExpansionPredictor.h" 00026 00027 #include <fstream> 00028 00029 00030 const char* AbbreviationExpansionPredictor::LOGGER = "Presage.Predictors.AbbreviationExpansionPredictor.LOGGER"; 00031 const char* AbbreviationExpansionPredictor::ABBREVIATIONS = "Presage.Predictors.AbbreviationExpansionPredictor.ABBREVIATIONS"; 00032 00033 AbbreviationExpansionPredictor::AbbreviationExpansionPredictor(Configuration* config, ContextTracker* ct) 00034 : Predictor(config, 00035 ct, 00036 "AbbreviationExpansionPredictor", 00037 "AbbreviationExpansionPredictor, maps abbreviations to the corresponding fully expanded token.", 00038 "AbbreviationExpansionPredictor maps abbreviations to the corresponding fully expanded token (i.e. word or phrase).\n\nThe mapping between abbreviations and expansions is stored in the file specified by the predictor configuration section.\n\nThe format for the abbreviation-expansion database is a simple tab separated text file format, with each abbreviation-expansion pair per line." 00039 ), 00040 dispatcher (this) 00041 { 00042 // build notification dispatch map 00043 dispatcher.map (config->find (LOGGER), & AbbreviationExpansionPredictor::set_logger); 00044 dispatcher.map (config->find (ABBREVIATIONS), & AbbreviationExpansionPredictor::set_abbreviations); 00045 } 00046 00047 AbbreviationExpansionPredictor::~AbbreviationExpansionPredictor() 00048 { 00049 // complete 00050 } 00051 00052 00053 void AbbreviationExpansionPredictor::set_abbreviations (const std::string& filename) 00054 { 00055 abbreviations = filename; 00056 logger << INFO << "ABBREVIATIONS:" << abbreviations << endl; 00057 00058 cacheAbbreviationsExpansions(); 00059 } 00060 00061 00062 Prediction AbbreviationExpansionPredictor::predict(const size_t max_partial_predictions_size, const char** filter) const 00063 { 00064 Prediction result; 00065 00066 std::map< std::string, std::string >::const_iterator it = 00067 cache.find(contextTracker->getPrefix()); 00068 00069 if (it != cache.end()){ 00070 //result.addSuggestion(Suggestion(it->second, 1.0)); 00071 00072 // prepend expansion with enough backspaces to erase 00073 // abbreviation 00074 std::string expansion(contextTracker->getPrefix().size(), '\b'); 00075 00076 // concatenate actual expansion 00077 expansion += it->second; 00078 00079 result.addSuggestion(Suggestion(expansion, 1.0)); 00080 00081 } else { 00082 logger << NOTICE << "Could not find expansion for abbreviation: " << contextTracker->getPrefix() << endl; 00083 } 00084 00085 return result; 00086 } 00087 00088 void AbbreviationExpansionPredictor::learn(const std::vector<std::string>& change) 00089 {} 00090 00091 void AbbreviationExpansionPredictor::cacheAbbreviationsExpansions() 00092 { 00093 cache.clear(); 00094 00095 std::ifstream abbr_file(abbreviations.c_str()); 00096 if (!abbr_file) { 00097 logger << ERROR << "Could not open abbreviations file: " << abbreviations << endl; 00098 // TODO: throw exception here 00099 // 00100 00101 } else { 00102 logger << INFO << "Caching abbreviations/expansions from file: " << abbreviations << endl; 00103 00104 std::string buffer; 00105 std::string abbreviation; 00106 std::string expansion; 00107 std::string::size_type tab_pos; 00108 while (getline(abbr_file, buffer)) { 00109 tab_pos = buffer.find_first_of('\t'); 00110 if (tab_pos == std::string::npos) { 00111 logger << ERROR << "Error reading abbreviations/expansions from file: " << abbreviations << endl; 00112 } else { 00113 abbreviation = buffer.substr(0, tab_pos); 00114 expansion = buffer.substr(tab_pos + 1, std::string::npos); 00115 00116 logger << INFO << "Caching abbreviation: " << abbreviation << " - expansion: " << expansion << endl; 00117 cache[abbreviation] = expansion; 00118 } 00119 } 00120 00121 abbr_file.close(); 00122 } 00123 } 00124 00125 void AbbreviationExpansionPredictor::update (const Observable* var) 00126 { 00127 logger << DEBUG << "About to invoke dispatcher: " << var->get_name () << " - " << var->get_value() << endl; 00128 dispatcher.dispatch (var); 00129 } 00130