presage  0.8.7
combiner.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************
00003  *  Presage, an extensible predictive text entry system
00004  *  ---------------------------------------------------
00005  *
00006  *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012 
00013     This program is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016     GNU General Public License for more details.
00017 
00018     You should have received a copy of the GNU General Public License along
00019     with this program; if not, write to the Free Software Foundation, Inc.,
00020     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00021                                                                              *
00022                                                                 **********(*)*/
00023 
00024 
#include "combiner.h"
#include "profile.h"

#include <map>
#include <set>
#include <string>
#include <vector>
00029 
00030 Combiner::Combiner()
00031 {
00032     // intentionally empty
00033 }
00034 
00035 Combiner::~Combiner()
00036 {
00037     // intentionally empty
00038 }
00039 
00040 Prediction Combiner::filter(const Prediction& prediction) const
00041 {
00042     Prediction result;
00043 
00044     std::set<std::string> seen_tokens;
00045 
00046     int size = prediction.size();
00047     Suggestion suggestion;
00048     std::string token;
00049     for (int i = 0; i < size; i++) {
00050         suggestion = prediction.getSuggestion(i);
00051         token = suggestion.getWord();
00052         //std::cerr << "[filter] token: " << token << std::endl;
00053         if (seen_tokens.find(token) == seen_tokens.end()) {
00054             // if token has not been seen before, then look for
00055             // potential duplicates and add the interpolated combined
00056             // probability and remember that this token has now been
00057             // processed
00058             //
00059             //std::cerr << "[filter] searching for possible duplicates" << std::endl;
00060             for (int j = i + 1; j < size; j++) {
00061                 if (suggestion.getWord() == prediction.getSuggestion(j).getWord()) {
00062                     double new_prob = suggestion.getProbability()
00063                         + prediction.getSuggestion(j).getProbability();
00064                     suggestion.setProbability((new_prob > Suggestion::MAX_PROBABILITY ? 
00065                                                Suggestion::MAX_PROBABILITY : new_prob));
00066                     //std::cerr << "[filter] duplicate found, adjusting probability" << std::endl;
00067                 }
00068             }
00069             seen_tokens.insert(suggestion.getWord());
00070             result.addSuggestion(suggestion);
00071             //std::cerr << "[filter] added token " << token << std::endl;
00072         }
00073     }
00074 
00075     return result;
00076 }