presage
0.8.7
|
#include <ARPAPredictor.h>
Public Member Functions | |
ARPAPredictor (Configuration *, ContextTracker *) | |
~ARPAPredictor () | |
virtual Prediction | predict (const size_t size, const char **filter) const |
Generate prediction. | |
virtual void | learn (const std::vector< std::string > &change) |
virtual void | update (const Observable *variable) |
void | set_vocab_filename (const std::string &value) |
void | set_arpa_filename (const std::string &value) |
void | set_timeout (const std::string &value) |
Private Member Functions | |
void | loadVocabulary () |
void | createARPATable () |
bool | matchesPrefixAndFilter (std::string, std::string, const char **) const |
void | addUnigram (std::string) |
void | addBigram (std::string) |
void | addTrigram (std::string) |
float | computeTrigramBackoff (int, int, int) const |
float | computeBigramBackoff (int, int) const |
Private Attributes | |
std::string | arpaFilename |
std::string | vocabFilename |
int | timeout |
std::map< std::string, int > | vocabCode |
std::map< int, std::string > | vocabDecode |
std::map< int, ARPAData > | unigramMap |
std::map< BigramKey, ARPAData > | bigramMap |
std::map< TrigramKey, float > | trigramMap |
int | unigramCount |
int | bigramCount |
int | trigramCount |
int | unigramTot |
int | bigramTot |
int | trigramTot |
ProgressBar< char > * | unigramProg |
ProgressBar< char > * | bigramProg |
ProgressBar< char > * | trigramProg |
Dispatcher< ARPAPredictor > | dispatcher |
Static Private Attributes | |
static const char * | LOGGER = "Presage.Predictors.ARPAPredictor.LOGGER" |
static const char * | ARPAFILENAME = "Presage.Predictors.ARPAPredictor.ARPAFILENAME" |
static const char * | VOCABFILENAME = "Presage.Predictors.ARPAPredictor.VOCABFILENAME" |
static const char * | TIMEOUT = "Presage.Predictors.ARPAPredictor.TIMEOUT" |
Smoothed n-gram statistical predictor.
Definition at line 112 of file ARPAPredictor.h.
ARPAPredictor::ARPAPredictor | ( | Configuration * | config, |
ContextTracker * | ct | ||
) |
Definition at line 42 of file ARPAPredictor.cpp.
References ARPAFILENAME, createARPATable(), dispatcher, Configuration::find(), loadVocabulary(), LOGGER, Dispatcher< class_t >::map(), set_arpa_filename(), Predictor::set_logger(), set_timeout(), set_vocab_filename(), TIMEOUT, and VOCABFILENAME.
ARPAPredictor::~ARPAPredictor | ( | ) |
Definition at line 298 of file ARPAPredictor.cpp.
References bigramProg, trigramProg, and unigramProg.
void ARPAPredictor::addBigram | ( | std::string | row | ) | [private] |
Definition at line 241 of file ARPAPredictor.cpp.
References bigramCount, bigramMap, bigramProg, bigramTot, OOV, ProgressBar< _charT, _Traits >::update(), and vocabCode.
Referenced by createARPATable().
void ARPAPredictor::addTrigram | ( | std::string | row | ) | [private] |
Definition at line 268 of file ARPAPredictor.cpp.
References OOV, trigramCount, trigramMap, trigramProg, trigramTot, ProgressBar< _charT, _Traits >::update(), and vocabCode.
Referenced by createARPATable().
void ARPAPredictor::addUnigram | ( | std::string | row | ) | [private] |
Definition at line 214 of file ARPAPredictor.cpp.
References OOV, unigramCount, unigramMap, unigramProg, unigramTot, ProgressBar< _charT, _Traits >::update(), and vocabCode.
Referenced by createARPATable().
float ARPAPredictor::computeBigramBackoff | ( | int | wd1, |
int | wd2 | ||
) | const [inline, private] |
Computes P(wd2 | wd1)
Definition at line 443 of file ARPAPredictor.cpp.
References bigramMap, and unigramMap.
Referenced by computeTrigramBackoff(), and predict().
float ARPAPredictor::computeTrigramBackoff | ( | int | wd1, |
int | wd2, | ||
int | wd3 | ||
) | const [inline, private] |
Computes P(wd3 | wd1 wd2)
Definition at line 409 of file ARPAPredictor.cpp.
References bigramMap, computeBigramBackoff(), endl(), Predictor::logger, trigramMap, and vocabDecode.
Referenced by predict().
void ARPAPredictor::createARPATable | ( | ) | [private] |
Definition at line 106 of file ARPAPredictor.cpp.
References addBigram(), addTrigram(), addUnigram(), arpaFilename, bigramCount, bigramProg, bigramTot, endl(), Predictor::logger, trigramCount, trigramProg, trigramTot, unigramCount, unigramProg, and unigramTot.
Referenced by ARPAPredictor().
void ARPAPredictor::learn | ( | const std::vector< std::string > & | change | ) | [virtual] |
Implements Predictor.
Definition at line 455 of file ARPAPredictor.cpp.
References endl(), and Predictor::logger.
void ARPAPredictor::loadVocabulary | ( | ) | [private] |
Definition at line 79 of file ARPAPredictor.cpp.
References endl(), Predictor::logger, vocabCode, vocabDecode, and vocabFilename.
Referenced by ARPAPredictor().
bool ARPAPredictor::matchesPrefixAndFilter | ( | std::string | word, |
std::string | prefix, | ||
const char ** | filter | ||
) | const [private] |
Definition at line 305 of file ARPAPredictor.cpp.
Referenced by predict().
Prediction ARPAPredictor::predict | ( | const size_t | size, |
const char ** | filter | ||
) | const [virtual] |
Generate prediction.
size | desired prediction size |
filter | filter |
Note: if we do not have enough tokens to compute 3-gram probabilities, we compute 2-gram or 1-gram probabilities instead. The following code might be repetitive, but it is more efficient than having the main loop outside.
Implements Predictor.
Definition at line 320 of file ARPAPredictor.cpp.
References Prediction::addSuggestion(), computeBigramBackoff(), computeTrigramBackoff(), Predictor::contextTracker, endl(), ContextTracker::getToken(), Predictor::logger, matchesPrefixAndFilter(), Utility::strtolower(), unigramMap, vocabCode, and vocabDecode.
void ARPAPredictor::set_arpa_filename | ( | const std::string & | value | ) |
Definition at line 67 of file ARPAPredictor.cpp.
References arpaFilename, endl(), and Predictor::logger.
Referenced by ARPAPredictor().
void ARPAPredictor::set_timeout | ( | const std::string & | value | ) |
Definition at line 73 of file ARPAPredictor.cpp.
References endl(), Predictor::logger, and timeout.
Referenced by ARPAPredictor().
void ARPAPredictor::set_vocab_filename | ( | const std::string & | value | ) |
Definition at line 61 of file ARPAPredictor.cpp.
References endl(), Predictor::logger, and vocabFilename.
Referenced by ARPAPredictor().
void ARPAPredictor::update | ( | const Observable * | variable | ) | [virtual] |
Implements Observer.
Definition at line 461 of file ARPAPredictor.cpp.
References Dispatcher< class_t >::dispatch(), dispatcher, endl(), Observable::get_name(), Observable::get_value(), and Predictor::logger.
const char * ARPAPredictor::ARPAFILENAME = "Presage.Predictors.ARPAPredictor.ARPAFILENAME" [static, private] |
Definition at line 130 of file ARPAPredictor.h.
Referenced by ARPAPredictor().
std::string ARPAPredictor::arpaFilename [private] |
Definition at line 134 of file ARPAPredictor.h.
Referenced by createARPATable(), and set_arpa_filename().
int ARPAPredictor::bigramCount [private] |
Definition at line 157 of file ARPAPredictor.h.
Referenced by addBigram(), and createARPATable().
std::map<BigramKey,ARPAData> ARPAPredictor::bigramMap [private] |
Definition at line 142 of file ARPAPredictor.h.
Referenced by addBigram(), computeBigramBackoff(), and computeTrigramBackoff().
ProgressBar<char>* ARPAPredictor::bigramProg [private] |
Definition at line 165 of file ARPAPredictor.h.
Referenced by addBigram(), createARPATable(), and ~ARPAPredictor().
int ARPAPredictor::bigramTot [private] |
Definition at line 161 of file ARPAPredictor.h.
Referenced by addBigram(), and createARPATable().
Dispatcher<ARPAPredictor> ARPAPredictor::dispatcher [private] |
Definition at line 168 of file ARPAPredictor.h.
Referenced by ARPAPredictor(), and update().
const char * ARPAPredictor::LOGGER = "Presage.Predictors.ARPAPredictor.LOGGER" [static, private] |
Definition at line 129 of file ARPAPredictor.h.
Referenced by ARPAPredictor().
const char * ARPAPredictor::TIMEOUT = "Presage.Predictors.ARPAPredictor.TIMEOUT" [static, private] |
Definition at line 132 of file ARPAPredictor.h.
Referenced by ARPAPredictor().
int ARPAPredictor::timeout [private] |
Definition at line 136 of file ARPAPredictor.h.
Referenced by set_timeout().
int ARPAPredictor::trigramCount [private] |
Definition at line 158 of file ARPAPredictor.h.
Referenced by addTrigram(), and createARPATable().
std::map<TrigramKey,float> ARPAPredictor::trigramMap [private] |
Definition at line 143 of file ARPAPredictor.h.
Referenced by addTrigram(), and computeTrigramBackoff().
ProgressBar<char>* ARPAPredictor::trigramProg [private] |
Definition at line 166 of file ARPAPredictor.h.
Referenced by addTrigram(), createARPATable(), and ~ARPAPredictor().
int ARPAPredictor::trigramTot [private] |
Definition at line 162 of file ARPAPredictor.h.
Referenced by addTrigram(), and createARPATable().
int ARPAPredictor::unigramCount [private] |
Definition at line 156 of file ARPAPredictor.h.
Referenced by addUnigram(), and createARPATable().
std::map<int,ARPAData> ARPAPredictor::unigramMap [private] |
Definition at line 141 of file ARPAPredictor.h.
Referenced by addUnigram(), computeBigramBackoff(), and predict().
ProgressBar<char>* ARPAPredictor::unigramProg [private] |
Definition at line 164 of file ARPAPredictor.h.
Referenced by addUnigram(), createARPATable(), and ~ARPAPredictor().
int ARPAPredictor::unigramTot [private] |
Definition at line 160 of file ARPAPredictor.h.
Referenced by addUnigram(), and createARPATable().
std::map<std::string,int> ARPAPredictor::vocabCode [private] |
Definition at line 138 of file ARPAPredictor.h.
Referenced by addBigram(), addTrigram(), addUnigram(), loadVocabulary(), and predict().
std::map<int,std::string> ARPAPredictor::vocabDecode [private] |
Definition at line 139 of file ARPAPredictor.h.
Referenced by computeTrigramBackoff(), loadVocabulary(), and predict().
const char * ARPAPredictor::VOCABFILENAME = "Presage.Predictors.ARPAPredictor.VOCABFILENAME" [static, private] |
Definition at line 131 of file ARPAPredictor.h.
Referenced by ARPAPredictor().
std::string ARPAPredictor::vocabFilename [private] |
Definition at line 135 of file ARPAPredictor.h.
Referenced by loadVocabulary(), and set_vocab_filename().