presage  0.8.7
tokenizer.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************
00003  *  Presage, an extensible predictive text entry system
00004  *  ---------------------------------------------------
00005  *
00006  *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012 
00013     This program is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016     GNU General Public License for more details.
00017 
00018     You should have received a copy of the GNU General Public License along
00019     with this program; if not, write to the Free Software Foundation, Inc.,
00020     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00021                                                                              *
00022                                                                 **********(*)*/
00023 
00024 
00025 #include "tokenizer.h"
00026 
00027 Tokenizer::Tokenizer(
00028     std::istream& is,
00029     const std::string   blankspaces,
00030     const std::string   separators
00031 )
00032     : stream(is),
00033       lowercase(false)
00034 {
00035     // this should be changed to deal with a !good() stream
00036     // appropriately
00037     //assert(stream.good());
00038 
00039     offset = stream.tellg();
00040     sstate = stream.rdstate();
00041 
00042     StreamGuard(stream, offset);
00043 
00044     stream.seekg(0, std::ios::end);
00045     offend = stream.tellg();
00046     stream.seekg(0, std::ios::beg);
00047     offbeg = stream.tellg();
00048 
00049     blankspaceChars(blankspaces);
00050     separatorChars (separators );
00051 }
00052 
00053 Tokenizer::~Tokenizer()
00054 {
00055     // reset stream state to enable repeatability
00056     // (see reverseTokenizerTest::testRepeatability())
00057     stream.setstate(sstate);
00058     stream.clear();
00059 }
00060 
00061 void Tokenizer::blankspaceChars(const std::string chars)
00062 {
00063     blankspaces = chars;
00064 }
00065 
00066 std::string Tokenizer::blankspaceChars() const
00067 {
00068     return blankspaces;
00069 }
00070 
00071 void Tokenizer::separatorChars(const std::string chars)
00072 {
00073     separators = chars;
00074 }
00075 
00076 std::string Tokenizer::separatorChars() const
00077 {
00078     return separators;
00079 }
00080 
00081 void Tokenizer::lowercaseMode(const bool value)
00082 {
00083     lowercase = value;
00084 }
00085 
00086 bool Tokenizer::lowercaseMode() const
00087 {
00088     return lowercase;
00089 }
00090 
00091 bool Tokenizer::isBlankspace(const int character) const
00092 {
00093     std::string::size_type ret = blankspaces.find(character);
00094     if (ret == std::string::npos) {
00095         return false;
00096     } else {
00097         return true;
00098     }
00099 }
00100 
00101 bool Tokenizer::isSeparator(const int character) const
00102 {
00103     std::string::size_type ret = separators.find(character);
00104     if (ret == std::string::npos) {
00105         return false;
00106     } else {
00107         return true;
00108     }
00109 }