presage
0.8.7
|
00001 00002 /****************************************************** 00003 * Presage, an extensible predictive text entry system 00004 * --------------------------------------------------- 00005 * 00006 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk> 00007 00008 This program is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 2 of the License, or 00011 (at your option) any later version. 00012 00013 This program is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License along 00019 with this program; if not, write to the Free Software Foundation, Inc., 00020 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 00021 * 00022 **********(*)*/ 00023 00024 00025 #include "tokenizer.h" 00026 00027 Tokenizer::Tokenizer( 00028 std::istream& is, 00029 const std::string blankspaces, 00030 const std::string separators 00031 ) 00032 : stream(is), 00033 lowercase(false) 00034 { 00035 // this should be changed to deal with a !good() stream 00036 // appropriately 00037 //assert(stream.good()); 00038 00039 offset = stream.tellg(); 00040 sstate = stream.rdstate(); 00041 00042 StreamGuard(stream, offset); 00043 00044 stream.seekg(0, std::ios::end); 00045 offend = stream.tellg(); 00046 stream.seekg(0, std::ios::beg); 00047 offbeg = stream.tellg(); 00048 00049 blankspaceChars(blankspaces); 00050 separatorChars (separators ); 00051 } 00052 00053 Tokenizer::~Tokenizer() 00054 { 00055 // reset stream state to enable repeatability 00056 // (see reverseTokenizerTest::testRepeatability()) 00057 stream.setstate(sstate); 00058 stream.clear(); 00059 } 00060 00061 void Tokenizer::blankspaceChars(const std::string chars) 00062 { 00063 blankspaces = chars; 00064 } 00065 00066 std::string Tokenizer::blankspaceChars() const 00067 { 00068 return blankspaces; 00069 } 00070 00071 void Tokenizer::separatorChars(const std::string chars) 00072 { 00073 separators = chars; 00074 } 00075 00076 std::string Tokenizer::separatorChars() const 00077 { 00078 return separators; 00079 } 00080 00081 void Tokenizer::lowercaseMode(const bool value) 00082 { 00083 lowercase = value; 00084 } 00085 00086 bool Tokenizer::lowercaseMode() const 00087 { 00088 return lowercase; 00089 } 00090 00091 bool Tokenizer::isBlankspace(const int character) const 00092 { 00093 std::string::size_type ret = blankspaces.find(character); 00094 if (ret == std::string::npos) { 00095 return false; 00096 } else { 00097 return true; 00098 } 00099 } 00100 00101 bool Tokenizer::isSeparator(const int character) const 00102 { 00103 std::string::size_type ret = separators.find(character); 00104 if (ret == std::string::npos) { 00105 return false; 00106 } else { 00107 return true; 00108 } 00109 }