presage
0.8.7
|
/******************************************************
 *  Presage, an extensible predictive text entry system
 *  ---------------------------------------------------
 *
 *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
                                                                             *
                                                                **********(*)*/


#ifndef PRESAGE_TOKENIZER
#define PRESAGE_TOKENIZER

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <iostream>
#include <istream>
#include <string>
#include <assert.h>

/** Abstract base class for tokenizers that read from an input stream.
 *
 * The class holds a reference to a caller-owned std::istream together
 * with a set of "blankspace" and "separator" characters. Concrete
 * tokenizers must implement countTokens(), hasMoreTokens(), nextToken()
 * and progress().
 *
 * The stream is held by reference and is never copied, so it must
 * outlive the tokenizer.
 *
 * NOTE(review): the constructor, the destructor and the non-inline
 * accessors are defined in another translation unit; what is said about
 * them below is inferred from their names and signatures and should be
 * confirmed against the implementation.
 */
class Tokenizer {
public:
    /** Builds a tokenizer bound to \p stream.
     *
     * @param stream      input stream to tokenize (held by reference)
     * @param blankspaces presumably the characters treated as blankspace
     * @param separators  presumably the characters treated as separators
     */
    Tokenizer(std::istream& stream,
              const std::string blankspaces,
              const std::string separators );
    virtual ~Tokenizer();

    /** Presumably returns the number of tokens in the stream
     *  (implemented by derived classes).
     */
    virtual int countTokens() = 0;

    /** Presumably tells whether nextToken() can yield another token
     *  (implemented by derived classes).
     */
    virtual bool hasMoreTokens() const = 0;

    /** Presumably returns the next token
     *  (implemented by derived classes).
     */
    virtual std::string nextToken() = 0;

    /** Presumably reports how far tokenization has advanced; the scale
     *  of the returned value is defined by derived classes.
     */
    virtual double progress() const = 0;


    /** Setter for the blankspace character set (defined elsewhere). */
    void blankspaceChars(const std::string);
    /** Getter for the blankspace character set (defined elsewhere). */
    std::string blankspaceChars() const;

    /** Setter for the separator character set (defined elsewhere). */
    void separatorChars(const std::string);
    /** Getter for the separator character set (defined elsewhere). */
    std::string separatorChars() const;

    /** Setter for the lowercase flag (defined elsewhere). */
    void lowercaseMode(const bool);
    /** Getter for the lowercase flag (defined elsewhere). */
    bool lowercaseMode() const;

    /** Returns the characters found at stream offsets [offbeg, offend).
     *
     * Each character is fetched by seeking to its offset and peeking,
     * so the read position is restored to where it was on entry.
     *
     * Error flags are cleared before the first tell/seek: tellg() and
     * seekg() do nothing useful on a failed stream, which would
     * otherwise yield an invalid saved offset and a first character
     * read from the wrong position. As a consequence the stream's error
     * flags are cleared when the range is not empty.
     *
     * Reading stops early if the stream ends before offend, so that the
     * end-of-file marker is never appended to the result as a
     * character.
     *
     * @return the characters in the range; empty if offbeg >= offend
     */
    std::string streamToString() const {
        std::string str;
        if (!(offbeg < offend)) {
            // nothing to read: leave the stream completely untouched
            return str;
        }

        // make sure tellg()/seekg() operate on a usable stream
        stream.clear();
        const std::streamoff offbackup = stream.tellg();

        for (std::streamoff curroff = offbeg; curroff < offend; ++curroff) {
            stream.clear();
            stream.seekg(curroff);
            const std::istream::int_type character = stream.peek();
            if (std::istream::traits_type::eq_int_type(
                    character, std::istream::traits_type::eof())) {
                // stream is shorter than [offbeg, offend): stop here
                break;
            }
            str.push_back(std::istream::traits_type::to_char_type(character));
        }

        // peek() may have raised eofbit; clear it so the seek below works
        stream.clear();
        if (offbackup != std::streamoff(-1)) {
            stream.seekg(offbackup);
        }
        return str;
    }

protected:
    /** Scoped helper that repositions a stream and puts it back.
     *
     * On construction it records the stream's state flags and read
     * position and seeks to the requested offset; on destruction it
     * seeks back to the recorded position and re-applies the recorded
     * state flags.
     */
    class StreamGuard {
    public:
        /** Records state and position of \p so, then seeks to \p of.
         *
         * Error flags are cleared before calling tellg()/seekg(),
         * because both are ineffective while the stream is in a failed
         * state.
         */
        StreamGuard(std::istream& so, std::streamoff& of)
            : guardedStream(so) {
            currstate = guardedStream.rdstate();
            guardedStream.clear();
            curroff   = guardedStream.tellg();
            guardedStream.seekg(of);
        }

        /** Restores the recorded position and state flags.
         *
         * Flags raised while the guard was alive are cleared first;
         * otherwise seekg() would not move a failed stream and
         * setstate(), which can only add flags, would let those flags
         * leak out of the guarded scope.
         */
        ~StreamGuard() {
            guardedStream.clear();
            if (curroff != std::streamoff(-1)) {
                // only seek back if the original position was known
                guardedStream.seekg(curroff);
            }
            guardedStream.setstate(currstate);
        }

    private:
        std::istream&     guardedStream; ///< stream being guarded
        std::ios::iostate currstate;     ///< state flags on construction
        std::streamoff    curroff;       ///< read position on construction
    };

    std::istream&     stream;  ///< stream being tokenized (not owned)
    std::ios::iostate sstate;  ///< stream state flags (set by code outside this header)
    std::streamoff    offbeg;  ///< first offset of the tokenized range
    std::streamoff    offend;  ///< one-past-last offset of the tokenized range
    std::streamoff    offset;  ///< presumably the current tokenizing offset

    /** Presumably tests \p character against the blankspace set (defined elsewhere). */
    bool isBlankspace(const int character) const;
    /** Presumably tests \p character against the separator set (defined elsewhere). */
    bool isSeparator (const int character) const;

private:
    std::string blankspaces;  ///< blankspace character set
    std::string separators;   ///< separator character set

    bool lowercase;           ///< lowercase mode flag
};

#endif // PRESAGE_TOKENIZER