presage
0.8.7
|
00001 00002 /****************************************************** 00003 * Presage, an extensible predictive text entry system 00004 * --------------------------------------------------- 00005 * 00006 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk> 00007 00008 This program is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 2 of the License, or 00011 (at your option) any later version. 00012 00013 This program is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License along 00019 with this program; if not, write to the Free Software Foundation, Inc., 00020 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 00021 * 00022 **********(*)*/ 00023 00024 00025 #include "reverseTokenizer.h" 00026 00027 ReverseTokenizer::ReverseTokenizer(std::istream& stream, 00028 const std::string blanks, 00029 const std::string separs) 00030 : Tokenizer(stream, blanks, separs) 00031 { 00032 offset = offend; 00033 //assert( stream.good()); 00034 //assert(!stream.fail()); 00035 //assert(!stream.bad() ); 00036 //assert(!stream.eof() ); 00037 // stream clearing needed because offset is positioned at end 00038 stream.clear(); 00039 00040 //std::cerr << "ReverseTokenizer::ReverseTokenizer() offbeg: " << offbeg 00041 // << " offset: " << offset << " offend: " << offend << std::endl; 00042 } 00043 00044 ReverseTokenizer::~ReverseTokenizer() 00045 {} 00046 00047 int ReverseTokenizer::countTokens() 00048 { 00049 StreamGuard guard(stream, offset); 00050 00051 // store current seek pointer position 00052 std::streamoff curroff = offset; 00053 00054 // position get pointer at end of stream 00055 offset = offend; 00056 00057 int count = 0; 00058 while (hasMoreTokens()) { 00059 nextToken(); 00060 count++; 00061 } 00062 00063 // reposition seek get pointer to original position 00064 offset = curroff; 00065 00066 return count; 00067 } 00068 00069 bool ReverseTokenizer::hasMoreTokens() const 00070 { 00071 //std::cerr << "ReverseTokenizer::hasMoreTokens() offbeg: " << offbeg 00072 // << " offset: " << offset << " offend: " << offend << std::endl; 00073 if (offbeg < offset) { 00074 return true; 00075 } else { 00076 return false; 00077 } 00078 } 00079 00080 std::string ReverseTokenizer::nextToken() 00081 { 00082 StreamGuard guard(stream, offset); 00083 00084 int current; 00085 std::string str; 00086 00087 if (stream.good()) { 00088 while (offbeg < offset 00089 && str.empty()) { 00090 stream.seekg(offset - 1); 00091 current = stream.peek(); 00092 00093 if (offset == offend && 00094 (isSeparator(current) || isBlankspace(current))) { 00095 offset--; 00096 return str; 00097 } 00098 00099 while ((isBlankspace(current) || isSeparator(current)) 00100 && offbeg < offset ) { 00101 offset--; 00102 stream.seekg(offset - 1); 00103 current = stream.peek(); 00104 } 00105 00106 while (!isBlankspace(current) 00107 && !isSeparator(current) 00108 && offbeg < offset) { 00109 00110 if( lowercaseMode() ) { 00111 current = tolower( current ); 00112 } 00113 00114 // since the token is read backwards, the string 00115 // needs to be reversed by inserting the char at 00116 // the front 00117 str.insert(str.begin(), current); 00118 00119 offset--; 00120 stream.seekg(offset - 1); 00121 current = stream.peek(); 00122 } 00123 } 00124 } 00125 00126 // if (stream.good()) { 00127 // do { 00128 // do { 00129 // current = stream.peek(); 00130 // offset--; 00131 // stream.seekg(offset); 00132 // 00133 // // handle case where last character is a separator by 00134 // // returning an empty token 00135 // if (offset == offend - 2 00136 // && isSeparator(current)) { 00137 // return ""; 00138 // } 00139 // 00140 // //std::cerr << "[DEBUG] read: " 00141 // // << static_cast<char>(current) 00142 // // << std::endl; 00143 // 00144 // if (!isBlankspace(current) 00145 // && !isSeparator(current) 00146 // && offset >= offbeg - 1) { 00147 // 00148 // if( lowercaseMode() ) { 00149 // current = tolower( current ); 00150 // } 00151 // 00152 // // since the token is read backwards, the string 00153 // // needs to be reversed by inserting the char at 00154 // // the front 00155 // str.insert(str.begin(), current); 00156 // 00157 // //std::cerr << "[DEBUG] pushed: " 00158 // // << static_cast<char>(current) 00159 // // << std::endl; 00160 // //std::cerr << "[DEBUG] partial string: " 00161 // // << str << std::endl; 00162 // } 00163 // } while (!isBlankspace(current) 00164 // && !isSeparator(current) 00165 // && (offset >= offbeg)); 00166 // } while (str.empty() && (offset >= offbeg)); 00167 // } 00168 00169 //std::cerr << "[DEBUG] token: " << str << std::endl; 00170 00171 return str; 00172 } 00173 00174 double ReverseTokenizer::progress() const 00175 { 00176 return static_cast<double>(offend - offset) / (offend - offbeg); 00177 }