presage  0.8.7
forwardTokenizer.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************
00003  *  Presage, an extensible predictive text entry system
00004  *  ---------------------------------------------------
00005  *
00006  *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012 
00013     This program is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016     GNU General Public License for more details.
00017 
00018     You should have received a copy of the GNU General Public License along
00019     with this program; if not, write to the Free Software Foundation, Inc.,
00020     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00021                                                                              *
00022                                                                 **********(*)*/
00023 
00024 
00025 #include "forwardTokenizer.h"
00026 
00027 ForwardTokenizer::ForwardTokenizer(std::istream& stream,
00028                                    const std::string blankspaces,
00029                                    const std::string separators)
00030     : Tokenizer(stream, blankspaces, separators)
00031 {
00032     //std::cerr << "ForwardTokenizer::ForwardTokenizer()" << std::endl;
00033     offset = offbeg;
00034 }
00035 
00036 ForwardTokenizer::~ForwardTokenizer()
00037 {}
00038 
00039 int ForwardTokenizer::countTokens()
00040 {
00041     StreamGuard guard(stream, offset);
00042 
00043     // store current seek pointer position
00044     std::streamoff curroff = offset;
00045 
00046     // position get pointer at beginning of stream
00047     offset = offbeg;
00048 
00049     int count = 0;
00050     while (hasMoreTokens()) {
00051         count++;
00052         nextToken();
00053     }
00054 
00055     // reposition seek get pointer to original position
00056     offset = curroff;
00057     
00058     return count;
00059 }
00060 
00061 bool ForwardTokenizer::hasMoreTokens() const
00062 {
00063     //StreamGuard guard(stream, offset);
00064 
00065     if (offset >= offend) {
00066         return false;
00067     } else {
00068         return true;
00069     }
00070 }
00071     
00072 std::string ForwardTokenizer::nextToken()
00073 {
00074     StreamGuard guard(stream, offset);
00075 
00076     int current;
00077     std::string str;
00078 
00079     if (stream.good()) { // good() if bad,fail and eof bit are not set
00080         current = stream.peek();
00081         if (offset < offend) {
00082             
00083             while (isBlankspace(current)
00084                    || isSeparator(current)) {
00085                 offset++;
00086                 stream.seekg(offset);
00087                 current = stream.peek();
00088             }
00089 
00090             while (!isBlankspace(current)
00091                    && !isSeparator(current)
00092                    && offset < offend) {
00093 
00094                 //std::cerr << "[DEBUG] read: "
00095                 //        << static_cast<char>(current)
00096                 //        << std::endl;
00097                 
00098                 if( lowercaseMode() ) {
00099                     current = tolower( current );
00100                 }
00101 
00102                 str.push_back(current);
00103                 
00104                 //std::cerr << "[DEBUG] pushed: "
00105                 //          << static_cast<char>(current)
00106                 //          << std::endl;
00107 
00108                 offset++;
00109                 stream.seekg(offset);
00110                 current = stream.peek();
00111             }
00112         }
00113                 
00114 //      do {
00115 //          do {
00116 //              current = stream.peek();
00117 //              offset++;
00118 //              stream.seekg(offset);
00119 //
00120 //              //std::cerr << "[DEBUG] read: "
00121 //              //        << static_cast<char>(current)
00122 //              //        << std::endl;
00123 //              
00124 //              if (   !isBlankspace(current)
00125 //                  && !isSeparator(current)
00126 //                  && offset <= offend) {
00127 //                                      
00128 //                  if( lowercaseMode() ) {
00129 //                      current = tolower( current );
00130 //                  }
00131 //
00132 //                  str.push_back(current);
00133 //
00134 //                  //std::cerr << "[DEBUG] pushed: "
00135 //                  //          << static_cast<char>(current)
00136 //                  //          << std::endl;
00137 //              }
00138 //          } while (   !isBlankspace(current)
00139 //                      && !isSeparator(current)
00140 //                      && offset < offend);
00141 //      } while (str.empty() && (offset < offend));
00142     } else {
00143         std::cerr << "stream is NOT good!" << std::endl;
00144     }
00145 
00146     //std::cerr << "[DEBUG] token: " << str << std::endl;
00147     
00148     return str;
00149 }
00150 
00151 double ForwardTokenizer::progress() const
00152 {
00153     return static_cast<double>(offset) / offend;
00154 }
00155