presage  0.8.7
databaseConnector.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************
00003  *  Presage, an extensible predictive text entry system
00004  *  ---------------------------------------------------
00005  *
00006  *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
00007 
00008     This program is free software; you can redistribute it and/or modify
00009     it under the terms of the GNU General Public License as published by
00010     the Free Software Foundation; either version 2 of the License, or
00011     (at your option) any later version.
00012 
00013     This program is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016     GNU General Public License for more details.
00017 
00018     You should have received a copy of the GNU General Public License along
00019     with this program; if not, write to the Free Software Foundation, Inc.,
00020     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00021                                                                              *
00022                                                                 **********(*)*/
00023 
00024 
#include "databaseConnector.h"

#include <sstream>
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
00030 
00031 DatabaseConnector::DatabaseConnector()
00032     : logger("DatabaseConnector", std::cerr)
00033 {}
00034 
00035 DatabaseConnector::DatabaseConnector(const std::string& log_level)
00036     : logger("DatabaseConnector", std::cerr, log_level)
00037 {}
00038 
00039 DatabaseConnector::~DatabaseConnector()
00040 {}
00041 
00042 void DatabaseConnector::createNgramTable(const int n) const
00043 {
00044     if (n > 0) {
00045         std::stringstream query;
00046         std::stringstream unique;
00047         query << "CREATE TABLE";
00048 // This #ifdef does not belong here, but unfortunately SQLite 2.x does
00049 // not support the IF NOT EXISTS SQL clause.
00050 #ifndef HAVE_SQLITE_H
00051         query << " IF NOT EXISTS";
00052 #endif
00053         query << " _" << n << "_gram (";
00054         for (int i = n - 1; i >= 0; i--) {
00055             if (i != 0) {
00056                 unique << "word_" << i << ", ";
00057                 query << "word_" << i << " TEXT, ";
00058             } else {
00059                 unique << "word";
00060                 query << "word TEXT, count INTEGER, UNIQUE(" << unique.str() << ") );";
00061             }
00062         }
00063 
00064         executeSql(query.str());
00065     } else {
00066         // TODO
00067         // throw exception
00068     }
00069 }
00070 
00071 int DatabaseConnector::getUnigramCountsSum() const
00072 {
00073     std::string query = "SELECT SUM(count) FROM _1_gram;";
00074 
00075     NgramTable result = executeSql(query);
00076 
00077     logger << DEBUG << "NgramTable:";
00078     for (size_t i = 0; i < result.size(); i++) {
00079         for (size_t j = 0; j < result[i].size(); j++) {
00080             logger << DEBUG << result[i][j] << '\t';
00081         }
00082     logger << DEBUG << endl;
00083     }
00084 
00085     return extractFirstInteger(result);
00086 }
00087 
00088 int DatabaseConnector::getNgramCount(const Ngram ngram) const
00089 {
00090     std::stringstream query;
00091     query << "SELECT count "
00092           << "FROM _" << ngram.size() << "_gram"
00093           << buildWhereClause(ngram) << ";";
00094 
00095     NgramTable result = executeSql(query.str());
00096 
00097     logger << DEBUG << "NgramTable:";
00098     for (size_t i = 0; i < result.size(); i++) {
00099         for (size_t j = 0; j < result[i].size(); j++) {
00100             logger << DEBUG << result[i][j] << '\t';
00101         }
00102         logger << DEBUG << endl;
00103     }
00104 
00105     return extractFirstInteger(result);
00106 }
00107 
00108 NgramTable DatabaseConnector::getNgramLikeTable(const Ngram ngram, int limit) const
00109 {
00110     std::stringstream query;
00111     query << "SELECT " << buildSelectLikeClause(ngram.size()) << " "
00112           << "FROM _" << ngram.size() << "_gram"
00113           << buildWhereLikeClause(ngram)
00114           << " ORDER BY count DESC";
00115     if (limit < 0) {
00116         query << ";";
00117     } else {
00118         query << " LIMIT " << limit << ';';
00119     }
00120 
00121     return executeSql(query.str());
00122 }
00123 
00124 NgramTable DatabaseConnector::getNgramLikeTableFiltered(const Ngram ngram, const char** filter, int limit) const
00125 {
00126     std::stringstream query;
00127     query << "SELECT " << buildSelectLikeClause(ngram.size()) << " "
00128           << "FROM _" << ngram.size() << "_gram"
00129           << buildWhereLikeClauseFiltered(ngram,filter)
00130           << " ORDER BY count DESC";
00131     if (limit < 0) {
00132         query << ";";
00133     } else {
00134         query << " LIMIT " << limit << ';';
00135     }
00136 
00137     return executeSql(query.str());
00138 }
00139 
00140 int DatabaseConnector::incrementNgramCount(const Ngram ngram) const
00141 {
00142     int count = getNgramCount(ngram);
00143 
00144     if (count > 0) {
00145         // the ngram was found in the database
00146         updateNgram(ngram, ++count);
00147 
00148         logger << DEBUG << "Updated ngram to " << count << endl;
00149 
00150     } else {
00151         // the ngram was not found in the database
00152         count = 1;
00153         insertNgram(ngram, count);
00154 
00155         logger << DEBUG << "Inserted ngram" << endl;
00156 
00157     }
00158     return count;
00159 }
00160 
00161 void DatabaseConnector::removeNgram(const Ngram ngram) const
00162 {}
00163 
00164 void DatabaseConnector::insertNgram(const Ngram ngram, const int count) const
00165 {
00166     std::stringstream query;
00167 
00168     query << "INSERT INTO _" << ngram.size() << "_gram "
00169           << buildValuesClause(ngram, count)
00170           << ";";
00171 
00172     executeSql(query.str());
00173 }
00174 
00175 void DatabaseConnector::updateNgram(const Ngram ngram, const int count) const
00176 {
00177     std::stringstream query;
00178 
00179     query << "UPDATE _" << ngram.size() << "_gram "
00180           << "SET count = " << count
00181           << buildWhereClause(ngram) << ";";
00182 
00183     executeSql(query.str());
00184 }
00185 
00186 std::string DatabaseConnector::buildWhereClause(const Ngram ngram) const
00187 {
00188     std::stringstream where_clause;
00189     where_clause << " WHERE";
00190     for (size_t i = 0; i < ngram.size(); i++) {
00191         if (i < ngram.size() - 1) {
00192             where_clause << " word_" << ngram.size() - i - 1 << " = '"
00193                          << sanitizeString(ngram[i]) << "' AND";
00194         } else {
00195             where_clause << " word = '" << sanitizeString(ngram[ngram.size() - 1]) << "'";
00196         }
00197     }
00198     return where_clause.str();
00199 }
00200 
00201 // TODO REVISIT refactor: this is same as buildWhereClause, except for
00202 //                        "word = " instead of "word LIKE "
00203 std::string DatabaseConnector::buildWhereLikeClause(const Ngram ngram) const
00204 {
00205     std::stringstream where_clause;
00206     where_clause << " WHERE";
00207     for (size_t i = 0; i < ngram.size(); i++) {
00208         if (i < ngram.size() - 1) {
00209             where_clause << " word_" << ngram.size() - i - 1 << " = '"
00210                          << sanitizeString(ngram[i]) << "' AND";
00211         } else {
00212             where_clause << " word LIKE '" << sanitizeString(ngram[ngram.size() - 1]) << "%'";
00213         }
00214     }
00215     return where_clause.str();
00216 }
00217 
00218 std::string DatabaseConnector::buildWhereLikeClauseFiltered(const Ngram ngram, const char** filter) const
00219 {
00220     std::stringstream where_clause;
00221     where_clause << " WHERE";
00222     for (size_t i = 0; i < ngram.size(); i++) {
00223         if (i < ngram.size() - 1) {
00224             where_clause << " word_" << ngram.size() - i - 1 << " = '"
00225                          << sanitizeString(ngram[i]) << "' AND";
00226         } else {
00227             if(filter == 0)
00228                 where_clause << " word LIKE '" << sanitizeString(ngram[ngram.size() - 1]) << "%'";
00229             else {
00230                 std::string true_prefix = sanitizeString(ngram[ngram.size() - 1]);
00231                 where_clause << " (";
00232                 for (int j = 0; filter[j] != 0; j++) {
00233 //              for(size_t j=0; j < filter.size()-1; j++)
00234                     if (j) {
00235                         where_clause << " OR ";
00236                     }
00237                     where_clause << " word LIKE '" << true_prefix << filter[j] << "%'";
00238                 }
00239 //              where_clause << " word LIKE '" << true_prefix <<"%' )";
00240                 where_clause << ')';
00241             }
00242         }
00243     }
00244     return where_clause.str();
00245 }
00246 
00247 
00248 std::string DatabaseConnector::buildSelectLikeClause(const int cardinality) const
00249 {
00250     assert(cardinality > 0);
00251 
00252     std::stringstream result;
00253     for (int i = cardinality - 1; i >= 0; i--) {
00254         if (i != 0) {
00255             result << "word_" << i << ", ";
00256         } else {
00257             result << "word, count";
00258         }
00259     }
00260 
00261     return result.str();
00262 }
00263 
00264 std::string DatabaseConnector::buildValuesClause(const Ngram ngram, const int count) const
00265 {
00266     std::stringstream values_clause;
00267     values_clause << "VALUES(";
00268     for (size_t i = 0; i < ngram.size(); i++) {
00269         if (i < ngram.size() - 1) {
00270             values_clause << "'" << sanitizeString(ngram[i]) << "', ";
00271         } else {
00272             values_clause << "'" << sanitizeString(ngram[i]) << "', " << count << ")";
00273         }
00274     }
00275     return values_clause.str();
00276 }
00277 
00278 std::string DatabaseConnector::sanitizeString(const std::string str) const
00279 {
00280     // TODO
00281     // just return the string for the time being
00282     // REVISIT
00283     // TO BE DONE
00284     // TBD
00285     return str;
00286 }
00287 
00288 int DatabaseConnector::extractFirstInteger(const NgramTable& table) const
00289 {
00290     // Initialize count to zero and then check that we have at least
00291     // an entry in the table of ngram counts returned by the
00292     // executeSql() method. If so, convert it into an integer and
00293     // return it.
00294     //
00295     // REVISIT: make conversion to integer more robust (strtol ??)
00296     //
00297     int count = 0;
00298     if (table.size() > 0) {
00299         if (table[0].size() > 0) {
00300             count = atoi(table[0][0].c_str());
00301         }
00302     }
00303 
00304     logger << DEBUG << "table: ";
00305     for (size_t i = 0; i < table.size(); i++) {
00306         for (size_t j = 0; j < table[i].size(); j++) {
00307             logger << DEBUG << table[i][j] << '\t';
00308         }
00309         logger << DEBUG << endl;
00310     }
00311 
00312     return (count > 0 ? count : 0);
00313 }
00314 
00315 void DatabaseConnector::beginTransaction() const
00316 {
00317     executeSql("BEGIN TRANSACTION;");
00318 }
00319 
00320 void DatabaseConnector::endTransaction() const
00321 {
00322     executeSql("END TRANSACTION;");
00323 }
00324 
00325 void DatabaseConnector::rollbackTransaction() const
00326 {
00327     executeSql("ROLLBACK TRANSACTION;");
00328 }