// presage 0.8.7
00001 00002 /****************************************************** 00003 * Presage, an extensible predictive text entry system 00004 * --------------------------------------------------- 00005 * 00006 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk> 00007 00008 This program is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 2 of the License, or 00011 (at your option) any later version. 00012 00013 This program is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License along 00019 with this program; if not, write to the Free Software Foundation, Inc., 00020 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 00021 * 00022 **********(*)*/ 00023 00024 00025 #include "databaseConnector.h" 00026 00027 #include <sstream> 00028 #include <stdlib.h> 00029 #include <assert.h> 00030 00031 DatabaseConnector::DatabaseConnector() 00032 : logger("DatabaseConnector", std::cerr) 00033 {} 00034 00035 DatabaseConnector::DatabaseConnector(const std::string& log_level) 00036 : logger("DatabaseConnector", std::cerr, log_level) 00037 {} 00038 00039 DatabaseConnector::~DatabaseConnector() 00040 {} 00041 00042 void DatabaseConnector::createNgramTable(const int n) const 00043 { 00044 if (n > 0) { 00045 std::stringstream query; 00046 std::stringstream unique; 00047 query << "CREATE TABLE"; 00048 // This #ifdef does not belong here, but unfortunately SQLite 2.x does 00049 // not support the IF NOT EXISTS SQL clause. 
00050 #ifndef HAVE_SQLITE_H 00051 query << " IF NOT EXISTS"; 00052 #endif 00053 query << " _" << n << "_gram ("; 00054 for (int i = n - 1; i >= 0; i--) { 00055 if (i != 0) { 00056 unique << "word_" << i << ", "; 00057 query << "word_" << i << " TEXT, "; 00058 } else { 00059 unique << "word"; 00060 query << "word TEXT, count INTEGER, UNIQUE(" << unique.str() << ") );"; 00061 } 00062 } 00063 00064 executeSql(query.str()); 00065 } else { 00066 // TODO 00067 // throw exception 00068 } 00069 } 00070 00071 int DatabaseConnector::getUnigramCountsSum() const 00072 { 00073 std::string query = "SELECT SUM(count) FROM _1_gram;"; 00074 00075 NgramTable result = executeSql(query); 00076 00077 logger << DEBUG << "NgramTable:"; 00078 for (size_t i = 0; i < result.size(); i++) { 00079 for (size_t j = 0; j < result[i].size(); j++) { 00080 logger << DEBUG << result[i][j] << '\t'; 00081 } 00082 logger << DEBUG << endl; 00083 } 00084 00085 return extractFirstInteger(result); 00086 } 00087 00088 int DatabaseConnector::getNgramCount(const Ngram ngram) const 00089 { 00090 std::stringstream query; 00091 query << "SELECT count " 00092 << "FROM _" << ngram.size() << "_gram" 00093 << buildWhereClause(ngram) << ";"; 00094 00095 NgramTable result = executeSql(query.str()); 00096 00097 logger << DEBUG << "NgramTable:"; 00098 for (size_t i = 0; i < result.size(); i++) { 00099 for (size_t j = 0; j < result[i].size(); j++) { 00100 logger << DEBUG << result[i][j] << '\t'; 00101 } 00102 logger << DEBUG << endl; 00103 } 00104 00105 return extractFirstInteger(result); 00106 } 00107 00108 NgramTable DatabaseConnector::getNgramLikeTable(const Ngram ngram, int limit) const 00109 { 00110 std::stringstream query; 00111 query << "SELECT " << buildSelectLikeClause(ngram.size()) << " " 00112 << "FROM _" << ngram.size() << "_gram" 00113 << buildWhereLikeClause(ngram) 00114 << " ORDER BY count DESC"; 00115 if (limit < 0) { 00116 query << ";"; 00117 } else { 00118 query << " LIMIT " << limit << ';'; 00119 } 
00120 00121 return executeSql(query.str()); 00122 } 00123 00124 NgramTable DatabaseConnector::getNgramLikeTableFiltered(const Ngram ngram, const char** filter, int limit) const 00125 { 00126 std::stringstream query; 00127 query << "SELECT " << buildSelectLikeClause(ngram.size()) << " " 00128 << "FROM _" << ngram.size() << "_gram" 00129 << buildWhereLikeClauseFiltered(ngram,filter) 00130 << " ORDER BY count DESC"; 00131 if (limit < 0) { 00132 query << ";"; 00133 } else { 00134 query << " LIMIT " << limit << ';'; 00135 } 00136 00137 return executeSql(query.str()); 00138 } 00139 00140 int DatabaseConnector::incrementNgramCount(const Ngram ngram) const 00141 { 00142 int count = getNgramCount(ngram); 00143 00144 if (count > 0) { 00145 // the ngram was found in the database 00146 updateNgram(ngram, ++count); 00147 00148 logger << DEBUG << "Updated ngram to " << count << endl; 00149 00150 } else { 00151 // the ngram was not found in the database 00152 count = 1; 00153 insertNgram(ngram, count); 00154 00155 logger << DEBUG << "Inserted ngram" << endl; 00156 00157 } 00158 return count; 00159 } 00160 00161 void DatabaseConnector::removeNgram(const Ngram ngram) const 00162 {} 00163 00164 void DatabaseConnector::insertNgram(const Ngram ngram, const int count) const 00165 { 00166 std::stringstream query; 00167 00168 query << "INSERT INTO _" << ngram.size() << "_gram " 00169 << buildValuesClause(ngram, count) 00170 << ";"; 00171 00172 executeSql(query.str()); 00173 } 00174 00175 void DatabaseConnector::updateNgram(const Ngram ngram, const int count) const 00176 { 00177 std::stringstream query; 00178 00179 query << "UPDATE _" << ngram.size() << "_gram " 00180 << "SET count = " << count 00181 << buildWhereClause(ngram) << ";"; 00182 00183 executeSql(query.str()); 00184 } 00185 00186 std::string DatabaseConnector::buildWhereClause(const Ngram ngram) const 00187 { 00188 std::stringstream where_clause; 00189 where_clause << " WHERE"; 00190 for (size_t i = 0; i < ngram.size(); i++) { 
00191 if (i < ngram.size() - 1) { 00192 where_clause << " word_" << ngram.size() - i - 1 << " = '" 00193 << sanitizeString(ngram[i]) << "' AND"; 00194 } else { 00195 where_clause << " word = '" << sanitizeString(ngram[ngram.size() - 1]) << "'"; 00196 } 00197 } 00198 return where_clause.str(); 00199 } 00200 00201 // TODO REVISIT refactor: this is same as buildWhereClause, except for 00202 // "word = " instead of "word LIKE " 00203 std::string DatabaseConnector::buildWhereLikeClause(const Ngram ngram) const 00204 { 00205 std::stringstream where_clause; 00206 where_clause << " WHERE"; 00207 for (size_t i = 0; i < ngram.size(); i++) { 00208 if (i < ngram.size() - 1) { 00209 where_clause << " word_" << ngram.size() - i - 1 << " = '" 00210 << sanitizeString(ngram[i]) << "' AND"; 00211 } else { 00212 where_clause << " word LIKE '" << sanitizeString(ngram[ngram.size() - 1]) << "%'"; 00213 } 00214 } 00215 return where_clause.str(); 00216 } 00217 00218 std::string DatabaseConnector::buildWhereLikeClauseFiltered(const Ngram ngram, const char** filter) const 00219 { 00220 std::stringstream where_clause; 00221 where_clause << " WHERE"; 00222 for (size_t i = 0; i < ngram.size(); i++) { 00223 if (i < ngram.size() - 1) { 00224 where_clause << " word_" << ngram.size() - i - 1 << " = '" 00225 << sanitizeString(ngram[i]) << "' AND"; 00226 } else { 00227 if(filter == 0) 00228 where_clause << " word LIKE '" << sanitizeString(ngram[ngram.size() - 1]) << "%'"; 00229 else { 00230 std::string true_prefix = sanitizeString(ngram[ngram.size() - 1]); 00231 where_clause << " ("; 00232 for (int j = 0; filter[j] != 0; j++) { 00233 // for(size_t j=0; j < filter.size()-1; j++) 00234 if (j) { 00235 where_clause << " OR "; 00236 } 00237 where_clause << " word LIKE '" << true_prefix << filter[j] << "%'"; 00238 } 00239 // where_clause << " word LIKE '" << true_prefix <<"%' )"; 00240 where_clause << ')'; 00241 } 00242 } 00243 } 00244 return where_clause.str(); 00245 } 00246 00247 00248 std::string 
DatabaseConnector::buildSelectLikeClause(const int cardinality) const 00249 { 00250 assert(cardinality > 0); 00251 00252 std::stringstream result; 00253 for (int i = cardinality - 1; i >= 0; i--) { 00254 if (i != 0) { 00255 result << "word_" << i << ", "; 00256 } else { 00257 result << "word, count"; 00258 } 00259 } 00260 00261 return result.str(); 00262 } 00263 00264 std::string DatabaseConnector::buildValuesClause(const Ngram ngram, const int count) const 00265 { 00266 std::stringstream values_clause; 00267 values_clause << "VALUES("; 00268 for (size_t i = 0; i < ngram.size(); i++) { 00269 if (i < ngram.size() - 1) { 00270 values_clause << "'" << sanitizeString(ngram[i]) << "', "; 00271 } else { 00272 values_clause << "'" << sanitizeString(ngram[i]) << "', " << count << ")"; 00273 } 00274 } 00275 return values_clause.str(); 00276 } 00277 00278 std::string DatabaseConnector::sanitizeString(const std::string str) const 00279 { 00280 // TODO 00281 // just return the string for the time being 00282 // REVISIT 00283 // TO BE DONE 00284 // TBD 00285 return str; 00286 } 00287 00288 int DatabaseConnector::extractFirstInteger(const NgramTable& table) const 00289 { 00290 // Initialize count to zero and then check that we have at least 00291 // an entry in the table of ngram counts returned by the 00292 // executeSql() method. If so, convert it into an integer and 00293 // return it. 00294 // 00295 // REVISIT: make conversion to integer more robust (strtol ??) 00296 // 00297 int count = 0; 00298 if (table.size() > 0) { 00299 if (table[0].size() > 0) { 00300 count = atoi(table[0][0].c_str()); 00301 } 00302 } 00303 00304 logger << DEBUG << "table: "; 00305 for (size_t i = 0; i < table.size(); i++) { 00306 for (size_t j = 0; j < table[i].size(); j++) { 00307 logger << DEBUG << table[i][j] << '\t'; 00308 } 00309 logger << DEBUG << endl; 00310 } 00311 00312 return (count > 0 ? 
count : 0); 00313 } 00314 00315 void DatabaseConnector::beginTransaction() const 00316 { 00317 executeSql("BEGIN TRANSACTION;"); 00318 } 00319 00320 void DatabaseConnector::endTransaction() const 00321 { 00322 executeSql("END TRANSACTION;"); 00323 } 00324 00325 void DatabaseConnector::rollbackTransaction() const 00326 { 00327 executeSql("ROLLBACK TRANSACTION;"); 00328 }