1 : /** @file termgenerator.h
2 : * @brief parse free text and generate terms
3 : */
4 : /* Copyright (C) 2007 Olly Betts
5 : *
6 : * This program is free software; you can redistribute it and/or modify
7 : * it under the terms of the GNU General Public License as published by
8 : * the Free Software Foundation; either version 2 of the License, or
9 : * (at your option) any later version.
10 : *
11 : * This program is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : * GNU General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU General Public License
17 : * along with this program; if not, write to the Free Software
18 : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 : */
20 :
21 : #ifndef XAPIAN_INCLUDED_TERMGENERATOR_H
22 : #define XAPIAN_INCLUDED_TERMGENERATOR_H
23 :
24 : #include <xapian/base.h>
25 : #include <xapian/types.h>
26 : #include <xapian/unicode.h>
27 : #include <xapian/visibility.h>
28 :
29 : #include <string>
30 :
31 : namespace Xapian {
32 :
33 : class Document;
34 : class Stem;
35 : class Stopper;
36 : class WritableDatabase;
37 :
38 : /** Parses a piece of text and generate terms.
39 : *
40 : * This module takes a piece of text and parses it to produce words which are
41 : * then used to generate suitable terms for indexing. The terms generated are
42 : * suitable for use with Query objects produced by the QueryParser class.
43 : */
44 : class XAPIAN_VISIBILITY_DEFAULT TermGenerator {
45 : public:
46 : /// @private @internal Class representing the TermGenerator internals.
47 : class Internal;
48 : /// @private @internal Reference counted internals.
49 : Xapian::Internal::RefCntPtr<Internal> internal;
50 :
51 : /// Copy constructor.
52 : TermGenerator(const TermGenerator & o);
53 :
54 : /// Assignment.
55 : TermGenerator & operator=(const TermGenerator & o);
56 :
57 : /// Default constructor.
58 : TermGenerator();
59 :
60 : /// Destructor.
61 : ~TermGenerator();
62 :
63 : /// Set the Xapian::Stem object to be used for generating stemmed terms.
64 : void set_stemmer(const Xapian::Stem & stemmer);
65 :
66 : /// Set the Xapian::Stopper object to be used for identifying stopwords.
67 : void set_stopper(const Xapian::Stopper *stop = NULL);
68 :
69 : /// Set the current document.
70 : void set_document(const Xapian::Document & doc);
71 :
72 : /// Get the current document.
73 : const Xapian::Document & get_document() const;
74 :
75 : /// Set the database to index spelling data to.
76 : void set_database(const Xapian::WritableDatabase &db);
77 :
78 : /// Flags to OR together and pass to TermGenerator::set_flags().
79 : enum flags {
80 : /// Index data required for spelling correction.
81 : FLAG_SPELLING = 128 // Value matches QueryParser flag.
82 : };
83 :
84 : /** Set flags.
85 : *
86 : * The new value of flags is: (flags & mask) ^ toggle
87 : *
88 : * To just set the flags, pass the new flags in toggle and the
89 : * default value for mask.
90 : *
91 : * @param toggle Flags to XOR.
92 : * @param mask Flags to AND with first.
93 : *
94 : * @return The old flags setting.
95 : */
96 : flags set_flags(flags toggle, flags mask = flags(0));
97 :
98 : /** Index some text.
99 : *
100 : * @param weight The wdf increment (default 1).
101 : * @param prefix The term prefix to use (default is no prefix).
102 : */
103 : void index_text(const Xapian::Utf8Iterator & itor,
104 : Xapian::termcount weight = 1,
105 : const std::string & prefix = "");
106 :
107 : /** Index some text in a std::string.
108 : *
109 : * @param weight The wdf increment (default 1).
110 : * @param prefix The term prefix to use (default is no prefix).
111 : */
112 : void index_text(const std::string & text,
113 : Xapian::termcount weight = 1,
114 : const std::string & prefix = "") {
115 : return index_text(Utf8Iterator(text), weight, prefix);
116 : }
117 :
118 : /** Index some text without positional information.
119 : *
120 : * Just like index_text, but no positional information is generated. This
121 : * means that the database will be significantly smaller, but that phrase
122 : * searching and NEAR won't be supported.
123 : */
124 : void index_text_without_positions(const Xapian::Utf8Iterator & itor,
125 : Xapian::termcount weight = 1,
126 : const std::string & prefix = "");
127 :
128 : /** Index some text in a std::string without positional information.
129 : *
130 : * Just like index_text, but no positional information is generated. This
131 : * means that the database will be significantly smaller, but that phrase
132 : * searching and NEAR won't be supported.
133 : */
134 : void index_text_without_positions(const std::string & text,
135 : Xapian::termcount weight = 1,
136 5208 : const std::string & prefix = "") {
137 5208 : return index_text_without_positions(Utf8Iterator(text), weight, prefix);
138 : }
139 :
140 : /** Increase the termpos used by index_text by @a delta.
141 : *
142 : * This can be used to prevent phrase searches from spanning two
143 : * unconnected blocks of text (e.g. the title and body text).
144 : */
145 : void increase_termpos(Xapian::termcount delta = 100);
146 :
147 : /// Get the current term position.
148 : Xapian::termcount get_termpos() const;
149 :
150 : /// Set the current term position.
151 : void set_termpos(Xapian::termcount termpos);
152 :
153 : /// Return a string describing this object.
154 : std::string get_description() const;
155 : };
156 :
157 : }
158 :
159 : #endif // XAPIAN_INCLUDED_TERMGENERATOR_H
|