Branch data Line data Source code
1 : : /** @file glass_postlist.h
2 : : * @brief Postlists in glass databases
3 : : */
4 : : /* Copyright 1999,2000,2001 BrightStation PLC
5 : : * Copyright 2002 Ananova Ltd
6 : : * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2013,2014,2015,2017 Olly Betts
7 : : * Copyright 2007,2009 Lemur Consulting Ltd
8 : : *
9 : : * This program is free software; you can redistribute it and/or
10 : : * modify it under the terms of the GNU General Public License as
11 : : * published by the Free Software Foundation; either version 2 of the
12 : : * License, or (at your option) any later version.
13 : : *
14 : : * This program is distributed in the hope that it will be useful,
15 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : : * GNU General Public License for more details.
18 : : *
19 : : * You should have received a copy of the GNU General Public License
20 : : * along with this program; if not, write to the Free Software
21 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 : : * USA
23 : : */
24 : :
25 : : #ifndef XAPIAN_INCLUDED_GLASS_POSTLIST_H
26 : : #define XAPIAN_INCLUDED_GLASS_POSTLIST_H
27 : :
28 : : #include <xapian/database.h>
29 : :
30 : : #include "glass_defs.h"
31 : : #include "glass_inverter.h"
32 : : #include "glass_positionlist.h"
33 : : #include "api/leafpostlist.h"
34 : : #include "omassert.h"
35 : :
36 : : #include <memory>
37 : : #include <map>
38 : : #include <string>
39 : :
40 : : using namespace std;
41 : :
42 : : class GlassCursor;
43 : : class GlassDatabase;
44 : :
45 : : namespace Glass {
46 : : class PostlistChunkReader;
47 : : class PostlistChunkWriter;
48 : : class RootInfo;
49 : : }
50 : :
51 : : using Glass::RootInfo;
52 : :
53 : : class GlassPostList;
54 : :
55 : 7802 : class GlassPostListTable : public GlassTable { // Table object for the "postlist" table: term postings plus document lengths.
56 : : /// PostList for looking up document lengths (reset by open()); declared mutable, presumably so const lookups can create it lazily - confirm.
57 : : mutable unique_ptr<GlassPostList> doclen_pl;
58 : :
59 : : public:
60 : : /** Create a new table object.
61 : : *
62 : : * This does not create the table on disk - the create() method must
63 : : * be called before the table is created on disk.
64 : : *
65 : : * This also does not open the table - the open() method must be
66 : : * called before use is made of the table.
67 : : *
68 : : * @param path_ - Path at which the table is stored (path_ + "/postlist." is what gets passed to GlassTable).
69 : : * @param readonly_ - whether to open the table for read only
70 : : * access.
71 : : */
72 : 3599 : GlassPostListTable(const string & path_, bool readonly_)
73 : 7198 : : GlassTable("postlist", path_ + "/postlist.", readonly_),
74 [ + - ]: 7198 : doclen_pl()
75 : 3599 : { }
76 : : /// Create a table object for a table accessed via file descriptor @a fd and offset @a offset_ (both are passed straight to GlassTable).
77 : 305 : GlassPostListTable(int fd, off_t offset_, bool readonly_)
78 : : : GlassTable("postlist", fd, offset_, readonly_),
79 : 305 : doclen_pl()
80 : 305 : { }
81 : : /// Open the table, first discarding any existing document length postlist.
82 : 2990 : void open(int flags_, const RootInfo & root_info,
83 : : glass_revision_number_t rev) {
84 : 2990 : doclen_pl.reset(0);
85 : 2990 : GlassTable::open(flags_, root_info, rev);
86 : 2990 : }
87 : :
88 : : /// Merge changes for a term.
89 : : void merge_changes(const string &term, const Inverter::PostingChanges & changes);
90 : :
91 : : /// Merge document length changes (@a doclens maps docid to document length).
92 : : void merge_doclen_changes(const map<Xapian::docid, Xapian::termcount> & doclens);
93 : : /// NOTE(review): defined elsewhere; judging by the signature it sets up @a *from and/or @a *to for the chunk of @a tname relevant to @a did - confirm.
94 : : Xapian::docid get_chunk(const string &tname,
95 : : Xapian::docid did, bool adding,
96 : : Glass::PostlistChunkReader ** from,
97 : : Glass::PostlistChunkWriter **to);
98 : :
99 : : /// Compose a key from a termname and docid (delegates to pack_glass_postlist_key()).
100 : 482482 : static string make_key(const string & term, Xapian::docid did) {
101 : 482482 : return pack_glass_postlist_key(term, did);
102 : : }
103 : :
104 : : /// Compose a key from a termname only (delegates to pack_glass_postlist_key()).
105 : 1753068 : static string make_key(const string & term) {
106 : 1753068 : return pack_glass_postlist_key(term);
107 : : }
108 : : /// Return true if the key built from @a term by make_key() exists in this table.
109 : 2491 : bool term_exists(const string & term) const {
110 [ + + ]: 2491 : return key_exists(make_key(term));
111 : : }
112 : :
113 : : /** Returns frequencies for a term.
114 : : *
115 : : * @param term The term to get frequencies for
116 : : * @param termfreq_ptr Pointer used to return the number of docs indexed
117 : : * by @a term (or NULL not to return)
118 : : * @param collfreq_ptr Pointer used to return the number of occurrences
119 : : * of @a term in the database (or NULL not to return)
120 : : * @param wdfub_ptr Pointer used to return an upper bound on the wdf
121 : : * of @a term in the database (or NULL not to
122 : : * return; this is the default)
123 : : */
124 : : void get_freqs(const std::string & term,
125 : : Xapian::doccount * termfreq_ptr,
126 : : Xapian::termcount * collfreq_ptr,
127 : : Xapian::termcount * wdfub_ptr = NULL) const;
128 : :
129 : : /** Returns the length of document @a did. */
130 : : Xapian::termcount get_doclength(Xapian::docid did,
131 : : Xapian::Internal::intrusive_ptr<const GlassDatabase> db) const;
132 : :
133 : : /** Check if document @a did exists. */
134 : : bool document_exists(Xapian::docid did,
135 : : Xapian::Internal::intrusive_ptr<const GlassDatabase> db) const;
136 : : /// NOTE(review): presumably reports the lowest and highest docids in use via @a first and @a last - confirm.
137 : : void get_used_docid_range(Xapian::docid & first,
138 : : Xapian::docid & last) const;
139 : : };
140 : :
141 : : /** A postlist in a glass database, read one chunk at a time via a GlassCursor.
142 : : */
143 : : class GlassPostList : public LeafPostList {
144 : : /** The database we are searching. This pointer is held so that the
145 : : * database doesn't get deleted before us, and also to give us access
146 : : * to the position_table.
147 : : */
148 : : Xapian::Internal::intrusive_ptr<const GlassDatabase> this_db;
149 : :
150 : : /// The position list object for this posting list (initially NULL).
151 : : GlassRePositionList* positionlist = NULL;
152 : :
153 : : /// Whether we've started reading the list yet.
154 : : bool have_started;
155 : :
156 : : /// True if this is the last chunk.
157 : : bool is_last_chunk;
158 : :
159 : : /// Whether we've run off the end of the list yet.
160 : : bool is_at_end;
161 : :
162 : : /// Cursor pointing to current chunk of postlist.
163 : : unique_ptr<GlassCursor> cursor;
164 : :
165 : : /// The first document id in this chunk.
166 : : Xapian::docid first_did_in_chunk;
167 : :
168 : : /// The last document id in this chunk.
169 : : Xapian::docid last_did_in_chunk;
170 : :
171 : : /// Position of iteration through current chunk.
172 : : const char * pos;
173 : :
174 : : /// Pointer to byte after end of current chunk.
175 : : const char * end;
176 : :
177 : : /// Document id we're currently at.
178 : : Xapian::docid did;
179 : :
180 : : /// The wdf of the current document.
181 : : Xapian::termcount wdf;
182 : :
183 : : /// The number of entries in the posting list.
184 : : Xapian::doccount number_of_entries;
185 : :
186 : : /// Copying is not allowed (the copy constructor is declared private).
187 : : GlassPostList(const GlassPostList &);
188 : :
189 : : /// Assignment is not allowed (the assignment operator is declared private).
190 : : void operator=(const GlassPostList &);
191 : :
192 : : /** Move to the next item in the chunk, if possible.
193 : : * If already at the end of the chunk, returns false.
194 : : */
195 : : bool next_in_chunk();
196 : :
197 : : /** Move to the next chunk.
198 : : *
199 : : * If there are no more chunks in this postlist, this will set
200 : : * is_at_end to true.
201 : : */
202 : : void next_chunk();
203 : :
204 : : /** Return true if the given document ID lies in the range covered
205 : : * by the current chunk. This does not say whether the document ID
206 : : * is actually present. It will return false if the document ID
207 : : * is greater than the last document ID in the chunk, even if it is
208 : : * less than the first document ID in the next chunk: it is possible
209 : : * for no chunk to contain a particular document ID.
210 : : */
211 : : bool current_chunk_contains(Xapian::docid desired_did);
212 : :
213 : : /** Move to chunk containing the specified document ID.
214 : : *
215 : : * This moves to the chunk whose starting document ID is
216 : : * <= desired_did, but such that the next chunk's starting
217 : : * document ID is > desired_did.
218 : : *
219 : : * It is thus possible that current_chunk_contains(desired_did)
220 : : * will return false after this call, since the document ID
221 : : * might lie after the end of this chunk, but before the start
222 : : * of the next chunk.
223 : : */
224 : : void move_to_chunk_containing(Xapian::docid desired_did);
225 : :
226 : : /** Scan forward in the current chunk for the specified document ID.
227 : : *
228 : : * This is particularly efficient if the desired document ID is
229 : : * greater than the last in the chunk - it then skips straight
230 : : * to the end.
231 : : *
232 : : * @return true if we moved to a valid document,
233 : : * false if we reached the end of the chunk.
234 : : */
235 : : bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did);
236 : : /// Private constructor which is handed a GlassCursor (NOTE(review): presumably the postlist takes ownership, given the unique_ptr member - confirm).
237 : : GlassPostList(Xapian::Internal::intrusive_ptr<const GlassDatabase> this_db_,
238 : : const string & term,
239 : : GlassCursor * cursor_);
240 : : /// NOTE(review): presumably a set-up helper shared by the constructors; defined elsewhere - confirm.
241 : : void init();
242 : :
243 : : public:
244 : : /// Construct the postlist for @a term in database @a this_db_ (NOTE(review): the effect of @a keep_reference is not visible here - presumably whether a reference to the database is retained; confirm).
245 : : GlassPostList(Xapian::Internal::intrusive_ptr<const GlassDatabase> this_db_,
246 : : const string & term,
247 : : bool keep_reference);
248 : :
249 : : /// Destructor.
250 : : ~GlassPostList();
251 : : /// NOTE(review): presumably opens a postlist for another term @a term_ (with position data if @a need_read_pos); defined elsewhere - confirm.
252 : : LeafPostList * open_nearby_postlist(const std::string & term_,
253 : : bool need_read_pos) const;
254 : :
255 : : /** Used for looking up doclens.
256 : : *
257 : : * @return true if docid @a desired_did has a document length.
258 : : */
259 : : bool jump_to(Xapian::docid desired_did);
260 : :
261 : : /** Returns number of docs indexed by this term.
262 : : *
263 : : * This is the length of the postlist.
264 : : */
265 : 1645286 : Xapian::doccount get_termfreq() const { return number_of_entries; }
266 : :
267 : : /// Returns the current docid (asserts that reading has started).
268 : 66690082 : Xapian::docid get_docid() const { Assert(have_started); return did; }
269 : :
270 : : /** Returns the Within Document Frequency of the term in the current
271 : : * document (asserts that reading has started).
272 : : */
273 : 115155358 : Xapian::termcount get_wdf() const { Assert(have_started); return wdf; }
274 : :
275 : : /** Get the list of positions of the term in the current document
276 : : * (non-const variant; NOTE(review): presumably reuses the positionlist member - confirm). */
277 : : PositionList *read_position_list();
278 : :
279 : : /** Get the list of positions of the term in the current document
280 : : * (const variant; NOTE(review): presumably returns a separately allocated list - confirm). */
281 : : PositionList * open_position_list() const;
282 : :
283 : : /// Move to the next document.
284 : : PostList * next(double w_min);
285 : :
286 : : /// Skip to next document with docid >= desired_did.
287 : : PostList * skip_to(Xapian::docid desired_did, double w_min);
288 : :
289 : : /// Return true if and only if we're off the end of the list.
290 : 57972000 : bool at_end() const { return is_at_end; }
291 : :
292 : : /// Get a description of this object (NOTE(review): presumably of the postlist rather than of a single document - confirm).
293 : : std::string get_description() const;
294 : :
295 : : /// Read the number of entries and the collection frequency from the data at @a *posptr (bounded by @a end) into the two output pointers; presumably advances @a *posptr - confirm.
296 : : static void read_number_of_entries(const char ** posptr,
297 : : const char * end,
298 : : Xapian::doccount * number_of_entries_ptr,
299 : : Xapian::termcount * collection_freq_ptr);
300 : : };
301 : :
302 : : #ifdef DISABLE_GPL_LIBXAPIAN
303 : : # error GPL source we cannot relicense included in libxapian
304 : : #endif
305 : :
306 : : #endif /* XAPIAN_INCLUDED_GLASS_POSTLIST_H */
|