Branch data Line data Source code
1 : : /** @file glass_table.h
2 : : * @brief Btree implementation
3 : : */
4 : : /* Copyright 1999,2000,2001 BrightStation PLC
5 : : * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2012,2013,2014,2015,2016 Olly Betts
6 : : * Copyright 2008 Lemur Consulting Ltd
7 : : *
8 : : * This program is free software; you can redistribute it and/or
9 : : * modify it under the terms of the GNU General Public License as
10 : : * published by the Free Software Foundation; either version 2 of the
11 : : * License, or (at your option) any later version.
12 : : *
13 : : * This program is distributed in the hope that it will be useful,
14 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : : * GNU General Public License for more details.
17 : : *
18 : : * You should have received a copy of the GNU General Public License
19 : : * along with this program; if not, write to the Free Software
20 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 : : * USA
22 : : */
23 : :
24 : : #ifndef XAPIAN_INCLUDED_GLASS_TABLE_H
25 : : #define XAPIAN_INCLUDED_GLASS_TABLE_H
26 : :
27 : : #include <xapian/constants.h>
28 : : #include <xapian/error.h>
29 : :
30 : : #include "glass_freelist.h"
31 : : #include "glass_cursor.h"
32 : : #include "glass_defs.h"
33 : :
34 : : #include "io_utils.h"
35 : : #include "omassert.h"
36 : : #include "str.h"
37 : : #include "stringutils.h"
38 : : #include "wordaccess.h"
39 : :
40 : : #include "common/compression_stream.h"
41 : :
42 : : #include <algorithm>
43 : : #include <string>
44 : :
45 : : namespace Glass {
46 : :
47 : : /** Even for items of at maximum size, it must be possible to get this number of
48 : : * items in a block */
49 : : const size_t BLOCK_CAPACITY = 4;
50 : :
51 : : /** The largest possible value of a key_len.
52 : : *
53 : : * This gives the upper limit of the size of a key that may be stored in the
54 : : * B-tree.
55 : : */
56 : : #define GLASS_BTREE_MAX_KEY_LEN 255
57 : :
58 : : // FIXME: This named constant probably isn't used everywhere it should be...
59 : : const int BYTES_PER_BLOCK_NUMBER = 4;
60 : :
61 : : /* The B-tree blocks have a number of internal lengths and offsets held in 1, 2
62 : : or 4 bytes. To make the coding a little clearer,
63 : : we use for
64 : : ------ ---
65 : : K1 the 1 byte length of key
66 : : I2 the 2 byte length of an item (key-tag pair)
67 : : D2 the 2 byte offset to the item from the directory
68 : : X2 the 2 byte component counter that ends each key
69 : : */
70 : :
71 : : const int K1 = 1;
72 : : const int I2 = 2;
73 : : const int D2 = 2;
74 : : const int X2 = 2;
75 : :
76 : : /* and when getting or setting them, we use these methods of the various
77 : : * *Item* classes: */
78 : :
79 : : // getD(p, c)
80 : : // setD(p, c, x)
81 : : // getI()
82 : : // setI(x)
83 : : // getX(p, c)
84 : : // setX(p, c, x)
85 : :
86 : : /* if you've been reading the comments from the top, the next four procedures
87 : : will not cause any headaches.
88 : :
89 : : Recall that a leaf item has this form:
90 : :
91 : : i k x
92 : : | | |
93 : : I K key X tag
94 : : ←K→
95 : : <---SIZE---->
96 : : <---I--->
97 : :
98 : : Except that X is omitted for the first component of a tag (there is a flag
99 : : bit in the upper bits of I which indicates these).
100 : :
101 : : item_of(p, c) returns i, the address of the item at block address p,
102 : : directory offset c,
103 : :
104 : : component_of(p, c) returns the number marked 'x' above,
105 : :
106 : : last_component(p, c) returns true if this is a final component.
107 : : */
108 : :
109 : 21754794 : inline uint4 REVISION(const uint8_t * b) { return aligned_read4(b); }
110 : 21843628 : inline int GET_LEVEL(const uint8_t * b) { return b[4]; }
111 : 11766132 : inline int MAX_FREE(const uint8_t * b) { return unaligned_read2(b + 5); }
112 : 13128394 : inline int TOTAL_FREE(const uint8_t * b) { return unaligned_read2(b + 7); }
113 : 79165480 : inline int DIR_END(const uint8_t * b) { return unaligned_read2(b + 9); }
114 : : const int DIR_START = 11;
115 : :
116 : 335332 : inline void SET_REVISION(uint8_t * b, uint4 rev) { aligned_write4(b, rev); }
117 : 27251 : inline void SET_LEVEL(uint8_t * b, int x) { AssertRel(x,<,256); b[4] = x; }
118 : 6356312 : inline void SET_MAX_FREE(uint8_t * b, int x) { unaligned_write2(b + 5, x); }
119 : 7770150 : inline void SET_TOTAL_FREE(uint8_t * b, int x) { unaligned_write2(b + 7, x); }
120 : 5883140 : inline void SET_DIR_END(uint8_t * b, int x) { unaligned_write2(b + 9, x); }
121 : :
122 : : // The item size is stored in 2 bytes, but the top bit is used to store a flag for
123 : : // "is the tag data compressed" and the next two bits are used to flag if this is the
124 : : // first and/or last item for this tag.
125 : : const int I_COMPRESSED_BIT = 0x80;
126 : : const int I_LAST_BIT = 0x40;
127 : : const int I_FIRST_BIT = 0x20;
128 : :
129 : : const int I_MASK = (I_COMPRESSED_BIT|I_LAST_BIT|I_FIRST_BIT);
130 : :
131 : : const int ITEM_SIZE_MASK = (0xffff &~ (I_MASK << 8));
132 : : const size_t MAX_ITEM_SIZE = (ITEM_SIZE_MASK + 3);
133 : :
134 : : /** Freelist blocks have their level set to LEVEL_FREELIST. */
135 : : const int LEVEL_FREELIST = 254;
136 : :
137 : : class RootInfo;
138 : :
139 : : class Key {
140 : : const uint8_t *p;
141 : : public:
142 : 110731364 : explicit Key(const uint8_t * p_) : p(p_) { }
143 : 40360 : const uint8_t * get_address() const { return p; }
144 : 207046644 : const uint8_t * data() const { return p + K1; }
145 : 3970361 : void read(std::string * key) const {
146 : 3970361 : key->assign(reinterpret_cast<const char *>(p + K1), length());
147 : 3970361 : }
148 : 110731364 : int length() const {
149 : 110731364 : return p[0];
150 : : }
151 : 217936 : char operator[](size_t i) const {
152 : : AssertRel(i,<,(size_t)length());
153 : 217936 : return p[i + K1];
154 : : }
155 : : };
156 : :
157 : : // LeafItem_wr wants to be "LeafItem with non-const p and more methods" - we can't
158 : : // achieve that nicely with inheritance, so we use a template base class.
159 : : template<class T> class LeafItem_base {
160 : : protected:
161 : : T p;
162 : 25530820 : int get_key_len() const { return p[I2]; }
163 : 50531444 : static int getD(const uint8_t * q, int c) {
164 : : AssertRel(c, >=, DIR_START);
165 : : AssertRel(c, <, 65535);
166 : : Assert((c & 1) == 1);
167 : 50531444 : return unaligned_read2(q + c);
168 : : }
169 : 44672356 : int getI() const { return unaligned_read2(p); }
170 : 4254342 : static int getX(const uint8_t * q, int c) { return unaligned_read2(q + c); }
171 : : public:
172 : : /* LeafItem from block address and offset to item pointer */
173 : 101062888 : LeafItem_base(T p_, int c) : p(p_ + getD(p_, c)) { }
174 : 22434258 : explicit LeafItem_base(T p_) : p(p_) { }
175 : 16943070 : T get_address() const { return p; }
176 : : /** SIZE in diagram above. */
177 : 22336178 : int size() const {
178 : 22336178 : return (getI() & ITEM_SIZE_MASK) + 3;
179 : : }
180 : 10626448 : bool get_compressed() const { return *p & I_COMPRESSED_BIT; }
181 : 49896462 : bool first_component() const { return *p & I_FIRST_BIT; }
182 : 22386682 : bool last_component() const { return *p & I_LAST_BIT; }
183 : 8322792 : int component_of() const {
184 [ + + ]: 8322792 : if (first_component()) return 1;
185 : 2127171 : return getX(p, get_key_len() + I2 + K1);
186 : : }
187 : 166299936 : Key key() const { return Key(p + I2); }
188 : 9924678 : void append_chunk(std::string * tag) const {
189 : : // Offset to the start of the tag data.
190 : 9924678 : int cd = get_key_len() + I2 + K1;
191 [ + + ]: 9924678 : if (!first_component()) cd += X2;
192 : : // Number of bytes to extract from current component.
193 : 9924678 : int l = size() - cd;
194 : 9924678 : const char * chunk = reinterpret_cast<const char *>(p + cd);
195 : 9924678 : tag->append(chunk, l);
196 : 9924678 : }
197 : 283945 : bool decompress_chunk(CompressionStream& comp_stream, string& tag) const {
198 : : // Offset to the start of the tag data.
199 : 283945 : int cd = get_key_len() + I2 + K1;
200 [ + + ]: 283945 : if (!first_component()) cd += X2;
201 : : // Number of bytes to extract from current component.
202 : 283945 : int l = size() - cd;
203 : 283945 : const char * chunk = reinterpret_cast<const char *>(p + cd);
204 : 283945 : return comp_stream.decompress_chunk(chunk, l, tag);
205 : : }
206 : : };
207 : :
208 : : class LeafItem : public LeafItem_base<const uint8_t *> {
209 : : public:
210 : : /* LeafItem from block address and offset to item pointer */
211 : 50531444 : LeafItem(const uint8_t * p_, int c)
212 : 50531444 : : LeafItem_base<const uint8_t *>(p_, c) { }
213 : 22332876 : explicit LeafItem(const uint8_t * p_)
214 : 22332876 : : LeafItem_base<const uint8_t *>(p_) { }
215 : : };
216 : :
217 : : class LeafItem_wr : public LeafItem_base<uint8_t *> {
218 : 8805891 : void set_key_len(int x) {
219 : : AssertRel(x, >=, 0);
220 : : AssertRel(x, <=, GLASS_BTREE_MAX_KEY_LEN);
221 : 8805891 : p[I2] = x;
222 : 8805891 : }
223 : 7273106 : void setI(int x) { unaligned_write2(p, x); }
224 : 859232 : static void setX(uint8_t * q, int c, int x) { unaligned_write2(q + c, x); }
225 : : public:
226 : : /* LeafItem_wr from block address and offset to item pointer */
227 : : LeafItem_wr(uint8_t * p_, int c) : LeafItem_base<uint8_t *>(p_, c) { }
228 : 202764 : explicit LeafItem_wr(uint8_t * p_) : LeafItem_base<uint8_t *>(p_) { }
229 : 429616 : void set_component_of(int i) {
230 : : AssertRel(i,>,1);
231 : 429616 : *p &=~ I_FIRST_BIT;
232 : 429616 : setX(p, get_key_len() + I2 + K1, i);
233 : 429616 : }
234 : 3636553 : void set_size(int new_size) {
235 : : AssertRel(new_size,>=,3);
236 : 3636553 : int I = new_size - 3;
237 : : // We should never be able to pass too large a size here, but don't
238 : : // corrupt the database if this somehow happens.
239 [ - + ][ # # ]: 3636553 : if (rare(I &~ ITEM_SIZE_MASK)) throw Xapian::DatabaseError("item too large!");
[ # # ][ # # ]
240 : 3636553 : setI(I);
241 : 3636553 : }
242 : 8795918 : void form_key(const std::string & key_) {
243 : 8795918 : std::string::size_type key_len = key_.length();
244 [ + + ]: 8795918 : if (key_len > GLASS_BTREE_MAX_KEY_LEN) {
245 : : // We check term length when a term is added to a document but
246 : : // glass doubles zero bytes, so this can still happen for terms
247 : : // which contain one or more zero bytes.
248 [ + - ]: 10 : std::string msg("Key too long: length was ");
249 [ + - ][ + - ]: 10 : msg += str(key_len);
250 : : msg += " bytes, maximum length of a key is "
251 [ + - ]: 10 : STRINGIZE(GLASS_BTREE_MAX_KEY_LEN) " bytes";
252 [ + - ][ + - ]: 10 : throw Xapian::InvalidArgumentError(msg);
253 : : }
254 : :
255 : 8795908 : set_key_len(key_len);
256 : 8795908 : std::memmove(p + I2 + K1, key_.data(), key_len);
257 : 8795908 : *p |= I_FIRST_BIT;
258 : 8795908 : }
259 : : // FIXME passing cd here is icky
260 : 3626570 : void set_tag(int cd, const char *start, int len, bool compressed, int i, int m) {
261 : 3626570 : std::memmove(p + cd, start, len);
262 : 3626570 : set_size(cd + len);
263 [ + + ]: 3626570 : if (compressed) *p |= I_COMPRESSED_BIT;
264 [ + + ]: 3626570 : if (i == m) *p |= I_LAST_BIT;
265 [ + + ]: 3626570 : if (i == 1) {
266 : 3197321 : *p |= I_FIRST_BIT;
267 : : } else {
268 : 429249 : set_component_of(i);
269 : : }
270 : 3626570 : }
271 : 9983 : void fake_root_item() {
272 : 9983 : set_key_len(0); // null key length
273 : 9983 : set_size(I2 + K1); // length of the item
274 : 9983 : *p |= I_FIRST_BIT|I_LAST_BIT;
275 : 9983 : }
276 : 44665752 : operator const LeafItem() const { return LeafItem(p); }
277 : 7014807 : static void setD(uint8_t * q, int c, int x) {
278 : : AssertRel(c, >=, DIR_START);
279 : : AssertRel(c, <, 65535);
280 : : Assert((c & 1) == 1);
281 : 7014807 : unaligned_write2(q + c, x);
282 : 7014807 : }
283 : : };
284 : :
285 : : /* A branch item has this form:
286 : :
287 : : k x
288 : : | |
289 : : tag K key X
290 : : ←B→ ←K→
291 : : <--SIZE--->
292 : :
293 : : B = BYTES_PER_BLOCK_NUMBER
294 : :
295 : : We can't omit X here, as we've nowhere to store the first and last bit
296 : : flags which we have in leaf items.
297 : : */
298 : :
299 : : // BItem_wr wants to be "BItem with non-const p and more methods" - we can't
300 : : // achieve that nicely with inheritance, so we use a template base class.
301 : : template<class T> class BItem_base {
302 : : protected:
303 : : T p;
304 : 2617514 : int get_key_len() const { return p[BYTES_PER_BLOCK_NUMBER]; }
305 : 40910991 : static int getD(const uint8_t * q, int c) {
306 : : AssertRel(c, >=, DIR_START);
307 : : AssertRel(c, <, 65535);
308 : : Assert((c & 1) == 1);
309 : 40910991 : return unaligned_read2(q + c);
310 : : }
311 : 2499198 : static int getX(const uint8_t * q, int c) { return unaligned_read2(q + c); }
312 : : public:
313 : : /* BItem from block address and offset to item pointer */
314 : 81821982 : BItem_base(T p_, int c) : p(p_ + getD(p_, c)) { }
315 : 41056 : explicit BItem_base(T p_) : p(p_) { }
316 : 68732 : T get_address() const { return p; }
317 : : /** SIZE in diagram above. */
318 : 58721 : int size() const {
319 : 58721 : return get_key_len() + K1 + X2 + BYTES_PER_BLOCK_NUMBER;
320 : : }
321 : 55162792 : Key key() const { return Key(p + BYTES_PER_BLOCK_NUMBER); }
322 : : /** Get this item's tag as a block number (this block should not be at
323 : : * level 0).
324 : : */
325 : 13188275 : uint4 block_given_by() const {
326 : 13188275 : return unaligned_read4(p);
327 : : }
328 : 1249599 : int component_of() const {
329 : 1249599 : return getX(p, get_key_len() + BYTES_PER_BLOCK_NUMBER + K1);
330 : : }
331 : : };
332 : :
333 : : class BItem : public BItem_base<const uint8_t *> {
334 : : public:
335 : : /* BItem from block address and offset to item pointer */
336 : 81574316 : BItem(const uint8_t * p_, int c) : BItem_base<const uint8_t *>(p_, c) { }
337 : 41056 : explicit BItem(const uint8_t * p_) : BItem_base<const uint8_t *>(p_) { }
338 : : };
339 : :
340 : : class BItem_wr : public BItem_base<uint8_t *> {
341 : 20528 : void set_key_len(int x) {
342 : : AssertRel(x, >=, 0);
343 : : AssertRel(x, <, GLASS_BTREE_MAX_KEY_LEN);
344 : 20528 : p[BYTES_PER_BLOCK_NUMBER] = x;
345 : 20528 : }
346 : 41056 : static void setX(uint8_t * q, int c, int x) { unaligned_write2(q + c, x); }
347 : : public:
348 : : /* BItem_wr from block address and offset to item pointer */
349 : 247666 : BItem_wr(uint8_t * p_, int c) : BItem_base<uint8_t *>(p_, c) { }
350 : 41056 : explicit BItem_wr(uint8_t * p_) : BItem_base<uint8_t *>(p_) { }
351 : 437 : void set_component_of(int i) {
352 : 437 : setX(p, get_key_len() + BYTES_PER_BLOCK_NUMBER + K1, i);
353 : 437 : }
354 : 89 : void set_key_and_block(Key newkey, uint4 n) {
355 : 89 : int len = newkey.length() + K1 + X2;
356 : : // Copy the key size, main part of the key and the count part.
357 : 89 : std::memcpy(p + BYTES_PER_BLOCK_NUMBER, newkey.get_address(), len);
358 : : // Set tag contents to block number
359 : 89 : set_block_given_by(n);
360 : 89 : }
361 : : // Takes size as we may be truncating newkey.
362 : 20091 : void set_truncated_key_and_block(Key newkey, int new_comp,
363 : : int truncate_size, uint4 n) {
364 : 20091 : int i = truncate_size;
365 : : AssertRel(i,<=,newkey.length());
366 : : // Key size
367 : 20091 : set_key_len(i);
368 : : // Copy the main part of the key, possibly truncating.
369 : 20091 : std::memcpy(p + BYTES_PER_BLOCK_NUMBER + K1, newkey.get_address() + K1, i);
370 : : // Set the component count.
371 : 20091 : setX(p, BYTES_PER_BLOCK_NUMBER + K1 + i, new_comp);
372 : : // Set tag contents to block number
373 : 20091 : set_block_given_by(n);
374 : 20091 : }
375 : :
376 : : /** Set this item's tag to point to block n (this block should not be at
377 : : * level 0).
378 : : */
379 : 144361 : void set_block_given_by(uint4 n) {
380 : 144361 : unaligned_write4(p, n);
381 : 144361 : }
382 : : /** Form an item with a null key and with block number n in the tag.
383 : : */
384 : 437 : void form_null_key(uint4 n) {
385 : 437 : set_block_given_by(n);
386 : 437 : set_key_len(0); /* null key */
387 : 437 : set_component_of(0);
388 : 437 : }
389 : 41056 : operator const BItem() const { return BItem(p); }
390 : 34360 : static void setD(uint8_t * q, int c, int x) {
391 : : AssertRel(c, >=, DIR_START);
392 : : AssertRel(c, <, 65535);
393 : : Assert((c & 1) == 1);
394 : 34360 : unaligned_write2(q + c, x);
395 : 34360 : }
396 : : };
397 : :
398 : : // Allow for BTREE_CURSOR_LEVELS levels in the B-tree.
399 : : // With 10, overflow is practically impossible
400 : : // FIXME: but we want it to be completely impossible...
401 : : const int BTREE_CURSOR_LEVELS = 10;
402 : :
403 : : }
404 : :
405 : : using Glass::RootInfo;
406 : :
407 : : class GlassChanges;
408 : :
409 : : /** Class managing a Btree table in a Glass database.
410 : : *
411 : : * A table is a store holding a set of key/tag pairs.
412 : : *
413 : : * A key is used to access a block of data in a glass table.
414 : : *
415 : : * Keys are of limited length.
416 : : *
417 : : * Keys may not be empty (each Btree has a special empty key for internal use).
418 : : *
419 : : * A tag is a piece of data associated with a given key. The contents
420 : : * of the tag are opaque to the Btree.
421 : : *
422 : : * Tags may be of arbitrary length (the Btree imposes a very large limit).
423 : : * Note though that they will be loaded into memory in their entirety, so
424 : : * should not be permitted to grow without bound in normal usage.
425 : : *
426 : : * Tags which are null strings _are_ valid, and are different from a
427 : : * tag simply not being in the table.
428 : : */
429 : : class GlassTable {
430 : : friend class GlassCursor; /* Should probably fix this. */
431 : : friend class GlassFreeList;
432 : : private:
433 : : /// Copying not allowed
434 : : GlassTable(const GlassTable &);
435 : :
436 : : /// Assignment not allowed
437 : : GlassTable & operator=(const GlassTable &);
438 : :
439 : : void basic_open(const RootInfo * root_info,
440 : : glass_revision_number_t rev);
441 : :
442 : : /** Perform the opening operation to read. */
443 : : void do_open_to_read(const RootInfo * root_info,
444 : : glass_revision_number_t rev);
445 : :
446 : : /** Perform the opening operation to write. */
447 : : void do_open_to_write(const RootInfo * root_info,
448 : : glass_revision_number_t rev = 0);
449 : :
450 : : public:
451 : : /** Create a new Btree object.
452 : : *
453 : : * This does not create the table on disk - the create_and_open()
454 : : * method must be called to create the table on disk.
455 : : *
456 : : * This also does not open the table - either the create_and_open()
457 : : * or open() methods must be called before use is made of the table.
458 : : *
459 : : * @param tablename_ The name of the table (used in changesets).
460 : : * @param path_ Path at which the table is stored.
461 : : * @param readonly_ whether to open the table for read only access.
462 : : * @param lazy If true, don't create the table until it's
463 : : * needed.
464 : : */
465 : : GlassTable(const char * tablename_, const std::string & path_,
466 : : bool readonly_, bool lazy = false);
467 : :
468 : : GlassTable(const char * tablename_, int fd, off_t offset_,
469 : : bool readonly_, bool lazy = false);
470 : :
471 : : /** Close the Btree.
472 : : *
473 : : * Any outstanding changes (ie, changes made without commit() having
474 : : * subsequently been called) will be lost.
475 : : */
476 : : ~GlassTable();
477 : :
478 : : /** Close the Btree. This closes and frees any of the btree
479 : : * structures which have been created and opened.
480 : : *
481 : : * @param permanent If true, the Btree will not reopen on demand.
482 : : */
483 : : void close(bool permanent = false);
484 : :
485 : : bool readahead_key(const string &key) const;
486 : :
487 : : /** Determine whether the btree exists on disk.
488 : : */
489 : : bool exists() const;
490 : :
491 : : /** Open the btree.
492 : : *
493 : : * @param flags_ flags for opening
494 : : * @param root_info root block info
495 : : *
496 : : * @exception Xapian::DatabaseCorruptError will be thrown if the table
497 : : * is in a corrupt state.
498 : : * @exception Xapian::DatabaseOpeningError will be thrown if the table
499 : : * cannot be opened (but is not corrupt - eg, permission problems,
500 : : * not present, etc).
501 : : */
502 : : void open(int flags_, const RootInfo & root_info,
503 : : glass_revision_number_t rev);
504 : :
505 : : /** Return true if this table is open.
506 : : *
507 : : * NB If the table is lazy and doesn't yet exist, returns false.
508 : : */
509 : 2171594 : bool is_open() const { return handle >= 0; }
510 : :
511 : : /** Return true if this table is writable. */
512 : 198744 : bool is_writable() const { return writable; }
513 : :
514 : : /** Flush any outstanding changes to the DB file of the table.
515 : : *
516 : : * This must be called before commit, to ensure that the DB file is
517 : : * ready to be switched to a new version by the commit.
518 : : */
519 : : void flush_db();
520 : :
521 : : /** Commit any outstanding changes to the table.
522 : : *
523 : : * Commit changes made by calling add() and del() to the Btree.
524 : : *
525 : : * If an error occurs during the operation, this will be signalled
526 : : * by an exception. In case of error, changes made will not be
527 : : * committed to the Btree - they will be discarded.
528 : : *
529 : : * @param new_revision The new revision number to store. This must
530 : : * be greater than the current revision number. FIXME: If
531 : : * we support rewinding to a previous revision, maybe this
532 : : * needs to be greater than any previously used revision.
533 : : *
534 : : * @param root_info Information about the root is returned in this.
535 : : */
536 : : void commit(glass_revision_number_t revision, RootInfo * root_info);
537 : :
538 : 54924 : bool sync() {
539 [ + + ]: 54924 : return (flags & Xapian::DB_NO_SYNC) ||
540 [ + - + - ]: 109848 : handle < 0 ||
541 : 100290 : io_sync(handle);
542 : : }
543 : :
544 : : /** Cancel any outstanding changes.
545 : : *
546 : : * This will discard any modifications which haven't been committed
547 : : * by calling commit().
548 : : */
549 : : void cancel(const RootInfo & root_info, glass_revision_number_t rev);
550 : :
551 : : /** Read an entry from the table, if and only if it is exactly that
552 : : * being asked for.
553 : : *
554 : : * If the key is found in the table, then the tag is copied to @a
555 : : * tag. If the key is not found tag is left unchanged.
556 : : *
557 : : * The result is true iff the specified key is found in the Btree.
558 : : *
559 : : * @param key The key to look for in the table.
560 : : * @param tag A tag object to fill with the value if found.
561 : : *
562 : : * @return true if key is found in table,
563 : : * false if key is not found in table.
564 : : */
565 : : bool get_exact_entry(const std::string & key, std::string & tag) const;
566 : :
567 : : /** Check if a key exists in the Btree.
568 : : *
569 : : * This is just like get_exact_entry() except it doesn't read the tag
570 : : * value so is more efficient if you only want to check that the key
571 : : * exists.
572 : : *
573 : : * @param key The key to look for in the table.
574 : : *
575 : : * @return true if key is found in table,
576 : : * false if key is not found in table.
577 : : */
578 : : bool key_exists(const std::string &key) const;
579 : :
580 : : /** Read the tag value for the key pointed to by cursor C_.
581 : : *
582 : : * @param keep_compressed Don't uncompress the tag - e.g. useful
583 : : * if it's just being opaquely copied.
584 : : *
585 : : * @return true if current_tag holds compressed data (always
586 : : * false if keep_compressed was false).
587 : : */
588 : : bool read_tag(Glass::Cursor * C_, std::string *tag, bool keep_compressed) const;
589 : :
590 : : /** Add a key/tag pair to the table, replacing any existing pair with
591 : : * the same key.
592 : : *
593 : : * If an error occurs during the operation, an exception will be
594 : : * thrown.
595 : : *
596 : : * If key is empty, then the null item is replaced.
597 : : *
598 : : * e.g. btree.add("TODAY", "Mon 9 Oct 2000");
599 : : *
600 : : * @param key The key to store in the table.
601 : : * @param tag The tag to store in the table.
602 : : * @param already_compressed true if tag is already compressed,
603 : : * for example because it is being opaquely copied
604 : : * (default: false).
605 : : */
606 : : void add(const std::string &key, std::string tag, bool already_compressed = false);
607 : :
608 : : /** Delete an entry from the table.
609 : : *
610 : : * The entry will be removed from the table, if it exists. If
611 : : * it does not exist, no action will be taken. The item with
612 : : * an empty key can't be removed, and false is returned.
613 : : *
614 : : * If an error occurs during the operation, this will be signalled
615 : : * by an exception.
616 : : *
617 : : * e.g. bool deleted = btree.del("TODAY")
618 : : *
619 : : * @param key The key to remove from the table.
620 : : *
621 : : * @return true if an entry was removed; false if it did not exist.
622 : : */
623 : : bool del(const std::string &key);
624 : :
625 : 18310 : int get_flags() const { return flags; }
626 : :
627 : : /** Create a new empty btree structure on disk and open it at the
628 : : * initial revision.
629 : : *
630 : : * The table must be writable - it doesn't make sense to create
631 : : * a table that is read-only!
632 : : *
633 : : * The block size must be less than 64K, where K = 1024. It is unwise
634 : : * to use a small block size (less than 1024 perhaps), so we enforce a
635 : : * minimum block size of 2K.
636 : : *
637 : : * Example:
638 : : *
639 : : * // File will be "X." + GLASS_TABLE_EXTENSION (i.e. "X.glass")
640 : : * Btree btree("X.");
641 : : * btree.create_and_open(0, root_info);
642 : : *
643 : : * @param root_info RootInfo object
644 : : *
645 : : * @exception Xapian::DatabaseCreateError if the table can't be
646 : : * created.
647 : : * @exception Xapian::InvalidArgumentError if the requested blocksize
648 : : * is unsuitable.
649 : : */
650 : : void create_and_open(int flags_, const RootInfo & root_info);
651 : :
652 : : void set_full_compaction(bool parity);
653 : :
654 : : /** Get the revision number at which this table
655 : : * is currently open.
656 : : *
657 : : * It is possible that there are other, more recent or older
658 : : * revisions available.
659 : : *
660 : : * @return the current revision number.
661 : : */
662 : : glass_revision_number_t get_open_revision_number() const {
663 : : return revision_number;
664 : : }
665 : :
666 : : /** Return a count of the number of entries in the table.
667 : : *
668 : : * The count does not include the ever-present item with null key.
669 : : *
670 : : * Use @a empty() if you only want to know if the table is empty or
671 : : * not.
672 : : *
673 : : * @return The number of entries in the table.
674 : : */
675 : 20681 : glass_tablesize_t get_entry_count() const {
676 : 20681 : return item_count;
677 : : }
678 : :
679 : : /// Return true if there are no entries in the table.
680 : 129431 : bool empty() const {
681 : 129431 : return (item_count == 0);
682 : : }
683 : :
684 : : /** Get a cursor for reading from the table.
685 : : *
686 : : * The cursor is owned by the caller - it is the caller's
687 : : * responsibility to ensure that it is deleted.
688 : : */
689 : : GlassCursor * cursor_get() const;
690 : :
691 : : /** Determine whether the object contains uncommitted modifications.
692 : : *
693 : : * @return true if there have been modifications since the last
694 : : * the last call to commit().
695 : : */
696 : 66582 : bool is_modified() const { return Btree_modified; }
697 : :
698 : : /** Set the maximum item size given the block capacity.
699 : : *
700 : : * At least this many items of maximum size must fit into a block.
701 : : * The default is BLOCK_CAPACITY (which is currently 4).
702 : : */
703 : 16435 : void set_max_item_size(size_t block_capacity) {
704 [ - + ]: 16435 : if (block_capacity > Glass::BLOCK_CAPACITY)
705 : 0 : block_capacity = Glass::BLOCK_CAPACITY;
706 : : using Glass::DIR_START;
707 : : using Glass::D2;
708 : : max_item_size =
709 : 16435 : (block_size - DIR_START - block_capacity * D2) / block_capacity;
710 : : // Make sure we don't exceed the limit imposed by the format.
711 [ + + ]: 16435 : if (max_item_size > Glass::MAX_ITEM_SIZE)
712 : 8 : max_item_size = Glass::MAX_ITEM_SIZE;
713 : 16435 : }
714 : :
715 : : /** Set the GlassChanges object to write changed blocks to.
716 : : *
717 : : * The GlassChanges object is not owned by the table, so the table
718 : : * must not delete it.
719 : : */
720 : 57498 : void set_changes(GlassChanges * changes) {
721 : 57498 : changes_obj = changes;
722 : 57498 : }
723 : :
724 : : /// Throw an exception indicating that the database is closed.
725 : : [[noreturn]]
726 : : static void throw_database_closed();
727 : :
728 : 2366 : string get_path() const {
729 : 2366 : return name + GLASS_TABLE_EXTENSION;
730 : : }
731 : :
732 : : protected:
733 : :
734 : : bool find(Glass::Cursor *) const;
735 : : int delete_kt();
736 : : void read_block(uint4 n, uint8_t *p) const;
737 : : void write_block(uint4 n, const uint8_t *p,
738 : : bool appending = false) const;
739 : : [[noreturn]]
740 : : void set_overwritten() const;
741 : : void block_to_cursor(Glass::Cursor *C_, int j, uint4 n) const;
742 : : void alter();
743 : : void compact(uint8_t *p);
744 : : void enter_key_above_leaf(Glass::LeafItem previtem, Glass::LeafItem newitem);
745 : : void enter_key_above_branch(int j, Glass::BItem newitem);
746 : : int mid_point(uint8_t *p) const;
747 : : void add_item_to_leaf(uint8_t *p, Glass::LeafItem kt, int c);
748 : : void add_item_to_branch(uint8_t *p, Glass::BItem kt, int c);
749 : : void add_leaf_item(Glass::LeafItem kt);
750 : : void add_branch_item(Glass::BItem kt, int j);
751 : : void delete_leaf_item(bool repeatedly);
752 : : void delete_branch_item(int j);
753 : : int add_kt(bool found);
754 : : void read_root();
755 : : void split_root(uint4 split_n);
756 : : void form_key(const std::string & key) const;
757 : :
758 : : /// The name of the table (used when writing changesets).
759 : : const char * tablename;
760 : :
761 : : /** revision number of the opened B-tree. */
762 : : glass_revision_number_t revision_number;
763 : :
764 : : /** keeps a count of the number of items in the B-tree. */
765 : : glass_tablesize_t item_count;
766 : :
767 : : /** block size of the B tree in bytes */
768 : : unsigned int block_size;
769 : :
770 : : /** Flags like DB_NO_SYNC and DB_DANGEROUS. */
771 : : int flags;
772 : :
773 : : /** true if the root block is faked (not written to disk).
774 : : * false otherwise. This is true when the btree hasn't been
775 : : * modified yet.
776 : : */
777 : : bool faked_root_block;
778 : :
779 : : /** true iff the data has been written in a single write in
780 : : * sequential order.
781 : : */
782 : : bool sequential;
783 : :
784 : : /** File descriptor of the table.
785 : : *
786 : : * If close() has been called, this will be -2.
787 : : *
788 : : * If the table is lazily created and doesn't yet exist, this will be
789 : : * -1 (for a multi-file database) or -3-fd (for a single-file database).
790 : : */
791 : : int handle;
792 : :
793 : : /// number of levels, counting from 0
794 : : int level;
795 : :
796 : : /// the root block of the B-tree
797 : : uint4 root;
798 : :
799 : : /// buffer of size block_size for making up key-tag items
800 : : mutable Glass::LeafItem_wr kt;
801 : :
802 : : /// buffer of size block_size for reforming blocks
803 : : uint8_t * buffer;
804 : :
805 : : /// List of free blocks.
806 : : GlassFreeList free_list;
807 : :
808 : : /** The path name of the B tree.
809 : : *
810 : : * For a single-file database, this will be empty.
811 : : */
812 : : std::string name;
813 : :
814 : : /** count of the number of successive instances of purely
815 : : * sequential addition, starting at SEQ_START_POINT (neg) and
816 : : * going up to zero. */
817 : : int seq_count;
818 : :
819 : : /** the last block to be changed by an addition */
820 : : uint4 changed_n;
821 : :
822 : : /** directory offset corresponding to last block to be changed
823 : : * by an addition */
824 : : int changed_c;
825 : :
826 : : /// maximum size of an item (key-tag pair)
827 : : size_t max_item_size;
828 : :
829 : : /// Set to true the first time the B-tree is modified.
830 : : mutable bool Btree_modified;
831 : :
832 : : /// set to true when full compaction is to be achieved
833 : : bool full_compaction;
834 : :
835 : : /// Set to true when the database is opened to write.
836 : : bool writable;
837 : :
838 : : /// Flag for tracking when cursors need to rebuild.
839 : : mutable bool cursor_created_since_last_modification;
840 : :
841 : : /// Version count for tracking when cursors need to rebuild.
842 : : unsigned long cursor_version;
843 : :
844 : : /** The GlassChanges object to write block changes to.
845 : : *
846 : : * If NULL, no changes will be written.
847 : : */
848 : : GlassChanges * changes_obj;
849 : :
850 : 42597 : bool single_file() const {
851 : 42597 : return name.empty();
852 : : }
853 : :
854 : : /* B-tree navigation functions */
855 : 4093610 : bool prev(Glass::Cursor *C_, int j) const {
856 [ + + ][ + - ]: 4093610 : if (sequential && !single_file())
[ + + ]
857 : 10040 : return prev_for_sequential(C_, j);
858 : 4083570 : return prev_default(C_, j);
859 : : }
860 : :
861 : 8899572 : bool next(Glass::Cursor *C_, int j) const {
862 [ + + ]: 8899572 : if (sequential) return next_for_sequential(C_, j);
863 : 7807377 : return next_default(C_, j);
864 : : }
865 : :
866 : : /* Default implementations. */
867 : : bool prev_default(Glass::Cursor *C_, int j) const;
868 : : bool next_default(Glass::Cursor *C_, int j) const;
869 : :
870 : : /* Implementations for sequential mode. */
871 : : bool prev_for_sequential(Glass::Cursor *C_, int dummy) const;
872 : : bool next_for_sequential(Glass::Cursor *C_, int dummy) const;
873 : :
874 : : static int find_in_leaf(const uint8_t * p,
875 : : Glass::LeafItem item, int c, bool& exact);
876 : : static int find_in_branch(const uint8_t * p,
877 : : Glass::LeafItem item, int c);
878 : : static int find_in_branch(const uint8_t * p, Glass::BItem item, int c);
879 : :
880 : : /** block_given_by(p, c) finds the item at block address p, directory
881 : : * offset c, and returns its tag value as an integer.
882 : : */
883 : : static uint4 block_given_by(const uint8_t * p, int c);
884 : :
885 : : mutable Glass::Cursor C[Glass::BTREE_CURSOR_LEVELS];
886 : :
887 : : /** Buffer used when splitting a block.
888 : : *
889 : : * This buffer holds the split off part of the block. It's only used
890 : : * when updating (in GlassTable::add_item().
891 : : */
892 : : uint8_t * split_p;
893 : :
894 : : /** Minimum size tag to try compressing (0 for no compression). */
895 : : uint4 compress_min;
896 : :
897 : : mutable CompressionStream comp_stream;
898 : :
899 : : /// If true, don't create the table until it's needed.
900 : : bool lazy;
901 : :
902 : : /// Last block readahead_key() preread.
903 : : mutable uint4 last_readahead;
904 : :
905 : : /// offset to start of table in file.
906 : : off_t offset;
907 : :
908 : : /* Debugging methods */
909 : : // void report_block_full(int m, int n, const uint8_t * p);
910 : : };
911 : :
912 : : namespace Glass {
913 : :
914 : : /** Compare two items by their keys.
915 : :
916 : : The result is negative if a precedes b, positive is b precedes a, and
917 : : 0 if a and b are equal. The comparison is for byte sequence collating
918 : : order, taking lengths into account. So if the keys are made up of lower case
919 : : ASCII letters we get alphabetical ordering.
920 : :
921 : : Now remember that items are added into the B-tree in fastest time
922 : : when they are preordered by their keys. This is therefore the piece
923 : : of code that needs to be followed to arrange for the preordering.
924 : :
925 : : Note that keys have two parts - a string value and a "component_of" count.
926 : : If the string values are equal, the comparison is made based on
927 : : "component_of".
928 : : */
929 : :
930 : : template<typename ITEM1, typename ITEM2>
931 : 51761661 : int compare(ITEM1 a, ITEM2 b)
932 : : {
933 : 51761661 : Key key1 = a.key();
934 : 51761661 : Key key2 = b.key();
935 : 51761661 : const uint8_t* p1 = key1.data();
936 : 51761661 : const uint8_t* p2 = key2.data();
937 : 51761661 : int key1_len = key1.length();
938 : 51761661 : int key2_len = key2.length();
939 [ + + ][ + + ]: 51761661 : int k_smaller = (key2_len < key1_len ? key2_len : key1_len);
[ + + ]
940 : :
941 : : // Compare the common part of the keys.
942 : 51761661 : int diff = std::memcmp(p1, p2, k_smaller);
943 [ + + ][ + + ]: 51761661 : if (diff == 0) {
[ + + ]
944 : : // If the common part matches, compare the lengths.
945 : 8098271 : diff = key1_len - key2_len;
946 [ + + ][ + + ]: 8098271 : if (diff == 0) {
[ + + ]
947 : : // If the strings match, compare component_of().
948 [ + - ][ + - ]: 4776150 : diff = a.component_of() - b.component_of();
[ + - ][ + - ]
[ + - ][ + - ]
949 : : }
950 : : }
951 : 51761661 : return diff;
952 : : }
953 : :
954 : : /** Compare two BItem objects by their keys.
955 : : *
956 : : * Specialisation for comparing two BItems, where component_of is always
957 : : * explicitly stored.
958 : : */
959 : : inline int
960 : 0 : compare(BItem a, BItem b)
961 : : {
962 : 0 : Key key1 = a.key();
963 : 0 : Key key2 = b.key();
964 : 0 : const uint8_t* p1 = key1.data();
965 : 0 : const uint8_t* p2 = key2.data();
966 : 0 : int key1_len = key1.length();
967 : 0 : int key2_len = key2.length();
968 [ # # ]: 0 : if (key1_len == key2_len) {
969 : : // The keys are the same length, so we can compare the counts in the
970 : : // same operation since they're stored as 2 byte bigendian numbers.
971 : 0 : int len = key1_len + X2;
972 : 0 : return std::memcmp(p1, p2, len);
973 : : }
974 : :
975 [ # # ]: 0 : int k_smaller = (key2_len < key1_len ? key2_len : key1_len);
976 : :
977 : : // Compare the common part of the keys.
978 : 0 : int diff = std::memcmp(p1, p2, k_smaller);
979 [ # # ]: 0 : if (diff == 0) {
980 : : // If the common part matches, compare the lengths.
981 : 0 : diff = key1_len - key2_len;
982 : : // We dealt with the "same length" case above so we never need to check
983 : : // component_of here.
984 : : }
985 : 0 : return diff;
986 : : }
987 : :
988 : : }
989 : :
990 : : #ifdef DISABLE_GPL_LIBXAPIAN
991 : : # error GPL source we cannot relicense included in libxapian
992 : : #endif
993 : :
994 : : #endif /* XAPIAN_INCLUDED_GLASS_TABLE_H */
|