LCOV - code coverage report
Current view: top level - backends/honey - honey_database.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core c2b6f1024d3a Lines: 6 6 100.0 %
Date: 2019-05-16 09:13:18 Functions: 3 3 100.0 %
Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /** @file honey_database.h
       2                 :            :  * @brief Database using honey backend
       3                 :            :  */
       4                 :            : /* Copyright 2004,2006,2007,2008,2009,2011,2014,2015,2016,2017 Olly Betts
       5                 :            :  * Copyright 2007,2008 Lemur Consulting Ltd
       6                 :            :  *
       7                 :            :  * This program is free software; you can redistribute it and/or
       8                 :            :  * modify it under the terms of the GNU General Public License as
       9                 :            :  * published by the Free Software Foundation; either version 2 of the
      10                 :            :  * License, or (at your option) any later version.
      11                 :            :  *
      12                 :            :  * This program is distributed in the hope that it will be useful,
      13                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15                 :            :  * GNU General Public License for more details.
      16                 :            :  *
      17                 :            :  * You should have received a copy of the GNU General Public License
      18                 :            :  * along with this program; if not, write to the Free Software
      19                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      20                 :            :  */
      21                 :            : 
      22                 :            : #ifndef XAPIAN_INCLUDED_HONEY_DATABASE_H
      23                 :            : #define XAPIAN_INCLUDED_HONEY_DATABASE_H
      24                 :            : 
      25                 :            : #include "backends/databaseinternal.h"
      26                 :            : 
      27                 :            : #include "honey_alldocspostlist.h"
      28                 :            : #include "honey_docdata.h"
      29                 :            : #include "honey_postlisttable.h"
      30                 :            : #include "honey_positionlist.h"
      31                 :            : #include "honey_spelling.h"
      32                 :            : #include "honey_synonym.h"
      33                 :            : #include "honey_termlisttable.h"
      34                 :            : #include "honey_values.h"
      35                 :            : #include "honey_version.h"
      36                 :            : #include "xapian/compactor.h"
      37                 :            : 
      38                 :            : class HoneyAllTermsList;
      39                 :            : class HoneyCursor;
      40                 :            : class HoneyPostList;
      41                 :            : class HoneySynonymTermList;
      42                 :            : class HoneySpellingWordsList;
      43                 :            : class HoneyTermList;
      44                 :            : 
      45                 :            : /// Database using honey backend.
      46                 :            : class HoneyDatabase : public Xapian::Database::Internal {
      47                 :            :     friend class HoneyAllTermsList;
      48                 :            :     friend class HoneyPosPostList;
      49                 :            :     friend class HoneyPostList;
      50                 :            :     friend class HoneySpellingWordsList;
      51                 :            :     friend class HoneySynonymTermList;
      52                 :            :     friend class HoneyTermList;
      53                 :            : 
      54                 :            :     /// Don't allow assignment.
      55                 :            :     HoneyDatabase& operator=(const HoneyDatabase&) = delete;
      56                 :            : 
      57                 :            :     /// Don't allow copying.
      58                 :            :     HoneyDatabase(const HoneyDatabase&) = delete;
      59                 :            : 
      60                 :            :     /// Path of the directory.
      61                 :            :     std::string path;
      62                 :            : 
      63                 :            :     /// Version file ("iamhoney").
      64                 :            :     HoneyVersion version_file;
      65                 :            : 
      66                 :            :     HoneyDocDataTable docdata_table;
      67                 :            : 
      68                 :            :     HoneyPostListTable postlist_table;
      69                 :            : 
      70                 :            :     HoneyPositionTable position_table;
      71                 :            : 
      72                 :            :     mutable HoneySpellingTable spelling_table;
      73                 :            : 
      74                 :            :     HoneySynonymTable synonym_table;
      75                 :            : 
      76                 :            :     HoneyTermListTable termlist_table;
      77                 :            : 
      78                 :            :     HoneyValueManager value_manager;
      79                 :            : 
      80                 :            :     mutable Honey::DocLenChunkReader doclen_chunk_reader;
      81                 :            : 
      82                 :            :     mutable HoneyCursor* doclen_cursor = NULL;
      83                 :            : 
      84                 :            :     [[noreturn]]
      85                 :            :     void throw_termlist_table_close_exception() const;
      86                 :            : 
      87                 :            :   public:
      88                 :            :     explicit
      89                 :            :     HoneyDatabase(const std::string& path_, int flags = Xapian::DB_READONLY_);
      90                 :            : 
      91                 :            :     explicit
      92                 :            :     HoneyDatabase(int fd, int flags = Xapian::DB_READONLY_);
      93                 :            : 
      94                 :            :     ~HoneyDatabase();
      95                 :            : 
      96                 :            :     void readahead_for_query(const Xapian::Query& query) const;
      97                 :            : 
      98                 :            :     Xapian::doccount get_doccount() const;
      99                 :            : 
     100                 :            :     /** Return the last used document id of this (sub) database. */
     101                 :            :     Xapian::docid get_lastdocid() const;
     102                 :            : 
     103                 :            :     /** Return the total length of all documents in this database. */
     104                 :            :     Xapian::totallength get_total_length() const;
     105                 :            : 
     106                 :            :     Xapian::termcount get_doclength(Xapian::docid did) const;
     107                 :            : 
     108                 :            :     /** Get the number of unique terms in document.
     109                 :            :      *
     110                 :            :      *  @param did  The document id of the document to return this value for.
     111                 :            :      */
     112                 :            :     Xapian::termcount get_unique_terms(Xapian::docid did) const;
     113                 :            : 
     114                 :            :     /** Returns frequencies for a term.
     115                 :            :      *
     116                 :            :      *  @param term             The term to get frequencies for
     117                 :            :      *  @param termfreq_ptr     Where to return number of docs indexed by @a
     118                 :            :      *                          term (or NULL not to return)
     119                 :            :      *  @param collfreq_ptr     Where to return number of occurrences of @a
     120                 :            :      *                          term in the database (or NULL not to return)
     121                 :            :      */
     122                 :            :     void get_freqs(const std::string& term,
     123                 :            :                    Xapian::doccount* termfreq_ptr,
     124                 :            :                    Xapian::termcount* collfreq_ptr) const;
     125                 :            : 
     126                 :            :     /** Return the frequency of a given value slot.
     127                 :            :      *
     128                 :            :      *  This is the number of documents which have a (non-empty) value
     129                 :            :      *  stored in the slot.
     130                 :            :      *
     131                 :            :      *  @param slot The value slot to examine.
     132                 :            :      *
     133                 :            :      *  @exception UnimplementedError The frequency of the value isn't
     134                 :            :      *  available for this database type.
     135                 :            :      */
     136                 :            :     Xapian::doccount get_value_freq(Xapian::valueno slot) const;
     137                 :            : 
     138                 :            :     /** Get a lower bound on the values stored in the given value slot.
     139                 :            :      *
     140                 :            :      *  If the lower bound isn't available for the given database type,
     141                 :            :      *  this will return the lowest possible bound - the empty string.
     142                 :            :      *
     143                 :            :      *  @param slot The value slot to examine.
     144                 :            :      */
     145                 :            :     std::string get_value_lower_bound(Xapian::valueno slot) const;
     146                 :            : 
     147                 :            :     /** Get an upper bound on the values stored in the given value slot.
     148                 :            :      *
     149                 :            :      *  @param slot The value slot to examine.
     150                 :            :      *
     151                 :            :      *  @exception UnimplementedError The upper bound of the values isn't
     152                 :            :      *  available for this database type.
     153                 :            :      */
     154                 :            :     std::string get_value_upper_bound(Xapian::valueno slot) const;
     155                 :            : 
     156                 :            :     /// Get a lower bound on the length of a document in this DB.
     157                 :            :     Xapian::termcount get_doclength_lower_bound() const;
     158                 :            : 
     159                 :            :     /// Get an upper bound on the length of a document in this DB.
     160                 :            :     Xapian::termcount get_doclength_upper_bound() const;
     161                 :            : 
     162                 :            :     /// Get an upper bound on the wdf of term @a term.
     163                 :            :     Xapian::termcount get_wdf_upper_bound(const std::string& term) const;
     164                 :            : 
     165                 :            :     /// Get a lower bound on the unique terms size of a document in this DB.
     166                 :            :     Xapian::termcount get_unique_terms_lower_bound() const;
     167                 :            : 
     168                 :            :     /// Get an upper bound on the unique terms size of a document in this DB.
     169                 :            :     Xapian::termcount get_unique_terms_upper_bound() const;
     170                 :            : 
     171                 :            :     bool term_exists(const std::string& term) const;
     172                 :            : 
     173                 :            :     /** Check whether this database contains any positional information. */
     174                 :            :     bool has_positions() const;
     175                 :            : 
     176                 :            :     PostList* open_post_list(const std::string& term) const;
     177                 :            : 
     178                 :            :     LeafPostList* open_leaf_post_list(const std::string& term,
     179                 :            :                                       bool need_read_pos) const;
     180                 :            : 
     181                 :            :     /** Open a value stream.
     182                 :            :      *
     183                 :            :      *  This returns the value in a particular slot for each document.
     184                 :            :      *
     185                 :            :      *  @param slot     The value slot.
     186                 :            :      *
     187                 :            :      *  @return Pointer to a new ValueList object which should be deleted by
     188                 :            :      *          the caller once it is no longer needed.
     189                 :            :      */
     190                 :            :     ValueList* open_value_list(Xapian::valueno slot) const;
     191                 :            : 
     192                 :            :     TermList* open_term_list(Xapian::docid did) const;
     193                 :            : 
     194                 :            :     /** Like open_term_list() but without MultiTermList wrapper.
     195                 :            :      *
     196                 :            :      *  MultiDatabase::open_term_list() wraps the returned TermList in a
     197                 :            :      *  MultiTermList, but we don't want that for query expansion.
     198                 :            :      */
     199                 :            :     TermList* open_term_list_direct(Xapian::docid did) const;
     200                 :            : 
     201                 :            :     TermList* open_allterms(const std::string& prefix) const;
     202                 :            : 
     203                 :            :     PositionList* open_position_list(Xapian::docid did,
     204                 :            :                                      const std::string& term) const;
     205                 :            : 
     206                 :            :     /** Open a handle on a document.
     207                 :            :      *
     208                 :            :      *  The returned handle provides access to document data and document
     209                 :            :      *  values.
     210                 :            :      *
     211                 :            :      *  @param did      The document id to open.
     212                 :            :      *
     213                 :            :      *  @param lazy     If true, there's no need to check that this document
     214                 :            :      *                  actually exists (only a hint - the backend may still
     215                 :            :      *                  check).  Used to avoid unnecessary work when we already
     216                 :            :      *                  know that the requested document exists.
     217                 :            :      *
     218                 :            :      *  @return         A new document object, owned by the caller.
     219                 :            :      */
     220                 :            :     Xapian::Document::Internal* open_document(Xapian::docid did,
     221                 :            :                                               bool lazy) const;
     222                 :            : 
     223                 :            :     /** Create a termlist tree from trigrams of @a word.
     224                 :            :      *
     225                 :            :      *  You can assume word.size() > 1.
     226                 :            :      *
     227                 :            :      *  If there are no trigrams, returns NULL.
     228                 :            :      */
     229                 :            :     TermList* open_spelling_termlist(const std::string& word) const;
     230                 :            : 
     231                 :            :     /** Return a termlist which returns the words which are spelling
     232                 :            :      *  correction targets.
     233                 :            :      *
     234                 :            :      *  If there are no spelling correction targets, returns NULL.
     235                 :            :      */
     236                 :            :     TermList* open_spelling_wordlist() const;
     237                 :            : 
     238                 :            :     /** Return the number of times @a word was added as a spelling. */
     239                 :            :     Xapian::doccount get_spelling_frequency(const std::string& word) const;
     240                 :            : 
     241                 :            :     /** Add a word to the spelling dictionary.
     242                 :            :      *
     243                 :            :      *  If the word is already present, its frequency is increased.
     244                 :            :      *
     245                 :            :      *  @param word     The word to add.
     246                 :            :      *  @param freqinc  How much to increase its frequency by.
     247                 :            :      */
     248                 :            :     void add_spelling(const std::string& word,
     249                 :            :                       Xapian::termcount freqinc) const;
     250                 :            : 
     251                 :            :     /** Remove a word from the spelling dictionary.
     252                 :            :      *
     253                 :            :      *  The word's frequency is decreased, and if it would become zero or less
     254                 :            :      *  then the word is removed completely.
     255                 :            :      *
     256                 :            :      *  @param word     The word to remove.
     257                 :            :      *  @param freqdec  How much to decrease its frequency by.
     258                 :            :      *
     259                 :            :      *  @return Any freqdec not "used up".
     260                 :            :      */
     261                 :            :     Xapian::termcount remove_spelling(const std::string& word,
     262                 :            :                                       Xapian::termcount freqdec) const;
     263                 :            : 
     264                 :            :     /** Open a termlist returning synonyms for a term.
     265                 :            :      *
     266                 :            :      *  If @a term has no synonyms, returns NULL.
     267                 :            :      */
     268                 :            :     TermList* open_synonym_termlist(const std::string& term) const;
     269                 :            : 
     270                 :            :     /** Open a termlist returning each term which has synonyms.
     271                 :            :      *
     272                 :            :      *  @param prefix   If non-empty, only terms with this prefix are
     273                 :            :      *                  returned.
     274                 :            :      */
     275                 :            :     TermList* open_synonym_keylist(const std::string& prefix) const;
     276                 :            : 
     277                 :            :     /** Add a synonym for a term.
     278                 :            :      *
     279                 :            :      *  If @a synonym is already a synonym for @a term, then no action is
     280                 :            :      *  taken.
     281                 :            :      */
     282                 :            :     void add_synonym(const std::string& term,
     283                 :            :                      const std::string& synonym) const;
     284                 :            : 
     285                 :            :     /** Remove a synonym for a term.
     286                 :            :      *
     287                 :            :      *  If @a synonym isn't a synonym for @a term, then no action is taken.
     288                 :            :      */
     289                 :            :     void remove_synonym(const std::string& term,
     290                 :            :                         const std::string& synonym) const;
     291                 :            : 
     292                 :            :     /** Clear all synonyms for a term.
     293                 :            :      *
     294                 :            :      *  If @a term has no synonyms, no action is taken.
     295                 :            :      */
     296                 :            :     void clear_synonyms(const std::string& term) const;
     297                 :            : 
     298                 :            :     /** Get the metadata associated with a given key.
     299                 :            :      *
     300                 :            :      *  See Database::get_metadata() for more information.
     301                 :            :      */
     302                 :            :     std::string get_metadata(const std::string& key) const;
     303                 :            : 
     304                 :            :     /** Open a termlist returning each metadata key.
     305                 :            :      *
     306                 :            :      *  Only metadata keys which are associated with a non-empty value will
     307                 :            :      *  be returned.
     308                 :            :      *
     309                 :            :      *  @param prefix   If non-empty, only keys with this prefix are returned.
     310                 :            :      */
     311                 :            :     TermList* open_metadata_keylist(const std::string& prefix) const;
     312                 :            : 
     313                 :            :     /** Set the metadata associated with a given key.
     314                 :            :      *
     315                 :            :      *  See WritableDatabase::set_metadata() for more information.
     316                 :            :      */
     317                 :            :     void set_metadata(const std::string& key, const std::string& value);
     318                 :            : 
     319                 :            :     /** Reopen the database to the latest available revision.
     320                 :            :      *
     321                 :            :      *  Database backends which don't support simultaneous update and
     322                 :            :      *  reading probably don't need to do anything here.
     323                 :            :      */
     324                 :            :     bool reopen();
     325                 :            : 
     326                 :            :     /** Close the database */
     327                 :            :     void close();
     328                 :            : 
     329                 :            :     /** Request a document.
     330                 :            :      *
     331                 :            :      *  This tells the database that we're going to want a particular
     332                 :            :      *  document soon.  It's just a hint which the backend may ignore,
     333                 :            :      *  but for glass it issues a preread hint on the file with the
     334                 :            :      *  document data in, and for the remote backend it might cause
     335                 :            :      *  the document to be fetched asynchronously (this isn't currently
     336                 :            :      *  implemented though).
     337                 :            :      *
     338                 :            :      *  It can be called for multiple documents in turn, and a common usage
     339                 :            :      *  pattern would be to iterate over an MSet and request the documents,
     340                 :            :      *  then iterate over it again to actually get and display them.
     341                 :            :      *
     342                 :            :      *  The default implementation is a no-op.
     343                 :            :      */
     344                 :            :     void request_document(Xapian::docid did) const;
     345                 :            : 
     346                 :            :     /// Get the current revision of the database.
     347                 :            :     Xapian::rev get_revision() const;
     348                 :            : 
     349                 :            :     /** Get a UUID for the database.
     350                 :            :      *
     351                 :            :      *  The UUID will persist for the lifetime of the database.
     352                 :            :      *
     353                 :            :      *  Replicas (eg, made with the replication protocol, or by copying all
     354                 :            :      *  the database files) will have the same UUID.  However, copies (made
     355                 :            :      *  with copydatabase, or xapian-compact) will have different UUIDs.
     356                 :            :      *
     357                 :            :      *  If the backend does not support UUIDs the empty string is returned.
     358                 :            :      */
     359                 :            :     std::string get_uuid() const;
     360                 :            : 
     361                 :            :     /** Get backend information about this database.
     362                 :            :      *
     363                 :            :      *  @param path     If non-NULL, set the pointed-to string to the file
     364                 :            :      *                  path of this database (or to some string describing
     365                 :            :      *                  the database in a backend-specified format if "path"
     366                 :            :      *                  isn't a concept which makes sense).
     367                 :            :      *
     368                 :            :      *  @return A constant indicating the backend type.
     369                 :            :      */
     370                 :            :     int get_backend_info(std::string* path) const;
     371                 :            : 
     372                 :            :     /** Find lowest and highest docids actually in use.
     373                 :            :      *
     374                 :            :      *  Only used by compaction, so only needs to be implemented by
     375                 :            :      *  backends which support compaction.
     376                 :            :      */
     377                 :            :     void get_used_docid_range(Xapian::docid& first, Xapian::docid& last) const;
     378                 :            : 
     379                 :            :     static
     380                 :            :     void compact(Xapian::Compactor* compactor,
     381                 :            :                  const char* destdir,
     382                 :            :                  int fd,
     383                 :            :                  int source_backend,
     384                 :            :                  const std::vector<const Xapian::Database::Internal*>& sources,
     385                 :            :                  const std::vector<Xapian::docid>& offset,
     386                 :            :                  Xapian::Compactor::compaction_level compaction,
     387                 :            :                  unsigned flags,
     388                 :            :                  Xapian::docid last_docid);
     389                 :            :     /// Report whether there are uncommitted changes; always returns false.
     390                 :          2 :     bool has_uncommitted_changes() const {
     391                 :          2 :         return false;
     392                 :            :     }
     393                 :            :     /// Report whether this is a single-file database; always returns false.
     394                 :        105 :     bool single_file() const {
     395                 :        105 :         return false;
     396                 :            :     }
     397                 :            :     /// Return the cursor obtained from postlist_table.cursor_get().
     398                 :       3257 :     HoneyCursor* get_postlist_cursor() const {
     399                 :       3257 :         return postlist_table.cursor_get();
     400                 :            :     }
     401                 :            : 
     402                 :            :     /// Return a string describing this object.
     403                 :            :     std::string get_description() const;
     404                 :            : };
     405                 :            : 
     406                 :            : #endif // XAPIAN_INCLUDED_HONEY_DATABASE_H

Generated by: LCOV version 1.11