LCOV - code coverage report
Current view: top level - backends - documentinternal.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core 954b5873a738 Lines: 104 114 91.2 %
Date: 2019-06-30 05:20:33 Functions: 22 22 100.0 %
Branches: 83 142 58.5 %

           Branch data     Line data    Source code
       1                 :            : /** @file documentinternal.h
       2                 :            :  * @brief Abstract base class for a document
       3                 :            :  */
       4                 :            : /* Copyright 2017,2018,2019 Olly Betts
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or
       7                 :            :  * modify it under the terms of the GNU General Public License as
       8                 :            :  * published by the Free Software Foundation; either version 2 of the
       9                 :            :  * License, or (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      19                 :            :  */
      20                 :            : 
      21                 :            : #ifndef XAPIAN_INCLUDED_DOCUMENTINTERNAL_H
      22                 :            : #define XAPIAN_INCLUDED_DOCUMENTINTERNAL_H
      23                 :            : 
      24                 :            : #include <xapian/document.h>
      25                 :            : #include <xapian/intrusive_ptr.h>
      26                 :            : #include <xapian/types.h>
      27                 :            : 
      28                 :            : #include "api/terminfo.h"
      29                 :            : #include "api/termlist.h"
      30                 :            : #include "backends/databaseinternal.h"
      31                 :            : #include "overflow.h"
      32                 :            : 
      33                 :            : #include <map>
      34                 :            : #include <memory>
      35                 :            : #include <string>
      36                 :            : 
      37                 :            : class DocumentTermList;
      38                 :            : class DocumentValueList;
      39                 :            : class GlassValueManager;
      40                 :            : class HoneyValueManager;
      41                 :            : class ValueStreamDocument;
      42                 :            : 
      43                 :            : namespace Xapian {
      44                 :            : 
      45                 :            : /// Abstract base class for a document.
                                        ///
                                        /// The document data, terms and values are each held behind a
                                        /// std::unique_ptr which stays NULL until that part has been fetched or
                                        /// set; the *_modified() methods below simply test those pointers.
      46                 :            : class Document::Internal : public Xapian::Internal::intrusive_base {
      47                 :            :     friend class ::DocumentTermList;
      48                 :            :     friend class ::DocumentValueList;
      49                 :            :     // For ensure_values_fetched():
      50                 :            :     friend class ::GlassValueManager;
      51                 :            :     friend class ::HoneyValueManager;
      52                 :            :     friend class ::ValueStreamDocument;
      53                 :            : 
      54                 :            :     /// Don't allow assignment.
      55                 :            :     void operator=(const Internal &) = delete;
      56                 :            : 
      57                 :            :     /// Don't allow copying.
      58                 :            :     Internal(const Internal &) = delete;
      59                 :            : 
      60                 :            :     /** The document data.
      61                 :            :      *
      62                 :            :      *  If NULL, this hasn't been fetched or set yet.
      63                 :            :      */
      64                 :            :     std::unique_ptr<std::string> data;
      65                 :            : 
      66                 :            :     /** Terms in the document and their associated metadata.
      67                 :            :      *
      68                 :            :      *  If NULL, the terms haven't been fetched or set yet.
      69                 :            :      *
      70                 :            :      *  We use std::map<> rather than std::unordered_map<> because the latter
      71                 :            :      *  invalidates existing iterators upon insert() if rehashing occurs,
      72                 :            :      *  whereas existing iterators remain valid for std::map<>.
      73                 :            :      */
      74                 :            :     mutable std::unique_ptr<std::map<std::string, TermInfo>> terms;
      75                 :            : 
      76                 :            :     /** The number of distinct terms in @a terms.
      77                 :            :      *
      78                 :            :      *  Only valid when terms is non-NULL.
      79                 :            :      *
      80                 :            :      *  This may be less than terms.size() if any terms have been deleted.
                                             *
                                             *  (remove_term() calls TermInfo::remove() on the entry rather than
                                             *  erasing it from the map, which is why the two counts can differ.)
      81                 :            :      */
      82                 :            :     mutable Xapian::termcount termlist_size;
      83                 :            : 
      84                 :            :     /** Are there any changes to term positions in @a terms?
      85                 :            :      *
      86                 :            :      *  If a document is read from a database, modified and then replaced at
      87                 :            :      *  the same docid, then we can save a lot of work if we know when there
      88                 :            :      *  are no changes to term positions, even if there are changes to terms
      89                 :            :      *  (a common example is adding filter terms to an existing document).
      90                 :            :      *
      91                 :            :      *  It's OK for this to be true when there aren't any modifications (it
      92                 :            :      *  just means that the backend can't shortcut as directly).
      93                 :            :      */
      94                 :            :     mutable bool positions_modified_ = false;
      95                 :            : 
      96                 :            :     /** Ensure terms have been fetched from @a database.
      97                 :            :      *
      98                 :            :      *  After this call, @a terms will be non-NULL.  If @a database is NULL,
      99                 :            :      *  @a terms will be initialised to an empty map if it was NULL.
     100                 :            :      */
     101                 :            :     void ensure_terms_fetched() const;
     102                 :            : 
     103                 :            :     /** Ensure values have been fetched from @a database.
     104                 :            :      *
     105                 :            :      *  After this call, @a values will be non-NULL.  If @a database is NULL,
     106                 :            :      *  @a values will be initialised to an empty map if it was NULL.
     107                 :            :      */
     108                 :            :     void ensure_values_fetched() const;
     109                 :            : 
     110                 :            :   protected:
     111                 :            :     /** Document value slots and their contents.
     112                 :            :      *
     113                 :            :      *  If NULL, the values haven't been fetched or set yet.
     114                 :            :      *
     115                 :            :      *  We use std::map<> rather than std::unordered_map<> because the latter
     116                 :            :      *  invalidates existing iterators upon insert() if rehashing occurs,
     117                 :            :      *  whereas existing iterators remain valid for std::map<>.
     118                 :            :      */
     119                 :            :     mutable std::unique_ptr<std::map<Xapian::valueno, std::string>> values;
     120                 :            : 
     121                 :            :     /** Database this document came from.
     122                 :            :      *
     123                 :            :      *  If this document didn't come from a database, this will be NULL.
     124                 :            :      */
     125                 :            :     Xapian::Internal::intrusive_ptr<const Xapian::Database::Internal> database;
     126                 :            : 
     127                 :            :     /** The document ID this document came from in @a database.
     128                 :            :      *
     129                 :            :      *  If this document didn't come from a database, this will be 0.
     130                 :            :      *
     131                 :            :      *  Note that this is the docid in the sub-database when multiple databases
     132                 :            :      *  are being searched.
     133                 :            :      */
     134                 :            :     Xapian::docid did;
     135                 :            : 
     136                 :            :     /// Constructor used by subclasses.
                                            ///
                                            /// Only @a database and @a did are initialised here; @a data, @a terms
                                            /// and @a values are left NULL, i.e. in their "not fetched or set yet"
                                            /// state.
                                            ///
                                            /// @param database_  Database the document belongs to.
                                            /// @param did_       Document ID within @a database_.
     137                 :     917694 :     Internal(Xapian::Internal::intrusive_ptr<const Xapian::Database::Internal> database_,
     138                 :            :              Xapian::docid did_)
     139                 :     917694 :         : database(database_), did(did_) {}
     140                 :            : 
     141                 :            :     /// Constructor used by RemoteDocument subclass.
                                            ///
                                            /// Takes over @a data_ and @a values_ by moving them into newly allocated
                                            /// objects, so @a data and @a values are non-NULL from the start.
                                            ///
                                            /// NOTE(review): as a consequence data_modified() and values_modified()
                                            /// return true for a freshly constructed document - confirm that callers
                                            /// expect this.
                                            ///
                                            /// @param database_  Database the document belongs to.
                                            /// @param did_       Document ID within @a database_.
                                            /// @param data_      Document data (moved from).
                                            /// @param values_    Value slots and their contents (moved from).
     142                 :     229960 :     Internal(const Xapian::Database::Internal* database_,
     143                 :            :              Xapian::docid did_,
     144                 :            :              std::string&& data_,
     145                 :            :              std::map<Xapian::valueno, std::string>&& values_)
     146                 :     229960 :         : data(new std::string(std::move(data_))),
     147                 :     229960 :           values(new std::map<Xapian::valueno, std::string>(std::move(values_))),
     148                 :            :           database(database_),
     149         [ +  - ]:     689880 :           did(did_) {}
     150                 :            : 
     151                 :            :     /** Fetch the document data from the database.
     152                 :            :      *
     153                 :            :      *  The default implementation (used when there's no associated database)
     154                 :            :      *  returns an empty string.
     155                 :            :      */
     156                 :            :     virtual std::string fetch_data() const;
     157                 :            : 
     158                 :            :     /** Fetch all set values from the database.
     159                 :            :      *
     160                 :            :      *  The default implementation (used when there's no associated database)
     161                 :            :      *  clears @a values_.
     162                 :            :      */
     163                 :            :     virtual void fetch_all_values(std::map<Xapian::valueno,
     164                 :            :                                   std::string>& values_) const;
     165                 :            : 
     166                 :            :     /** Fetch a single value from the database.
     167                 :            :      *
     168                 :            :      *  The default implementation (used when there's no associated database)
     169                 :            :      *  returns an empty string.
     170                 :            :      */
     171                 :            :     virtual std::string fetch_value(Xapian::valueno slot) const;
     172                 :            : 
     173                 :            :   public:
     174                 :            :     /// Construct an empty document.
                                            ///
                                            /// @a did is set to 0 and @a database is left default-constructed, which
                                            /// matches the documented "didn't come from a database" state of those
                                            /// members.
     175                 :     233614 :     Internal() : did(0) {}
     176                 :            : 
     177                 :            :     /** We have virtual methods and want to be able to delete derived classes
     178                 :            :      *  using a pointer to the base class, so we need a virtual destructor.
     179                 :            :      */
     180                 :            :     virtual ~Internal();
     181                 :            : 
     182                 :            :     /** Return true if the document data might have been modified.
     183                 :            :      *
     184                 :            :      *  If the document is from a database, this means modifications
     185                 :            :      *  compared to the version read, otherwise it means modifications
     186                 :            :      *  compared to an empty database.
                                             *
                                             *  Implemented as a check for @a data being non-NULL, so this is true
                                             *  after any set_data() call, even one which stores an identical string.
     187                 :            :      */
     188                 :      40408 :     bool data_modified() const { return data != NULL; }
     189                 :            : 
     190                 :            :     /** Return true if the document's terms might have been modified.
     191                 :            :      *
     192                 :            :      *  If the document is from a database, this means modifications
     193                 :            :      *  compared to the version read, otherwise it means modifications
     194                 :            :      *  compared to an empty database.
                                             *
                                             *  Implemented as a check for @a terms being non-NULL, which (per the
                                             *  ensure_terms_fetched() contract) is the case once add_term(),
                                             *  remove_term(), add_posting(), remove_posting() or remove_postings()
                                             *  has been called, whether or not the call changed anything.
     195                 :            :      */
     196                 :      40408 :     bool terms_modified() const { return terms != NULL; }
     197                 :            : 
     198                 :            :     /** Return true if the document's values might have been modified.
     199                 :            :      *
     200                 :            :      *  If the document is from a database, this means modifications
     201                 :            :      *  compared to the version read, otherwise it means modifications
     202                 :            :      *  compared to an empty database.
                                             *
                                             *  Implemented as a check for @a values being non-NULL.
                                             *
                                             *  NOTE(review): values_count() calls ensure_values_fetched(), so by that
                                             *  method's documented contract a read-only count also makes this return
                                             *  true - confirm this is acceptable to callers.
     203                 :            :      */
     204                 :      40218 :     bool values_modified() const { return values != NULL; }
     205                 :            : 
     206                 :            :     /** Return true if the document might have been modified in any way.
     207                 :            :      *
     208                 :            :      *  If the document is from a database, this means modifications
     209                 :            :      *  compared to the version read, otherwise it means modifications
     210                 :            :      *  compared to an empty database.
                                             *
                                             *  This is the logical OR of data_modified(), terms_modified() and
                                             *  values_modified(); positions_modified() is not consulted.
     211                 :            :      */
     212                 :      20105 :     bool modified() const {
     213 [ +  - ][ +  + ]:      20105 :         return data_modified() || terms_modified() || values_modified();
                  [ +  + ]
     214                 :            :     }
     215                 :            : 
     216                 :            :     /** Return true if the document's term positions might have been modified.
     217                 :            :      *
     218                 :            :      *  If the document is from a database, this means modifications
     219                 :            :      *  compared to the version read, otherwise it means modifications
     220                 :            :      *  compared to an empty database.
     221                 :            :      */
     222                 :        190 :     bool positions_modified() const { return positions_modified_; }
     223                 :            : 
     224                 :            :     /** Get the document ID this document came from.
     225                 :            :      *
     226                 :            :      *  If this document didn't come from a database, this will be 0.
     227                 :            :      *
     228                 :            :      *  Note that this is the docid in the sub-database when multiple databases
     229                 :            :      *  are being searched.
     230                 :            :      */
     231                 :      64856 :     Xapian::docid get_docid() const { return did; }
     232                 :            : 
     233                 :            :     /// Get the document data.
                                            ///
                                            /// Returns a copy of the stored data if it has been fetched or set,
                                            /// otherwise whatever fetch_data() returns.  The fetched string is not
                                            /// stored in @a data, so every such call goes back to fetch_data().
     234                 :     439011 :     std::string get_data() const {
     235         [ +  + ]:     439011 :         if (data)
     236                 :      83217 :             return *data;
     237                 :     355794 :         return fetch_data();
     238                 :            :     }
     239                 :            : 
     240                 :            :     /// Set the document data.
                                            ///
                                            /// Stores a copy of @a data_, replacing any previously held data.
     241                 :      82541 :     void set_data(const std::string& data_) {
     242         [ +  - ]:      82541 :         data.reset(new std::string(data_));
     243                 :      82541 :     }
     244                 :            : 
     245                 :            :     /// Add a term to this document.
                                            ///
                                            /// @param term     The term to add.
                                            /// @param wdf_inc  Amount to increase the term's wdf by (used as the
                                            ///                 initial wdf if there's no entry for @a term yet).
     246                 :     536569 :     void add_term(const std::string& term, Xapian::termcount wdf_inc) {
     247         [ +  - ]:     536569 :         ensure_terms_fetched();
     248                 :            : 
     249         [ +  - ]:     536569 :         auto i = terms->find(term);
     250         [ +  + ]:     536569 :         if (i == terms->end()) {
     251                 :     534864 :             ++termlist_size;
                                                        // NOTE(review): add_posting() emplaces the key and TermInfo
                                                        // directly; the make_pair() temporary here looks unnecessary.
     252 [ +  - ][ +  - ]:     534864 :             terms->emplace(make_pair(term, TermInfo(wdf_inc)));
                  [ +  - ]
     253                 :            :         } else {
                                                        // An entry already exists.  A true result from increase_wdf()
                                                        // is counted as a new distinct term - presumably it means the
                                                        // entry had been marked deleted (confirm against TermInfo).
     254 [ +  - ][ -  + ]:       1705 :             if (i->second.increase_wdf(wdf_inc))
     255                 :          0 :                 ++termlist_size;
     256                 :            :         }
     257                 :     536569 :     }
     258                 :            : 
     259                 :            :     /// Remove a term from this document.
                                            ///
                                            /// @param term  The term to remove.
                                            ///
                                            /// @return true if the term was removed; false if there's no entry for
                                            ///         @a term or TermInfo::remove() returns false (presumably because
                                            ///         the entry was already marked deleted - confirm).
     260                 :         87 :     bool remove_term(const std::string& term) {
     261         [ +  - ]:         87 :         ensure_terms_fetched();
     262                 :            : 
     263         [ +  - ]:         87 :         auto i = terms->find(term);
     264         [ -  + ]:         87 :         if (i == terms->end()) {
     265                 :          0 :             return false;
     266                 :            :         }
                                                    // The flag is set before removal is attempted, so it stays set even
                                                    // if remove() below returns false.
     267 [ +  - ][ +  + ]:         87 :         if (i->second.has_positions()) {
     268                 :         86 :             positions_modified_ = true;
     269                 :            :         }
     270 [ +  - ][ -  + ]:         87 :         if (!i->second.remove()) {
     271                 :          0 :             return false;
     272                 :            :         }
     273                 :         87 :         --termlist_size;
     274                 :         87 :         return true;
     275                 :            :     }
     276                 :            : 
     277                 :            :     /// Add a posting for a term.
                                            ///
                                            /// Always marks the term positions as modified.
                                            ///
                                            /// @param term      The term to add a posting for.
                                            /// @param term_pos  Position of the posting.
                                            /// @param wdf_inc   Amount to increase the term's wdf by.
     278                 :    4140451 :     void add_posting(const std::string& term,
     279                 :            :                      Xapian::termpos term_pos,
     280                 :            :                      Xapian::termcount wdf_inc) {
     281         [ +  - ]:    4140451 :         ensure_terms_fetched();
     282                 :    4140451 :         positions_modified_ = true;
     283                 :            : 
     284         [ +  - ]:    4140451 :         auto i = terms->find(term);
     285         [ +  + ]:    4140451 :         if (i == terms->end()) {
     286                 :    2843852 :             ++termlist_size;
     287 [ +  - ][ +  - ]:    2843852 :             terms->emplace(term, TermInfo(wdf_inc, term_pos));
     288                 :    4140451 :             return;
     289                 :            :         }
                                                    // A true result from add_position() is counted as a new distinct
                                                    // term - presumably a deleted entry being revived (confirm against
                                                    // TermInfo).
     290 [ +  - ][ +  + ]:    1296599 :         if (i->second.add_position(wdf_inc, term_pos))
     291                 :    1296599 :             ++termlist_size;
     292                 :            :     }
     293                 :            : 
                                            /** Result codes for remove_posting() and remove_postings().
                                             *
                                             *  OK       The request was carried out.
                                             *  NO_TERM  There's no entry for the term, or its entry is marked as
                                             *           deleted.
                                             *  NO_POS   TermInfo::remove_position() returned false (presumably the
                                             *           position wasn't present - confirm).
                                             */
     294                 :            :     enum remove_posting_result { OK, NO_TERM, NO_POS };
     295                 :            : 
     296                 :            :     /// Remove a posting for a term.
                                            ///
                                            /// @param term      The term to remove a posting from.
                                            /// @param term_pos  Position to remove.
                                            /// @param wdf_dec   Amount to decrease the term's wdf by.
                                            ///
                                            /// @return NO_TERM if @a term has no entry or its entry is marked as
                                            ///         deleted, NO_POS if TermInfo::remove_position() returns false,
                                            ///         otherwise OK.
     297                 :            :     remove_posting_result
     298                 :         14 :     remove_posting(const std::string& term,
     299                 :            :                    Xapian::termpos term_pos,
     300                 :            :                    Xapian::termcount wdf_dec) {
     301         [ +  - ]:         14 :         ensure_terms_fetched();
     302                 :            : 
     303         [ +  - ]:         14 :         auto i = terms->find(term);
     304 [ +  - ][ +  - ]:         14 :         if (i == terms->end() || i->second.is_deleted()) {
         [ -  + ][ +  - ]
           [ -  +  #  # ]
     305                 :          0 :             return remove_posting_result::NO_TERM;
     306                 :            :         }
     307 [ +  - ][ -  + ]:         14 :         if (!i->second.remove_position(term_pos)) {
     308                 :          0 :             return remove_posting_result::NO_POS;
     309                 :            :         }
                                                    // A true result from decrease_wdf() means the term stops counting
                                                    // towards termlist_size - presumably the entry became deleted
                                                    // (confirm against TermInfo).
     310 [ +  - ][ +  + ]:         14 :         if (i->second.decrease_wdf(wdf_dec))
     311                 :          5 :             --termlist_size;
     312                 :         14 :         positions_modified_ = true;
     313                 :         14 :         return remove_posting_result::OK;
     314                 :            :     }
     315                 :            : 
     316                 :            :     /** Remove a range of postings for a term.
     317                 :            :      *
     318                 :            :      *  Can only return OK or NO_TERM.
                                             *
                                             *  @param term            The term to remove postings from.
                                             *  @param term_pos_first  Start of the position range, passed through to
                                             *                         TermInfo::remove_positions().
                                             *  @param term_pos_last   End of the position range, passed through to
                                             *                         TermInfo::remove_positions().
                                             *  @param wdf_dec         Amount to decrease the wdf by for each position
                                             *                         removed.
                                             *  @param[out] n_removed  Set to the number of positions removed.  Left
                                             *                         untouched when NO_TERM is returned.
     319                 :            :      */
     320                 :            :     remove_posting_result
     321                 :          7 :     remove_postings(const std::string& term,
     322                 :            :                     Xapian::termpos term_pos_first,
     323                 :            :                     Xapian::termpos term_pos_last,
     324                 :            :                     Xapian::termcount wdf_dec,
     325                 :            :                     Xapian::termpos& n_removed) {
     326         [ +  - ]:          7 :         ensure_terms_fetched();
     327                 :            : 
     328         [ +  - ]:          7 :         auto i = terms->find(term);
     329 [ +  - ][ +  - ]:          7 :         if (i == terms->end() || i->second.is_deleted()) {
         [ -  + ][ +  - ]
           [ -  +  #  # ]
     330                 :          0 :             return remove_posting_result::NO_TERM;
     331                 :            :         }
     332                 :          7 :         n_removed = i->second.remove_positions(term_pos_first,
     333         [ +  - ]:          7 :                                                term_pos_last);
     334         [ +  + ]:          7 :         if (n_removed) {
     335                 :          5 :             positions_modified_ = true;
                                                        // Total wdf decrease is n_removed * wdf_dec, saturated on
                                                        // overflow.
     336                 :            :             Xapian::termcount wdf_delta;
     337         [ -  + ]:          5 :             if (mul_overflows(n_removed, wdf_dec, wdf_delta)) {
     338                 :            :                 // Decreasing by the maximum value will zero the wdf.
                                                            // NOTE(review): numeric_limits is unqualified and <limits>
                                                            // isn't among this header's visible includes - presumably
                                                            // both are supplied indirectly (another header plus a
                                                            // using-directive); confirm.
     339                 :          0 :                 wdf_delta = numeric_limits<Xapian::termcount>::max();
     340                 :            :             }
     341 [ +  - ][ +  + ]:          5 :             if (i->second.decrease_wdf(wdf_delta))
     342                 :          5 :                 --termlist_size;
     343                 :            :         }
     344                 :          7 :         return remove_posting_result::OK;
     345                 :            :     }
     346                 :            : 
     347                 :            :     /// Clear all terms from the document.
                                            ///
                                            /// If the terms were never fetched but there is a database, an empty map
                                            /// is installed; @a terms being non-NULL then takes precedence over the
                                            /// database contents (see termlist_count()).
     348                 :          7 :     void clear_terms() {
     349         [ +  + ]:          7 :         if (!terms) {
     350         [ -  + ]:          1 :             if (database.get()) {
                                                            // NOTE(review): unqualified map/string depend on names
                                                            // brought into scope somewhere not visible in this header.
                                                            // NOTE(review): unlike the branch below, this path leaves
                                                            // positions_modified_ untouched even though the database
                                                            // may hold positional data - confirm this is intentional.
     351         [ #  # ]:          0 :                 terms.reset(new map<string, TermInfo>());
     352                 :          0 :                 termlist_size = 0;
     353                 :            :             } else {
     354                 :            :                 // We didn't come from a database, so there are no unfetched
     355                 :            :                 // terms to clear.
     356                 :            :             }
     357                 :            :         } else {
     358                 :          6 :             terms->clear();
     359                 :          6 :             termlist_size = 0;
     360                 :            :             // Assume there was positional data if there's any in the database.
     361 [ -  + ][ #  # ]:          6 :             positions_modified_ = database.get() && database->has_positions();
     362                 :            :         }
     363                 :          7 :     }
     364                 :            : 
     365                 :            :     /// Return the number of distinct terms in this document.
                                            ///
                                            /// Uses @a termlist_size when the terms are loaded, returns 0 when there
                                            /// is no database, and otherwise opens the database's term list for
                                            /// @a did and returns its size.
     366                 :     206229 :     Xapian::termcount termlist_count() const {
     367         [ +  + ]:     206229 :         if (terms)
     368                 :      99425 :             return termlist_size;
     369                 :            : 
     370         [ +  + ]:     106804 :         if (!database.get())
     371                 :      76328 :             return 0;
     372                 :            : 
     373         [ +  - ]:      30476 :         std::unique_ptr<TermList> tl(database->open_term_list(did));
     374                 :            :         // get_approx_size() is exact for TermList from a database.
     375         [ +  - ]:     206229 :         return tl->get_approx_size();
     376                 :            :     }
     377                 :            : 
     378                 :            :     /** Start iterating the terms in this document.
     379                 :            :      *
     380                 :            :      *  @return A new TermList object (caller takes ownership) or NULL if
     381                 :            :      *          there are no terms.
     382                 :            :      */
     383                 :            :     TermList* open_term_list() const;
     384                 :            : 
     385                 :            :     /** Read a value slot in this document.
     386                 :            :      *
     387                 :            :      *  @return The value in slot @a slot, or an empty string if not set.
                                             *
                                             *  If the values haven't been fetched or set, this delegates to
                                             *  fetch_value() for just this slot and leaves @a values NULL.
     388                 :            :      */
     389                 :     772835 :     std::string get_value(Xapian::valueno slot) const {
     390         [ +  + ]:     772835 :         if (values) {
     391         [ +  - ]:     229250 :             auto i = values->find(slot);
     392         [ +  + ]:     229250 :             if (i != values->end())
     393         [ +  - ]:     210871 :                 return i->second;
     394         [ +  - ]:     229250 :             return std::string();
     395                 :            :         }
     396                 :            : 
     397                 :     772835 :         return fetch_value(slot);
     398                 :            :     }
     399                 :            : 
     400                 :            :     /// Add a value to a slot in this document.
                                            ///
                                            /// @param slot   The value slot to set.
                                            /// @param value  The new contents; an empty string removes any value
                                            ///               currently in @a slot.
     401                 :     914947 :     void add_value(Xapian::valueno slot, const std::string& value) {
     402                 :     914947 :         ensure_values_fetched();
     403                 :            : 
     404         [ +  + ]:     914947 :         if (!value.empty()) {
     405                 :     909100 :             (*values)[slot] = value;
     406                 :            :         } else {
     407                 :            :             // Empty values aren't stored, but replace any existing value by
     408                 :            :             // removing it.
     409                 :       5847 :             values->erase(slot);
     410                 :            :         }
     411                 :     914947 :     }
     412                 :            : 
     413                 :            :     /// Clear all value slots in this document.
                                            ///
                                            /// If the values were never fetched but there is a database, an empty map
                                            /// is installed so that get_value() and values_count() use it rather than
                                            /// fetching.
     414                 :          3 :     void clear_values() {
     415         [ +  + ]:          3 :         if (!values) {
     416         [ -  + ]:          1 :             if (database.get()) {
                                                            // NOTE(review): unqualified map/string depend on names
                                                            // brought into scope somewhere not visible in this header.
     417         [ #  # ]:          0 :                 values.reset(new map<Xapian::valueno, string>());
     418                 :            :             } else {
     419                 :            :                 // We didn't come from a database, so there are no unfetched
     420                 :            :                 // values to clear.
     421                 :            :             }
     422                 :            :         } else {
     423                 :          2 :             values->clear();
     424                 :            :         }
     425                 :          3 :     }
     426                 :            : 
     427                 :            :     /// Count the value slots used in this document.
                                            ///
                                            /// Calls ensure_values_fetched() first, so @a values is non-NULL
                                            /// afterwards.
     428                 :     134067 :     Xapian::valueno values_count() const {
     429                 :     134067 :         ensure_values_fetched();
     430                 :     134067 :         return values->size();
     431                 :            :     }
     432                 :            : 
                                            // NOTE(review): undocumented and defined elsewhere; presumably returns
                                            // an iterator positioned at this document's first value - confirm.
     433                 :            :     Xapian::ValueIterator values_begin() const;
     434                 :            : 
     435                 :            :     /// Return a string describing this object.
     436                 :            :     std::string get_description() const;
     437                 :            : };
     438                 :            : 
     439                 :            : }
     440                 :            : 
     441                 :            : #endif // XAPIAN_INCLUDED_DOCUMENTINTERNAL_H

Generated by: LCOV version 1.11