LCOV - code coverage report
Current view: top level - include/xapian - postingsource.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core 7822d31adece Lines: 24 24 100.0 %
Date: 2019-05-23 11:15:29 Functions: 15 15 100.0 %
Branches: 6 12 50.0 %

           Branch data     Line data    Source code
       1                 :            : /** @file postingsource.h
       2                 :            :  *  @brief External sources of posting information
       3                 :            :  */
       4                 :            : /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2019 Olly Betts
       5                 :            :  * Copyright (C) 2008,2009 Lemur Consulting Ltd
       6                 :            :  *
       7                 :            :  * This program is free software; you can redistribute it and/or modify
       8                 :            :  * it under the terms of the GNU General Public License as published by
       9                 :            :  * the Free Software Foundation; either version 2 of the License, or
      10                 :            :  * (at your option) any later version.
      11                 :            :  *
      12                 :            :  * This program is distributed in the hope that it will be useful,
      13                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15                 :            :  * GNU General Public License for more details.
      16                 :            :  *
      17                 :            :  * You should have received a copy of the GNU General Public License
      18                 :            :  * along with this program; if not, write to the Free Software
      19                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      20                 :            :  */
      21                 :            : 
      22                 :            : #ifndef XAPIAN_INCLUDED_POSTINGSOURCE_H
      23                 :            : #define XAPIAN_INCLUDED_POSTINGSOURCE_H
      24                 :            : 
      25                 :            : #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
      26                 :            : # error "Never use <xapian/postingsource.h> directly; include <xapian.h> instead."
      27                 :            : #endif
      28                 :            : 
      29                 :            : #include <xapian/attributes.h>
      30                 :            : #include <xapian/database.h>
      31                 :            : #include <xapian/intrusive_ptr.h>
      32                 :            : #include <xapian/postingiterator.h>
      33                 :            : #include <xapian/types.h>
      34                 :            : #include <xapian/valueiterator.h>
      35                 :            : #include <xapian/visibility.h>
      36                 :            : 
      37                 :            : #include <string>
      38                 :            : #include <map>
      39                 :            : 
      40                 :            : namespace Xapian {
      41                 :            : 
      42                 :            : class Registry;
      43                 :            : 
      44                 :            : /** Base class which provides an "external" source of postings.
      45                 :            :  */
      46                 :            : class XAPIAN_VISIBILITY_DEFAULT PostingSource
      47                 :            :     : public Xapian::Internal::opt_intrusive_base {
      48                 :            :     /// Don't allow assignment.
      49                 :            :     void operator=(const PostingSource &) = delete;
      50                 :            : 
      51                 :            :     /// Don't allow copying.
      52                 :            :     PostingSource(const PostingSource &) = delete;
      53                 :            : 
      54                 :            :     /// The current upper bound on what get_weight() can return.
      55                 :            :     double max_weight_;
      56                 :            : 
      57                 :            :     /** The object to inform of maxweight changes.
      58                 :            :      *
      59                 :            :      *  We store this as a (void*) to avoid needing to declare an internal
      60                 :            :      *  type in an external header.  It's actually (PostListTree *).
      61                 :            :      */
      62                 :            :     void * matcher_;
      63                 :            : 
      64                 :            :   public:
      65                 :            :     /// Allow subclasses to be instantiated.
      66                 :       9260 :     XAPIAN_NOTHROW(PostingSource())
      67                 :       9260 :         : max_weight_(0), matcher_(NULL) { }
      68                 :            : 
      69                 :            :     /** @private @internal Set the object to inform of maxweight changes.
      70                 :            :      *
      71                 :            :      *  This method is for internal use only - it would be private except that
      72                 :            :      *  would force us to forward declare an internal class in an external API
      73                 :            :      *  header just to make it a friend.
      74                 :            :      */
      75                 :            :     XAPIAN_VISIBILITY_INTERNAL
      76                 :        666 :     void register_matcher_(void * matcher) { matcher_ = matcher; }
      77                 :            : 
      78                 :            :     // Destructor.
      79                 :            :     virtual ~PostingSource();
      80                 :            : 
      81                 :            :     /** A lower bound on the number of documents this object can return.
      82                 :            :      *
      83                 :            :      *  Xapian will always call reset() on a PostingSource before calling this
      84                 :            :      *  for the first time.
      85                 :            :      */
      86                 :            :     virtual Xapian::doccount get_termfreq_min() const = 0;
      87                 :            : 
      88                 :            :     /** An estimate of the number of documents this object can return.
      89                 :            :      *
      90                 :            :      *  It must always be true that:
      91                 :            :      *
      92                 :            :      *  get_termfreq_min() <= get_termfreq_est() <= get_termfreq_max()
      93                 :            :      *
      94                 :            :      *  Xapian will always call reset() on a PostingSource before calling this
      95                 :            :      *  for the first time.
      96                 :            :      */
      97                 :            :     virtual Xapian::doccount get_termfreq_est() const = 0;
      98                 :            : 
      99                 :            :     /** An upper bound on the number of documents this object can return.
     100                 :            :      *
     101                 :            :      *  Xapian will always call reset() on a PostingSource before calling this
     102                 :            :      *  for the first time.
     103                 :            :      */
     104                 :            :     virtual Xapian::doccount get_termfreq_max() const = 0;
     105                 :            : 
     106                 :            :     /** Specify an upper bound on what get_weight() will return from now on.
     107                 :            :      *
     108                 :            :      *  This upper bound is used by the matcher to perform various
     109                 :            :      *  optimisations, so if you can return a good bound, then matches
     110                 :            :      *  will generally run faster.
     111                 :            :      *
     112                 :            :      *  This method should be called after calling reset(), and may be called
     113                 :            :      *  during iteration if the upper bound drops.  It is probably only useful
     114                 :            :      *  to call from subclasses (it was actually a "protected" method prior to
     115                 :            :      *  Xapian 1.3.4, but that makes it tricky to wrap for other languages).
     116                 :            :      *
     117                 :            :      *  It is valid for the posting source to have returned a higher value from
     118                 :            :      *  get_weight() earlier in the iteration, but the posting source must not
     119                 :            :      *  return a higher value from get_weight() than the currently set upper
     120                 :            :      *  bound, and the upper bound must not be increased (until reset() has
     121                 :            :      *  been called).
     122                 :            :      *
     123                 :            :      *  If you don't call this method, the upper bound will default to 0, for
     124                 :            :      *  convenience when implementing "weight-less" PostingSource subclasses.
     125                 :            :      *
     126                 :            :      *  @param max_weight       The upper bound to set.
     127                 :            :      */
     128                 :            :     void set_maxweight(double max_weight);
     129                 :            : 
     130                 :            :     /// Return the currently set upper bound on what get_weight() can return.
     131                 :       8982 :     double XAPIAN_NOTHROW(get_maxweight() const) { return max_weight_; }
     132                 :            : 
     133                 :            :     /** Return the weight contribution for the current document.
     134                 :            :      *
     135                 :            :      *  This default implementation always returns 0, for convenience when
     136                 :            :      *  implementing "weight-less" PostingSource subclasses.
     137                 :            :      *
     138                 :            :      *  This method may assume that it will only be called when there is a
     139                 :            :      *  "current document".  In detail: Xapian will always call reset() on a
     140                 :            :      *  PostingSource before calling this for the first time.  It will also
     141                 :            :      *  only call this if the PostingSource reports that it is pointing to a
     142                 :            :      *  valid document (ie, it will not call it before calling at least one of
     143                 :            :      *  next(), skip_to() or check(), and will ensure that the PostingSource is
     144                 :            :      *  not at the end by calling at_end()).
     145                 :            :      */
     146                 :            :     virtual double get_weight() const;
     147                 :            : 
     148                 :            :     /** Return the current docid.
     149                 :            :      *
     150                 :            :      *  This method may assume that it will only be called when there is a
     151                 :            :      *  "current document".  See @a get_weight() for details.
     152                 :            :      *
     153                 :            :      *  Note: in the case of a multi-database search, the returned docid should
     154                 :            :      *  be in the single subdatabase relevant to this posting source.  See the
     155                 :            :      *  @a reset() method for details.
     156                 :            :      */
     157                 :            :     virtual Xapian::docid get_docid() const = 0;
     158                 :            : 
     159                 :            :     /** Advance the current position to the next matching document.
     160                 :            :      *
     161                 :            :      *  The PostingSource starts before the first entry in the list, so next(),
     162                 :            :      *  skip_to() or check() must be called before any methods which need the
     163                 :            :      *  context of the current position.
     164                 :            :      *
     165                 :            :      *  Xapian will always call reset() on a PostingSource before calling this
     166                 :            :      *  for the first time.
     167                 :            :      *
     168                 :            :      *  @param min_wt   The minimum weight contribution that is needed (this is
     169                 :            :      *                  just a hint which subclasses may ignore).
     170                 :            :      */
     171                 :            :     virtual void next(double min_wt) = 0;
     172                 :            : 
     173                 :            :     /** Advance to the specified docid.
     174                 :            :      *
     175                 :            :      *  If the specified docid isn't in the list, position ourselves on the
     176                 :            :      *  first document after it (or at_end() if no greater docids are present).
     177                 :            :      *
     178                 :            :      *  If the current position is already the specified docid, this method will
     179                 :            :      *  leave the position unmodified.
     180                 :            :      *
     181                 :            :      *  If the specified docid is earlier than the current position, the
     182                 :            :      *  behaviour is unspecified.  A sensible behaviour would be to leave the
     183                 :            :      *  current position unmodified, but it is also reasonable to move to the
     184                 :            :      *  specified docid.
     185                 :            :      *
     186                 :            :      *  The default implementation calls next() repeatedly, which works but
     187                 :            :      *  skip_to() can often be implemented much more efficiently.
     188                 :            :      *
     189                 :            :      *  Xapian will always call reset() on a PostingSource before calling this
     190                 :            :      *  for the first time.
     191                 :            :      *
     192                 :            :      *  Note: in the case of a multi-database search, the docid specified is
     193                 :            :      *  the docid in the single subdatabase relevant to this posting source.
     194                 :            :      *  See the @a reset() method for details.
     195                 :            :      *
     196                 :            :      *  @param did      The document id to advance to.
     197                 :            :      *  @param min_wt   The minimum weight contribution that is needed (this is
     198                 :            :      *                  just a hint which subclasses may ignore).
     199                 :            :      */
     200                 :            :     virtual void skip_to(Xapian::docid did, double min_wt);
     201                 :            : 
     202                 :            :     /** Check if the specified docid occurs.
     203                 :            :      *
     204                 :            :      *  The caller is required to ensure that the specified document id @a did
     205                 :            :      *  actually exists in the database.  If @a did occurs in this posting
     206                 :            :      *  source, this method must move to it and return true.  If not, it may either:
     207                 :            :      *
     208                 :            :      *   - return true, having moved to a definite position (including
     209                 :            :      *   "at_end"), which must be the same position as skip_to() would have
     210                 :            :      *   moved to.
     211                 :            :      *
     212                 :            :      *  or
     213                 :            :      *
     214                 :            :      *   - return false, having moved to an "indeterminate" position, such that
     215                 :            :      *   a subsequent call to next() or skip_to() will move to the next
     216                 :            :      *   matching position after @a did.
     217                 :            :      *
     218                 :            :      *  Generally, this method should act like skip_to() and return true if
     219                 :            :      *  that can be done at little extra cost.
     220                 :            :      *
     221                 :            :      *  Otherwise it should simply check if a particular docid is present,
     222                 :            :      *  returning true if it is, and false if it isn't.
     223                 :            :      *
     224                 :            :      *  The default implementation calls skip_to() and always returns true.
     225                 :            :      *
     226                 :            :      *  Xapian will always call reset() on a PostingSource before calling this
     227                 :            :      *  for the first time.
     228                 :            :      *
     229                 :            :      *  Note: in the case of a multi-database search, the docid specified is
     230                 :            :      *  the docid in the single subdatabase relevant to this posting source.
     231                 :            :      *  See the @a reset() method for details.
     232                 :            :      *
     233                 :            :      *  @param did      The document id to check.
     234                 :            :      *  @param min_wt   The minimum weight contribution that is needed (this is
     235                 :            :      *                  just a hint which subclasses may ignore).
     236                 :            :      */
     237                 :            :     virtual bool check(Xapian::docid did, double min_wt);
     238                 :            : 
     239                 :            :     /** Return true if the current position is past the last entry in this list.
     240                 :            :      *
     241                 :            :      *  At least one of @a next(), @a skip_to() or @a check() will be called
     242                 :            :      *  before this method is first called.
     243                 :            :      */
     244                 :            :     virtual bool at_end() const = 0;
     245                 :            : 
     246                 :            :     /** Clone the posting source.
     247                 :            :      *
     248                 :            :      *  The clone should inherit the configuration of the parent, but need not
     249                 :            :      *  inherit the state.  ie, the clone does not need to be in the same
     250                 :            :      *  iteration position as the original: the matcher will always call
     251                 :            :      *  reset() on the clone before attempting to move the iterator, or read
     252                 :            :      *  the information about the current position of the iterator.
     253                 :            :      *
     254                 :            :      *  This may return NULL to indicate that cloning is not supported.  In
     255                 :            :      *  this case, the PostingSource may only be used with a single-database
     256                 :            :      *  search.
     257                 :            :      *
     258                 :            :      *  The default implementation returns NULL.
     259                 :            :      *
     260                 :            :      *  Note that the returned object will be deallocated by Xapian after use
     261                 :            :      *  with "delete".  If you want to handle the deletion in a special way
     262                 :            :      *  (for example when wrapping the Xapian API for use from another
     263                 :            :      *  language) then you can define a static <code>operator delete</code>
     264                 :            :      *  method in your subclass as shown here:
     265                 :            :      *  https://trac.xapian.org/ticket/554#comment:1
     266                 :            :      */
     267                 :            :     virtual PostingSource * clone() const;
     268                 :            : 
     269                 :            :     /** Name of the posting source class.
     270                 :            :      *
     271                 :            :      *  This is used when serialising and unserialising posting sources; for
     272                 :            :      *  example, for performing remote searches.
     273                 :            :      *
     274                 :            :      *  If the subclass is in a C++ namespace, the namespace should be included
     275                 :            :      *  in the name, using "::" as a separator.  For example, for a
     276                 :            :      *  PostingSource subclass called "FooPostingSource" in the "Xapian"
     277                 :            :      *  namespace the result of this call should be "Xapian::FooPostingSource".
     278                 :            :      *
     279                 :            :      *  This should only be implemented if serialise() and unserialise() are
     280                 :            :      *  also implemented.  The default implementation returns an empty string.
     281                 :            :      *
     282                 :            :      *  If this returns an empty string, Xapian will assume that serialise()
     283                 :            :      *  and unserialise() are not implemented.
     284                 :            :      */
     285                 :            :     virtual std::string name() const;
     286                 :            : 
     287                 :            :     /** Serialise object parameters into a string.
     288                 :            :      *
     289                 :            :      *  The serialised parameters should represent the configuration of the
     290                 :            :      *  posting source, but need not (indeed, should not) represent the current
     291                 :            :      *  iteration state.
     292                 :            :      *
     293                 :            :      *  If you don't want to support the remote backend, you can use the
     294                 :            :      *  default implementation which simply throws Xapian::UnimplementedError.
     295                 :            :      */
     296                 :            :     virtual std::string serialise() const;
     297                 :            : 
     298                 :            :     /** Create object given string serialisation returned by serialise().
     299                 :            :      *
     300                 :            :      *  Note that the returned object will be deallocated by Xapian after use
     301                 :            :      *  with "delete".  If you want to handle the deletion in a special way
     302                 :            :      *  (for example when wrapping the Xapian API for use from another
     303                 :            :      *  language) then you can define a static <code>operator delete</code>
     304                 :            :      *  method in your subclass as shown here:
     305                 :            :      *  https://trac.xapian.org/ticket/554#comment:1
     306                 :            :      *
     307                 :            :      *  If you don't want to support the remote backend, you can use the
     308                 :            :      *  default implementation which simply throws Xapian::UnimplementedError.
     309                 :            :      *
     310                 :            :      *  @param serialised A serialised instance of this PostingSource subclass.
     311                 :            :      */
     312                 :            :     virtual PostingSource * unserialise(const std::string &serialised) const;
     313                 :            : 
     314                 :            :     /** Create object given string serialisation returned by serialise().
     315                 :            :      *
     316                 :            :      *  Note that the returned object will be deallocated by Xapian after use
     317                 :            :      *  with "delete".  If you want to handle the deletion in a special way
     318                 :            :      *  (for example when wrapping the Xapian API for use from another
     319                 :            :      *  language) then you can define a static <code>operator delete</code>
     320                 :            :      *  method in your subclass as shown here:
     321                 :            :      *  https://trac.xapian.org/ticket/554#comment:1
     322                 :            :      *
     323                 :            :      *  This method is supplied with a Registry object, which can be used when
     324                 :            :      *  unserialising objects contained within the posting source.  The default
     325                 :            :      *  implementation simply calls unserialise() which doesn't take the
     326                 :            :      *  Registry object, so you do not need to implement this method unless you
     327                 :            :      *  want to take advantage of the Registry object when unserialising.
     328                 :            :      *
     329                 :            :      *  @param serialised A serialised instance of this PostingSource subclass.
     330                 :            :      *  @param registry   The Xapian::Registry object to use.
     331                 :            :      */
     332                 :            :     virtual PostingSource * unserialise_with_registry(const std::string &serialised,
     333                 :            :                                       const Registry & registry) const;
     334                 :            : 
     335                 :            :     /** Set this PostingSource to the start of the list of postings.
     336                 :            :      *
     337                 :            :      *  This is called automatically by the matcher prior to each query being
     338                 :            :      *  processed.
     339                 :            :      *
     340                 :            :      *  If a PostingSource is used for multiple searches, @a reset() will
     341                 :            :      *  therefore be called multiple times, and must handle this by using the
     342                 :            :      *  database passed in the most recent call.
     343                 :            :      *
     344                 :            :      *  @param db The database which the PostingSource should iterate through.
     345                 :            :      *  @param shard_index  The 0-based index indicating which shard in a
     346                 :            :      *                      multi-database db is.  This can be useful if you
     347                 :            :      *                      have an external source of postings corresponding
     348                 :            :      *                      to each shard.
     349                 :            :      *
     350                 :            :      *  Note: in the case of a multi-database search, a separate PostingSource
     351                 :            :      *  will be used for each database (the separate PostingSources will be
     352                 :            :      *  obtained using @a clone()), and each PostingSource will be passed one of
     353                 :            :      *  the sub-databases as the @a db parameter here.  The @a db parameter
     354                 :            :      *  will therefore always refer to a single database.  All docids passed
     355                 :            :      *  to, or returned from, the PostingSource refer to docids in that single
     356                 :            :      *  database, rather than in the multi-database.
     357                 :            :      *
     358                 :            :      *  A default implementation is provided which calls the older init()
     359                 :            :      *  method to allow existing subclasses to continue to work, but the
     360                 :            :      *  default implementation of init() throws Xapian::InvalidOperationError
     361                 :            :      *  so you must override either this method or init().  In new code,
     362                 :            :      *  override this method in preference.
     363                 :            :      *
     364                 :            :      *  @since Added in Xapian 1.5.0.
     365                 :            :      */
     366                 :            :     virtual void reset(const Database& db, Xapian::doccount shard_index);
     367                 :            : 
     368                 :            :     /** Older method which did the same job as reset().
     369                 :            :      *
     370                 :            :      *  Prior to 1.5.0, instead of reset() there was a method called init()
     371                 :            :      *  taking one parameter.  The default implementation of reset() calls
     372                 :            :      *  init() to allow existing subclasses to continue to work.
     373                 :            :      *
     374                 :            :      *  A default implementation of init() is provided so that new subclasses
     375                 :            :      *  can just override reset() (the default implementation should not
     376                 :            :      *  actually get called, and will throw Xapian::InvalidOperationError if
     377                 :            :      *  it is).
     378                 :            :      */
     379                 :            :     virtual void init(const Database& db);
     380                 :            : 
     381                 :            :     /** Return a string describing this object.
     382                 :            :      *
     383                 :            :      *  This default implementation returns a generic answer.  This default
     384                 :            :      *  is provided to avoid forcing those deriving their own PostingSource
     385                 :            :      *  subclass to implement this method (they may not care what
     386                 :            :      *  get_description() gives for their subclass).
     387                 :            :      */
     388                 :            :     virtual std::string get_description() const;
     389                 :            : 
     390                 :            :     /** Start reference counting this object.
     391                 :            :      *
     392                 :            :      *  You can hand ownership of a dynamically allocated PostingSource
     393                 :            :      *  object to Xapian by calling release() and then passing the object to a
     394                 :            :      *  Xapian method.  Xapian will arrange to delete the object once it is no
     395                 :            :      *  longer required.
     396                 :            :      */
     397                 :        656 :     PostingSource * release() {
     398                 :        656 :         opt_intrusive_base::release();
     399                 :        656 :         return this;
     400                 :            :     }
     401                 :            : 
     402                 :            :     /** Start reference counting this object.
     403                 :            :      *
     404                 :            :      *  You can hand ownership of a dynamically allocated PostingSource
     405                 :            :      *  object to Xapian by calling release() and then passing the object to a
     406                 :            :      *  Xapian method.  Xapian will arrange to delete the object once it is no
     407                 :            :      *  longer required.
     408                 :            :      */
     409                 :            :     const PostingSource * release() const {
     410                 :            :         opt_intrusive_base::release();
     411                 :            :         return this;
     412                 :            :     }
     413                 :            : };
     414                 :            : 
     415                 :            : 
     416                 :            : /** A posting source which generates weights from a value slot.
     417                 :            :  *
     418                 :            :  *  This is a base class for classes which generate weights using values stored
     419                 :            :  *  in the specified slot. For example, ValueWeightPostingSource uses
     420                 :            :  *  sortable_unserialise to convert values directly to weights.
     421                 :            :  *
     422                 :            :  *  The upper bound on the weight returned is set to DBL_MAX.  Subclasses
     423                 :            :  *  should call set_maxweight() in their init() methods after calling
     424                 :            :  *  ValuePostingSource::init() if they know a tighter bound on the weight.
     425                 :            :  */
     426         [ -  + ]:      12480 : class XAPIAN_VISIBILITY_DEFAULT ValuePostingSource : public PostingSource {
     427                 :            :     Xapian::Database db;
     428                 :            : 
     429                 :            :     Xapian::valueno slot;
     430                 :            : 
     431                 :            :     Xapian::ValueIterator value_it;
     432                 :            : 
     433                 :            :     bool started;
     434                 :            : 
     435                 :            :     Xapian::doccount termfreq_min;
     436                 :            : 
     437                 :            :     Xapian::doccount termfreq_est;
     438                 :            : 
     439                 :            :     Xapian::doccount termfreq_max;
     440                 :            : 
     441                 :            :   public:
     442                 :            :     /** Construct a ValuePostingSource.
     443                 :            :      *
     444                 :            :      *  @param slot_ The value slot to read values from.
     445                 :            :      */
     446                 :       6240 :     explicit XAPIAN_NOTHROW(ValuePostingSource(Xapian::valueno slot_))
     447                 :       6240 :         : slot(slot_) {}
     448                 :            : 
     449                 :            :     Xapian::doccount get_termfreq_min() const;
     450                 :            :     Xapian::doccount get_termfreq_est() const;
     451                 :            :     Xapian::doccount get_termfreq_max() const;
     452                 :            : 
     453                 :            :     void next(double min_wt);
     454                 :            :     void skip_to(Xapian::docid min_docid, double min_wt);
     455                 :            :     bool check(Xapian::docid min_docid, double min_wt);
     456                 :            : 
     457                 :            :     bool at_end() const;
     458                 :            : 
     459                 :            :     Xapian::docid get_docid() const;
     460                 :            : 
     461                 :            :     void init(const Database & db_);
     462                 :            : 
     463                 :            :     /** The database we're reading values from.
     464                 :            :      *
     465                 :            :      *  Added in 1.2.23 and 1.3.5.
     466                 :            :      */
     467                 :        524 :     Xapian::Database get_database() const { return db; }
     468                 :            : 
     469                 :            :     /** The slot we're reading values from.
     470                 :            :      *
     471                 :            :      *  Added in 1.2.23 and 1.3.5.
     472                 :            :      */
     473                 :        888 :     Xapian::valueno get_slot() const { return slot; }
     474                 :            : 
     475                 :            :     /** Read current value.
     476                 :            :      *
     477                 :            :      *  Added in 1.2.23 and 1.3.5.
     478                 :            :      */
     479                 :       2036 :     std::string get_value() const { return *value_it; }
     480                 :            : 
     481                 :            :     /** End the iteration.
     482                 :            :      *
     483                 :            :      *  Calls to at_end() will return true after calling this method.
     484                 :            :      *
     485                 :            :      *  Added in 1.2.23 and 1.3.5.
     486                 :            :      */
     487                 :         26 :     void done() {
     488         [ +  - ]:         26 :         value_it = db.valuestream_end(slot);
     489                 :         26 :         started = true;
     490                 :         26 :     }
     491                 :            : 
     492                 :            :     /** Flag indicating if we've started (true if we have).
     493                 :            :      *
     494                 :            :      *  Added in 1.2.23 and 1.3.5.
     495                 :            :      */
     496                 :            :     bool get_started() const { return started; }
     497                 :            : 
     498                 :            :     /** Set a lower bound on the term frequency.
     499                 :            :      *
     500                 :            :      *  Subclasses should set this if they are overriding the next(), skip_to()
     501                 :            :      *  or check() methods to return fewer documents.
     502                 :            :      *
     503                 :            :      *  Added in 1.2.23 and 1.3.5.
     504                 :            :      */
     505                 :         16 :     void set_termfreq_min(Xapian::doccount termfreq_min_) {
     506                 :         16 :         termfreq_min = termfreq_min_;
     507                 :         16 :     }
     508                 :            : 
     509                 :            :     /** Set an estimate of the term frequency.
     510                 :            :      *
     511                 :            :      *  Subclasses should set this if they are overriding the next(), skip_to()
     512                 :            :      *  or check() methods.
     513                 :            :      *
     514                 :            :      *  Added in 1.2.23 and 1.3.5.
     515                 :            :      */
     516                 :            :     void set_termfreq_est(Xapian::doccount termfreq_est_) {
     517                 :            :         termfreq_est = termfreq_est_;
     518                 :            :     }
     519                 :            : 
     520                 :            :     /** Set an upper bound on the term frequency.
     521                 :            :      *
     522                 :            :      *  Subclasses should set this if they are overriding the next(), skip_to()
     523                 :            :      *  or check() methods.
     524                 :            :      *
     525                 :            :      *  Added in 1.2.23 and 1.3.5.
     526                 :            :      */
     527                 :            :     void set_termfreq_max(Xapian::doccount termfreq_max_) {
     528                 :            :         termfreq_max = termfreq_max_;
     529                 :            :     }
     530                 :            : 
     531                 :            :     std::string get_description() const;
     532                 :            : };
     533                 :            : 
     534                 :            : 
     535                 :            : /** A posting source which reads weights from a value slot.
     536                 :            :  *
     537                 :            :  *  This returns entries for all documents in the given database which have a
     538                 :            :  *  non-empty value in the specified slot.  It returns a weight calculated by
     539                 :            :  *  applying sortable_unserialise to the value stored in the slot (so the
     540                 :            :  *  values stored should probably have been calculated by applying
     541                 :            :  *  sortable_serialise to a floating point number at index time).
     542                 :            :  *
     543                 :            :  *  The upper bound on the weight returned is set using the upper bound on the
     544                 :            :  *  values in the specified slot, or DBL_MAX if value bounds aren't supported
     545                 :            :  *  by the current backend.
     546                 :            :  *
     547                 :            :  *  For efficiency, this posting source doesn't check that the stored values
     548                 :            :  *  are valid in any way, so it will never raise an exception due to invalid
     549                 :            :  *  stored values.  In particular, it doesn't ensure that the unserialised
     550                 :            :  *  values are positive, which is a requirement for weights.  The behaviour if
     551                 :            :  *  the slot contains values which unserialise to negative values is undefined.
     552                 :            :  */
     553         [ -  + ]:       9582 : class XAPIAN_VISIBILITY_DEFAULT ValueWeightPostingSource
     554                 :            :         : public ValuePostingSource {
     555                 :            :   public:
     556                 :            :     /** Construct a ValueWeightPostingSource.
     557                 :            :      *
     558                 :            :      *  @param slot_ The value slot to read values from.
     559                 :            :      */
     560                 :            :     explicit ValueWeightPostingSource(Xapian::valueno slot_);
     561                 :            : 
     562                 :            :     double get_weight() const;
     563                 :            :     ValueWeightPostingSource * clone() const;
     564                 :            :     std::string name() const;
     565                 :            :     std::string serialise() const;
     566                 :            :     ValueWeightPostingSource * unserialise(const std::string &serialised) const;
     567                 :            :     void init(const Database & db_);
     568                 :            : 
     569                 :            :     std::string get_description() const;
     570                 :            : };
     571                 :            : 
     572                 :            : 
     573                 :            : /** Read weights from a value which is known to decrease as docid increases.
     574                 :            :  *
     575                 :            :  *  This posting source can be used, like ValueWeightPostingSource, to add a
     576                 :            :  *  weight contribution to a query based on the values stored in a slot.  The
     577                 :            :  *  values in the slot must be serialised as by @a sortable_serialise().
     578                 :            :  *
     579                 :            :  *  However, this posting source is additionally given a range of document IDs,
     580                 :            :  *  within which the weight is known to be decreasing.  i.e. for all documents
     581                 :            :  *  with ids A and B within this range (including the endpoints), where A is
     582                 :            :  *  less than B, the weight of A is greater than or equal to the weight of B.
     583                 :            :  *  This can allow the posting source to skip to the end of the range quickly
     584                 :            :  *  once the weight at the current position within the range is lower than
     585                 :            :  *  the minimum weight of interest.
     586                 :            :  *
     587                 :            :  *  By default, the range is assumed to cover all document IDs.
     588                 :            :  *
     589                 :            :  *  The ordering property can be arranged at index time, or by sorting an
     590                 :            :  *  indexed database to produce a new, sorted, database.
     591                 :            :  */
     592         [ -  + ]:       6450 : class XAPIAN_VISIBILITY_DEFAULT DecreasingValueWeightPostingSource
     593                 :            :         : public Xapian::ValueWeightPostingSource {
     594                 :            :   protected:
     595                 :            :     /** Start of range of docids for which weights are known to be decreasing.
     596                 :            :      *
     597                 :            :      *  0 => first docid.
     598                 :            :      */
     599                 :            :     Xapian::docid range_start;
     600                 :            : 
     601                 :            :     /** End of range of docids for which weights are known to be decreasing.
     602                 :            :      *
     603                 :            :      *  0 => last docid.
     604                 :            :      */
     605                 :            :     Xapian::docid range_end;
     606                 :            : 
     607                 :            :     /// Weight at current position.
     608                 :            :     double curr_weight;
     609                 :            : 
     610                 :            :     /// Flag, set to true if there are docs after the end of the range.
     611                 :            :     bool items_at_end;
     612                 :            : 
     613                 :            :     /// Skip the iterator forward if in the decreasing range, and weight is low.
     614                 :            :     void skip_if_in_range(double min_wt);
     615                 :            : 
     616                 :            :   public:
     617                 :            :     /** Construct a DecreasingValueWeightPostingSource.
     618                 :            :      *
     619                 :            :      *  @param slot_ The value slot to read values from.
     620                 :            :      *  @param range_start_ Start of range of docids for which weights are
     621                 :            :      *                  known to be decreasing (default: first docid)
     622                 :            :      *  @param range_end_ End of range of docids for which weights are
     623                 :            :      *                  known to be decreasing (default: last docid)
     624                 :            :      */
     625                 :            :     DecreasingValueWeightPostingSource(Xapian::valueno slot_,
     626                 :            :                                        Xapian::docid range_start_ = 0,
     627                 :            :                                        Xapian::docid range_end_ = 0);
     628                 :            : 
     629                 :            :     double get_weight() const;
     630                 :            :     DecreasingValueWeightPostingSource * clone() const;
     631                 :            :     std::string name() const;
     632                 :            :     std::string serialise() const;
     633                 :            :     DecreasingValueWeightPostingSource * unserialise(const std::string &serialised) const;
     634                 :            :     void init(const Xapian::Database & db_);
     635                 :            : 
     636                 :            :     void next(double min_wt);
     637                 :            :     void skip_to(Xapian::docid min_docid, double min_wt);
     638                 :            :     bool check(Xapian::docid min_docid, double min_wt);
     639                 :            : 
     640                 :            :     std::string get_description() const;
     641                 :            : };
     642                 :            : 
     643                 :            : 
     644                 :            : /** A posting source which looks up weights in a map using values as the key.
     645                 :            :  *
     646                 :            :  *  This will return entries for all documents in the given database
     647                 :            :  *  which have a value in the slot specified.  The values will be mapped to the
     648                 :            :  *  corresponding weight in the weight map. If there is no mapping for a
     649                 :            :  *  particular value, the default weight will be returned (which itself
     650                 :            :  *  defaults to 0.0).
     651                 :            :  */
     652         [ -  + ]:       5996 : class XAPIAN_VISIBILITY_DEFAULT ValueMapPostingSource
     653                 :            :         : public ValuePostingSource {
     654                 :            :     /// The default weight
     655                 :            :     double default_weight;
     656                 :            : 
     657                 :            :     /// The maximum weight in weight_map.
     658                 :            :     double max_weight_in_map;
     659                 :            : 
     660                 :            :     /// The value -> weight map
     661                 :            :     std::map<std::string, double> weight_map;
     662                 :            : 
     663                 :            :   public:
     664                 :            :     /** Construct a ValueMapPostingSource.
     665                 :            :      *
     666                 :            :      *  @param slot_ The value slot to read values from.
     667                 :            :      */
     668                 :            :     explicit ValueMapPostingSource(Xapian::valueno slot_);
     669                 :            : 
     670                 :            :     /** Add a mapping.
     671                 :            :      *
     672                 :            :      *  @param key The key looked up from the value slot.
     673                 :            :      *  @param wt The weight to give this key.
     674                 :            :      */
     675                 :            :     void add_mapping(const std::string &key, double wt);
     676                 :            : 
     677                 :            :     /** Clear all mappings. */
     678                 :            :     void clear_mappings();
     679                 :            : 
     680                 :            :     /** Set a default weight for document values not in the map.
     681                 :            :      *
     682                 :            :      *  @param wt The weight to set as the default.
     683                 :            :      */
     684                 :            :     void set_default_weight(double wt);
     685                 :            : 
     686                 :            :     double get_weight() const;
     687                 :            :     ValueMapPostingSource * clone() const;
     688                 :            :     std::string name() const;
     689                 :            :     std::string serialise() const;
     690                 :            :     ValueMapPostingSource * unserialise(const std::string &serialised) const;
     691                 :            :     void init(const Database & db_);
     692                 :            : 
     693                 :            :     std::string get_description() const;
     694                 :            : };
     695                 :            : 
     696                 :            : 
     697                 :            : /** A posting source which returns a fixed weight for all documents.
     698                 :            :  *
     699                 :            :  *  This returns entries for all documents in the given database, with a fixed
     700                 :            :  *  weight (specified by a parameter to the constructor).
     701                 :            :  */
     702         [ -  + ]:       6018 : class XAPIAN_VISIBILITY_DEFAULT FixedWeightPostingSource : public PostingSource {
     703                 :            :     /// The database we're reading documents from.
     704                 :            :     Xapian::Database db;
     705                 :            : 
     706                 :            :     /// Number of documents in the posting source.
     707                 :            :     Xapian::doccount termfreq;
     708                 :            : 
     709                 :            :     /// Iterator over all documents.
     710                 :            :     Xapian::PostingIterator it;
     711                 :            : 
     712                 :            :     /// Flag indicating if we've started (true if we have).
     713                 :            :     bool started;
     714                 :            : 
     715                 :            :     /// The docid last passed to check() (0 if check() wasn't the last move).
     716                 :            :     Xapian::docid check_docid;
     717                 :            : 
     718                 :            :   public:
     719                 :            :     /** Construct a FixedWeightPostingSource.
     720                 :            :      *
     721                 :            :      *  @param wt The fixed weight to return.
     722                 :            :      */
     723                 :            :     explicit FixedWeightPostingSource(double wt);
     724                 :            : 
     725                 :            :     Xapian::doccount get_termfreq_min() const;
     726                 :            :     Xapian::doccount get_termfreq_est() const;
     727                 :            :     Xapian::doccount get_termfreq_max() const;
     728                 :            : 
     729                 :            :     double get_weight() const;
     730                 :            : 
     731                 :            :     void next(double min_wt);
     732                 :            :     void skip_to(Xapian::docid min_docid, double min_wt);
     733                 :            :     bool check(Xapian::docid min_docid, double min_wt);
     734                 :            : 
     735                 :            :     bool at_end() const;
     736                 :            : 
     737                 :            :     Xapian::docid get_docid() const;
     738                 :            : 
     739                 :            :     FixedWeightPostingSource * clone() const;
     740                 :            :     std::string name() const;
     741                 :            :     std::string serialise() const;
     742                 :            :     FixedWeightPostingSource * unserialise(const std::string &serialised) const;
     743                 :            :     void init(const Database & db_);
     744                 :            : 
     745                 :            :     std::string get_description() const;
     746                 :            : };
     747                 :            : 
     748                 :            : }
     749                 :            : 
     750                 :            : #endif // XAPIAN_INCLUDED_POSTINGSOURCE_H

Generated by: LCOV version 1.11