LCOV - code coverage report
Current view: top level - matcher - multiandpostlist.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core 954b5873a738 Lines: 42 43 97.7 %
Date: 2019-06-30 05:20:33 Functions: 8 8 100.0 %
Branches: 18 28 64.3 %

           Branch data     Line data    Source code
       1                 :            : /** @file multiandpostlist.h
       2                 :            :  * @brief N-way AND postlist
       3                 :            :  */
       4                 :            : /* Copyright (C) 2007,2009,2011,2017 Olly Betts
       5                 :            :  * Copyright (C) 2009 Lemur Consulting Ltd
       6                 :            :  *
       7                 :            :  * This program is free software; you can redistribute it and/or
       8                 :            :  * modify it under the terms of the GNU General Public License as
       9                 :            :  * published by the Free Software Foundation; either version 2 of the
      10                 :            :  * License, or (at your option) any later version.
      11                 :            :  *
      12                 :            :  * This program is distributed in the hope that it will be useful,
      13                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15                 :            :  * GNU General Public License for more details.
      16                 :            :  *
      17                 :            :  * You should have received a copy of the GNU General Public License
      18                 :            :  * along with this program; if not, write to the Free Software
      19                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      20                 :            :  */
      21                 :            : 
      22                 :            : #ifndef XAPIAN_INCLUDED_MULTIANDPOSTLIST_H
      23                 :            : #define XAPIAN_INCLUDED_MULTIANDPOSTLIST_H
      24                 :            : 
      25                 :            : #include "omassert.h"
      26                 :            : #include "api/postlist.h"
      27                 :            : #include "postlisttree.h"
      28                 :            : 
      29                 :            : #include <algorithm>
      30                 :            : 
      31                 :            : /// N-way AND postlist.
      32                 :            : class MultiAndPostList : public PostList {
      33                 :            :     /** Comparison functor which orders PostList* by ascending
      34                 :            :      *  get_termfreq_est(). */
      35                 :            :     struct ComparePostListTermFreqAscending {
      36                 :            :         /// Order by ascending get_termfreq_est().
      37                 :       2562 :         bool operator()(const PostList *a, const PostList *b) const {
      38                 :       2562 :             return a->get_termfreq_est() < b->get_termfreq_est();
      39                 :            :         }
      40                 :            :     };
      41                 :            : 
      42                 :            :     /// Don't allow assignment.
      43                 :            :     void operator=(const MultiAndPostList &);
      44                 :            : 
      45                 :            :     /// Don't allow copying.
      46                 :            :     MultiAndPostList(const MultiAndPostList &);
      47                 :            : 
      48                 :            :     /// The current docid, or zero if we haven't started or are at_end.
      49                 :            :     Xapian::docid did;
      50                 :            : 
      51                 :            :     /// The number of sub-postlists.
      52                 :            :     size_t n_kids;
      53                 :            : 
      54                 :            :     /// Array of pointers to sub-postlists.
      55                 :            :     PostList ** plist;
      56                 :            : 
      57                 :            :     /// Array of maximum weights for the sub-postlists.
      58                 :            :     double * max_wt;
      59                 :            : 
      60                 :            :     /// Total maximum weight (== sum of max_wt values).
      61                 :            :     double max_total;
      62                 :            : 
      63                 :            :     /// The number of documents in the database.
      64                 :            :     Xapian::doccount db_size;
      65                 :            : 
      66                 :            :     /// Pointer to the matcher object, so we can report pruning.
      67                 :            :     PostListTree *matcher;
      68                 :            : 
      69                 :            :     /// Calculate the new minimum weight for sub-postlist n.
      70                 :      17106 :     double new_min(double w_min, size_t n) {
      71                 :      17106 :         return w_min - (max_total - max_wt[n]);
      72                 :            :     }
      73                 :            : 
      74                 :            :     /// Call next on a sub-postlist n, and handle any pruning.
      75                 :       6683 :     void next_helper(size_t n, double w_min) {
      76                 :       6683 :         PostList * res = plist[n]->next(new_min(w_min, n));
      77         [ +  + ]:       6683 :         if (res) {
      78         [ +  - ]:         10 :             delete plist[n];
      79                 :         10 :             plist[n] = res;
      80                 :         10 :             matcher->force_recalc();
      81                 :            :         }
      82                 :       6683 :     }
      83                 :            : 
      84                 :            :     /// Call skip_to on a sub-postlist n, and handle any pruning.
      85                 :       1460 :     void skip_to_helper(size_t n, Xapian::docid did_min, double w_min) {
      86                 :       1460 :         PostList * res = plist[n]->skip_to(did_min, new_min(w_min, n));
      87         [ +  + ]:       1460 :         if (res) {
      88         [ +  - ]:         15 :             delete plist[n];
      89                 :         15 :             plist[n] = res;
      90                 :         15 :             matcher->force_recalc();
      91                 :            :         }
      92                 :       1460 :     }
      93                 :            : 
      94                 :            :     /// Call check on a sub-postlist n, and handle any pruning.
      95                 :       8963 :     void check_helper(size_t n, Xapian::docid did_min, double w_min,
      96                 :            :                       bool &valid) {
      97                 :       8963 :         PostList * res = plist[n]->check(did_min, new_min(w_min, n), valid);
      98         [ +  + ]:       8963 :         if (res) {
      99         [ +  - ]:          6 :             delete plist[n];
     100                 :          6 :             plist[n] = res;
     101                 :          6 :             matcher->force_recalc();
     102                 :            :         }
     103                 :       8963 :     }
     104                 :            : 
     105                 :            :     /** Allocate plist and max_wt arrays of @a n_kids each.
     106                 :            :      *
     107                 :            :      *  @exception  std::bad_alloc if allocating either array fails.
     108                 :            :      */
     109                 :            :     void allocate_plist_and_max_wt();
     110                 :            : 
     111                 :            :     /// Advance the sublists to the next match.
     112                 :            :     PostList * find_next_match(double w_min);
     113                 :            : 
     114                 :            :   public:
     115                 :            :     /** Construct from 2 random-access iterators to a container of PostList*,
     116                 :            :      *  a pointer to the matcher, and the document collection size.
     117                 :            :      */
     118                 :            :     template<class RandomItor>
     119                 :       1306 :     MultiAndPostList(RandomItor pl_begin, RandomItor pl_end,
     120                 :            :                      PostListTree * matcher_, Xapian::doccount db_size_)
     121                 :       1256 :         : did(0), n_kids(pl_end - pl_begin), plist(NULL), max_wt(NULL),
     122                 :       1306 :           max_total(0), db_size(db_size_), matcher(matcher_)
     123                 :            :     {
     124   [ +  -  +  - ]:       1306 :         allocate_plist_and_max_wt();
     125                 :            : 
     126                 :            :         // Copy the postlists in ascending termfreq order, since it will
     127                 :            :         // be more efficient to look at the shorter lists first, and skip
     128                 :            :         // the longer lists based on those.
     129 [ +  - ][ +  - ]:       1306 :         std::partial_sort_copy(pl_begin, pl_end, plist, plist + n_kids,
     130                 :          0 :                                ComparePostListTermFreqAscending());
     131                 :       1306 :     }
     132                 :            : 
     133                 :            :     /** Construct as the decay product of an OrPostList or AndMaybePostList. */
     134                 :        183 :     MultiAndPostList(PostList *l, PostList *r,
     135                 :            :                      double lmax, double rmax,
     136                 :            :                      PostListTree * matcher_, Xapian::doccount db_size_)
     137                 :            :         : did(0), n_kids(2), plist(NULL), max_wt(NULL),
     138                 :        183 :           max_total(lmax + rmax), db_size(db_size_), matcher(matcher_)
     139                 :            :     {
     140                 :            :         // Even if we're the decay product of an OrPostList, we may want to
     141                 :            :         // swap here, as the subqueries may also have decayed and so their
     142                 :            :         // estimated termfreqs may have changed.
     143 [ +  - ][ +  - ]:        183 :         if (l->get_termfreq_est() < r->get_termfreq_est()) {
                 [ +  + ]
     144                 :          9 :             std::swap(l, r);
     145                 :          9 :             std::swap(lmax, rmax);
     146                 :            :         }
     147         [ +  - ]:        183 :         allocate_plist_and_max_wt();
     148                 :            :         // Put the least frequent postlist first.
     149                 :        183 :         plist[0] = r;
     150                 :        183 :         plist[1] = l;
     151                 :        183 :         max_wt[0] = rmax;
     152                 :        183 :         max_wt[1] = lmax;
     153                 :        183 :     }
     154                 :            : 
     155                 :            :     ~MultiAndPostList();
     156                 :            : 
     157                 :            :     Xapian::doccount get_termfreq_min() const;
     158                 :            : 
     159                 :            :     Xapian::doccount get_termfreq_max() const;
     160                 :            : 
     161                 :            :     Xapian::doccount get_termfreq_est() const;
     162                 :            : 
     163                 :            :     TermFreqs get_termfreq_est_using_stats(
     164                 :            :         const Xapian::Weight::Internal & stats) const;
     165                 :            : 
     166                 :            :     Xapian::docid get_docid() const;
     167                 :            : 
     168                 :            :     double get_weight(Xapian::termcount doclen,
     169                 :            :                       Xapian::termcount unique_terms) const;
     170                 :            : 
     171                 :            :     bool at_end() const;
     172                 :            : 
     173                 :            :     double recalc_maxweight();
     174                 :            : 
     175                 :            :     PostList* next(double w_min);
     176                 :            : 
     177                 :            :     PostList* skip_to(Xapian::docid, double w_min);
     178                 :            : 
     179                 :            :     std::string get_description() const;
     180                 :            : 
     181                 :            :     /** get_wdf() for MultiAndPostList returns the sum of the wdfs of the
     182                 :            :      *  sub-postlists.
     183                 :            :      *
     184                 :            :      *  The wdf isn't really meaningful in many situations, but if the lists
     185                 :            :      *  are being combined as a synonym we want the sum of the wdfs, so we do
     186                 :            :      *  that in general.
     187                 :            :      */
     188                 :            :     Xapian::termcount get_wdf() const;
     189                 :            : 
     190                 :            :     Xapian::termcount count_matching_subqs() const;
     191                 :            : 
     192                 :            :     void gather_position_lists(OrPositionList* orposlist);
     193                 :            : };
     194                 :            : 
     195                 :            : #endif // XAPIAN_INCLUDED_MULTIANDPOSTLIST_H

Generated by: LCOV version 1.11