LCOV - code coverage report
Current view: top level - matcher - phrasepostlist.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core 954b5873a738 Lines: 34 52 65.4 %
Date: 2019-06-30 05:20:33 Functions: 5 8 62.5 %
Branches: 17 44 38.6 %

           Branch data     Line data    Source code
       1                 :            : /** @file phrasepostlist.cc
       2                 :            :  * @brief Return docs containing terms forming a particular phrase.
       3                 :            :  */
       4                 :            : /* Copyright (C) 2006,2007,2009,2010,2011,2014,2015,2017 Olly Betts
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or modify
       7                 :            :  * it under the terms of the GNU General Public License as published by
       8                 :            :  * the Free Software Foundation; either version 2 of the License, or
       9                 :            :  * (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      19                 :            :  */
      20                 :            : 
      21                 :            : #include <config.h>
      22                 :            : 
      23                 :            : #include "phrasepostlist.h"
      24                 :            : 
      25                 :            : #include "debuglog.h"
      26                 :            : #include "backends/positionlist.h"
      27                 :            : #include "omassert.h"
      28                 :            : #include "str.h"
      29                 :            : 
      30                 :            : #include <algorithm>
      31                 :            : #include <vector>
      32                 :            : 
      33                 :            : using namespace std;
      34                 :            : 
      35                 :        160 : PhrasePostList::PhrasePostList(PostList *source_,
      36                 :            :                                Xapian::termpos window_,
      37                 :            :                                const vector<PostList*>::const_iterator &terms_begin,
      38                 :            :                                const vector<PostList*>::const_iterator &terms_end,
      39                 :            :                                PostListTree* pltree_)
      40                 :            :     : SelectPostList(source_, pltree_),
      41                 :            :       window(window_),
      42         [ +  - ]:        160 :       terms(terms_begin, terms_end)
      43                 :            : {
      44                 :        160 :     size_t n = terms.size();
      45                 :            :     Assert(n > 1);
      46 [ +  - ][ +  - ]:        160 :     poslists = new PositionList*[n];
      47                 :        160 : }
      48                 :            : 
      49                 :        480 : PhrasePostList::~PhrasePostList()
      50                 :            : {
      51         [ +  - ]:        160 :     delete [] poslists;
      52         [ -  + ]:        320 : }
      53                 :            : 
      54                 :            : void
      55                 :       1491 : PhrasePostList::start_position_list(unsigned i)
      56                 :            : {
      57                 :       1491 :     poslists[i] = terms[i]->read_position_list();
      58                 :       1491 : }
      59                 :            : 
      60                 :            : bool
      61                 :        605 : PhrasePostList::test_doc()
      62                 :            : {
      63                 :            :     LOGCALL(MATCH, bool, "PhrasePostList::test_doc", NO_ARGS);
      64                 :            : 
      65                 :        605 :     start_position_list(0);
      66         [ +  + ]:        605 :     if (!poslists[0]->next())
      67                 :          6 :         RETURN(false);
      68                 :            : 
      69                 :        599 :     unsigned read_hwm = 0;
      70                 :            :     Xapian::termpos b;
      71         [ +  + ]:        167 :     do {
      72                 :        640 :         Xapian::termpos base = poslists[0]->get_position();
      73                 :        640 :         Xapian::termpos pos = base;
      74                 :        640 :         unsigned i = 0;
      75         [ +  + ]:        544 :         do {
      76         [ +  + ]:       1017 :             if (++i == terms.size()) RETURN(true);
      77         [ +  + ]:        927 :             if (i > read_hwm) {
      78                 :        886 :                 read_hwm = i;
      79                 :        886 :                 start_position_list(i);
      80                 :            :             }
      81         [ +  + ]:        927 :             if (!poslists[i]->skip_to(pos + 1))
      82                 :        383 :                 RETURN(false);
      83                 :        544 :             pos = poslists[i]->get_position();
      84                 :        544 :             b = pos + (terms.size() - i);
      85                 :        544 :         } while (b - base <= window);
      86                 :            :         // Advance the start of the window to the first position it could match
      87                 :            :         // in given the current position of term i.
      88                 :        167 :     } while (poslists[0]->skip_to(b - window));
      89                 :        126 :     RETURN(false);
      90                 :            : }
      91                 :            : 
      92                 :            : Xapian::termcount
      93                 :          0 : PhrasePostList::get_wdf() const
      94                 :            : {
      95                 :            :     // Calculate an estimate for the wdf of a phrase postlist.
      96                 :            :     //
      97                 :            :     // We use the minimum wdf of a sub-postlist as our estimate.  See the
      98                 :            :     // comment in NearPostList::get_wdf() for justification of this estimate.
      99                 :          0 :     vector<PostList *>::const_iterator i = terms.begin();
     100         [ #  # ]:          0 :     Xapian::termcount wdf = (*i)->get_wdf();
     101         [ #  # ]:          0 :     while (++i != terms.end()) {
     102 [ #  # ][ #  # ]:          0 :         wdf = min(wdf, (*i)->get_wdf());
     103                 :            :     }
     104                 :          0 :     return wdf;
     105                 :            : }
     106                 :            : 
     107                 :            : Xapian::doccount
     108                 :        160 : PhrasePostList::get_termfreq_est() const
     109                 :            : {
     110                 :            :     // It's hard to estimate how many times the phrase will occur as
     111                 :            :     // it depends a lot on the phrase, but usually the phrase will
     112                 :            :     // occur significantly less often than the individual terms.
     113                 :        160 :     return pl->get_termfreq_est() / 3;
     114                 :            : }
     115                 :            : 
     116                 :            : TermFreqs
     117                 :          0 : PhrasePostList::get_termfreq_est_using_stats(
     118                 :            :         const Xapian::Weight::Internal & stats) const
     119                 :            : {
     120                 :            :     LOGCALL(MATCH, TermFreqs, "PhrasePostList::get_termfreq_est_using_stats", stats);
     121                 :            :     // No idea how to estimate this - do the same as get_termfreq_est() for
     122                 :            :     // now.
     123                 :          0 :     TermFreqs result(pl->get_termfreq_est_using_stats(stats));
     124                 :          0 :     result.termfreq /= 3;
     125                 :          0 :     result.reltermfreq /= 3;
     126                 :          0 :     RETURN(result);
     127                 :            : }
     128                 :            : 
     129                 :            : string
     130                 :          0 : PhrasePostList::get_description() const
     131                 :            : {
     132         [ #  # ]:          0 :     string m = "(Phrase ";
     133 [ #  # ][ #  # ]:          0 :     m += str(window);
     134         [ #  # ]:          0 :     m += ' ';
     135 [ #  # ][ #  # ]:          0 :     m += pl->get_description();
     136         [ #  # ]:          0 :     m += ")";
     137                 :          0 :     return m;
     138                 :            : }

Generated by: LCOV version 1.11