LCOV - code coverage report
Current view: top level - matcher - localsubmatch.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core c2b6f1024d3a Lines: 56 83 67.5 %
Date: 2019-05-16 09:13:18 Functions: 3 14 21.4 %
Branches: 60 138 43.5 %

           Branch data     Line data    Source code
       1                 :            : /** @file localsubmatch.cc
       2                 :            :  *  @brief SubMatch class for a local database.
       3                 :            :  */
       4                 :            : /* Copyright (C) 2006,2007,2009,2010,2011,2013,2014,2015,2016,2017,2018 Olly Betts
       5                 :            :  * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
       6                 :            :  *
       7                 :            :  * This program is free software; you can redistribute it and/or modify
       8                 :            :  * it under the terms of the GNU General Public License as published by
       9                 :            :  * the Free Software Foundation; either version 2 of the License, or
      10                 :            :  * (at your option) any later version.
      11                 :            :  *
      12                 :            :  * This program is distributed in the hope that it will be useful,
      13                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15                 :            :  * GNU General Public License for more details.
      16                 :            :  *
      17                 :            :  * You should have received a copy of the GNU General Public License
      18                 :            :  * along with this program; if not, write to the Free Software
      19                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      20                 :            :  */
      21                 :            : 
      22                 :            : #include <config.h>
      23                 :            : 
      24                 :            : #include "localsubmatch.h"
      25                 :            : 
      26                 :            : #include "backends/databaseinternal.h"
      27                 :            : #include "debuglog.h"
      28                 :            : #include "extraweightpostlist.h"
      29                 :            : #include "api/leafpostlist.h"
      30                 :            : #include "omassert.h"
      31                 :            : #include "queryoptimiser.h"
      32                 :            : #include "synonympostlist.h"
      33                 :            : #include "api/termlist.h"
      34                 :            : #include "weight/weightinternal.h"
      35                 :            : 
      36                 :            : #include "xapian/error.h"
      37                 :            : 
      38                 :            : #include <memory>
      39                 :            : #include <map>
      40                 :            : #include <string>
      41                 :            : 
      42                 :            : using namespace std;
      43                 :            : 
      44                 :            : /** Xapian::Weight subclass which adds laziness.
      45                 :            :  *
      46                 :            :  *  For terms from a wildcard when remote databases are involved, we need to
      47                 :            :  *  delay calling init_() on the weight object until the stats for the terms
      48                 :            :  *  from the wildcard have been collated.
      49                 :            :  */
      50         [ #  # ]:          0 : class LazyWeight : public Xapian::Weight {
      51                 :            :     LeafPostList * pl;
      52                 :            : 
      53                 :            :     Xapian::Weight * real_wt;
      54                 :            : 
      55                 :            :     Xapian::Weight::Internal * stats;
      56                 :            : 
      57                 :            :     Xapian::termcount qlen;
      58                 :            : 
      59                 :            :     Xapian::termcount wqf;
      60                 :            : 
      61                 :            :     double factor;
      62                 :            : 
      63                 :            :     LazyWeight * clone() const;
      64                 :            : 
      65                 :            :     void init(double factor_);
      66                 :            : 
      67                 :            :   public:
      68                 :          0 :     LazyWeight(LeafPostList * pl_,
      69                 :            :                Xapian::Weight * real_wt_,
      70                 :            :                Xapian::Weight::Internal * stats_,
      71                 :            :                Xapian::termcount qlen_,
      72                 :            :                Xapian::termcount wqf__,
      73                 :            :                double factor_)
      74                 :            :         : pl(pl_),
      75                 :            :           real_wt(real_wt_),
      76                 :            :           stats(stats_),
      77                 :            :           qlen(qlen_),
      78                 :            :           wqf(wqf__),
      79                 :          0 :           factor(factor_)
      80                 :          0 :     { }
      81                 :            : 
      82                 :            :     std::string name() const;
      83                 :            : 
      84                 :            :     std::string serialise() const;
      85                 :            :     LazyWeight * unserialise(const std::string & serialised) const;
      86                 :            : 
      87                 :            :     double get_sumpart(Xapian::termcount wdf,
      88                 :            :                        Xapian::termcount doclen,
      89                 :            :                        Xapian::termcount uniqterms) const;
      90                 :            :     double get_maxpart() const;
      91                 :            : 
      92                 :            :     double get_sumextra(Xapian::termcount doclen,
      93                 :            :                         Xapian::termcount uniqterms) const;
      94                 :            :     double get_maxextra() const;
      95                 :            : };
      96                 :            : 
      97                 :            : LazyWeight *
      98                 :          0 : LazyWeight::clone() const
      99                 :            : {
     100 [ #  # ][ #  # ]:          0 :     throw Xapian::InvalidOperationError("LazyWeight::clone()");
                 [ #  # ]
     101                 :            : }
     102                 :            : 
     103                 :            : void
     104                 :          0 : LazyWeight::init(double factor_)
     105                 :            : {
     106                 :            :     (void)factor_;
     107 [ #  # ][ #  # ]:          0 :     throw Xapian::InvalidOperationError("LazyWeight::init()");
                 [ #  # ]
     108                 :            : }
     109                 :            : 
     110                 :            : string
     111                 :          0 : LazyWeight::name() const
     112                 :            : {
     113         [ #  # ]:          0 :     string desc = "LazyWeight(";
     114 [ #  # ][ #  # ]:          0 :     desc += real_wt->name();
     115         [ #  # ]:          0 :     desc += ")";
     116                 :          0 :     return desc;
     117                 :            : }
     118                 :            : 
     119                 :            : string
     120                 :          0 : LazyWeight::serialise() const
     121                 :            : {
     122 [ #  # ][ #  # ]:          0 :     throw Xapian::InvalidOperationError("LazyWeight::serialise()");
                 [ #  # ]
     123                 :            : }
     124                 :            : 
     125                 :            : LazyWeight *
     126                 :          0 : LazyWeight::unserialise(const string &) const
     127                 :            : {
     128 [ #  # ][ #  # ]:          0 :     throw Xapian::InvalidOperationError("LazyWeight::unserialise()");
                 [ #  # ]
     129                 :            : }
     130                 :            : 
     131                 :            : double
     132                 :          0 : LazyWeight::get_sumpart(Xapian::termcount wdf,
     133                 :            :                         Xapian::termcount doclen,
     134                 :            :                         Xapian::termcount uniqterms) const
     135                 :            : {
     136                 :            :     (void)wdf;
     137                 :            :     (void)doclen;
     138                 :            :     (void)uniqterms;
     139 [ #  # ][ #  # ]:          0 :     throw Xapian::InvalidOperationError("LazyWeight::get_sumpart()");
                 [ #  # ]
     140                 :            : }
     141                 :            : 
     142                 :            : double
     143                 :          0 : LazyWeight::get_sumextra(Xapian::termcount doclen,
     144                 :            :                          Xapian::termcount uniqterms) const
     145                 :            : {
     146                 :            :     (void)doclen;
     147                 :            :     (void)uniqterms;
     148 [ #  # ][ #  # ]:          0 :     throw Xapian::InvalidOperationError("LazyWeight::get_sumextra()");
                 [ #  # ]
     149                 :            : }
     150                 :            : 
     151                 :            : double
     152                 :          0 : LazyWeight::get_maxpart() const
     153                 :            : {
     154                 :            :     // This gets called first for the case we care about.
     155                 :          0 :     return pl->resolve_lazy_termweight(real_wt, stats, qlen, wqf, factor);
     156                 :            : }
     157                 :            : 
     158                 :            : double
     159                 :          0 : LazyWeight::get_maxextra() const
     160                 :            : {
     161 [ #  # ][ #  # ]:          0 :     throw Xapian::InvalidOperationError("LazyWeight::get_maxextra()");
                 [ #  # ]
     162                 :            : }
     163                 :            : 
     164                 :            : PostList *
     165                 :     178254 : LocalSubMatch::get_postlist(PostListTree * matcher,
     166                 :            :                             Xapian::termcount * total_subqs_ptr)
     167                 :            : {
     168                 :            :     LOGCALL(MATCH, PostList *, "LocalSubMatch::get_postlist", matcher | total_subqs_ptr);
     169                 :            : 
     170         [ -  + ]:     178254 :     if (query.empty())
     171                 :          0 :         RETURN(NULL); // MatchNothing
     172                 :            : 
     173                 :            :     // Build the postlist tree for the query.  This calls
     174                 :            :     // LocalSubMatch::open_post_list() for each term in the query.
     175                 :            :     PostList * pl;
     176                 :            :     {
     177         [ +  - ]:     178254 :         QueryOptimiser opt(*db, *this, matcher, full_db_has_positions);
     178         [ +  + ]:     178254 :         pl = query.internal->postlist(&opt, 1.0);
     179                 :     178254 :         *total_subqs_ptr = opt.get_total_subqs();
     180                 :            :     }
     181                 :            : 
     182         [ +  + ]:     178202 :     if (pl) {
     183         [ +  - ]:     178024 :         unique_ptr<Xapian::Weight> extra_wt(wt_factory.clone());
     184                 :            :         // Only uses term-independent stats.
     185         [ +  - ]:     178024 :         extra_wt->init_(*total_stats, qlen);
     186 [ +  - ][ +  + ]:     178024 :         if (extra_wt->get_maxextra() != 0.0) {
     187                 :            :             // There's a term-independent weight contribution, so we combine
     188                 :            :             // the postlist tree with an ExtraWeightPostList which adds in this
     189                 :            :             // contribution.
     190 [ +  - ][ +  - ]:       1310 :             pl = new ExtraWeightPostList(pl, extra_wt.release(), matcher);
     191                 :     178024 :         }
     192                 :            :     }
     193                 :            : 
     194                 :     178202 :     RETURN(pl);
     195                 :            : }
     196                 :            : 
     197                 :            : PostList *
     198                 :        821 : LocalSubMatch::make_synonym_postlist(PostListTree* pltree,
     199                 :            :                                      PostList* or_pl,
     200                 :            :                                      double factor,
     201                 :            :                                      bool wdf_disjoint)
     202                 :            : {
     203                 :            :     LOGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist", pltree | or_pl | factor | wdf_disjoint);
     204 [ +  - ][ +  + ]:        821 :     if (rare(or_pl->get_termfreq_max() == 0)) {
     205                 :            :         // We know or_pl doesn't match anything.
     206         [ +  - ]:         20 :         delete or_pl;
     207                 :         20 :         RETURN(NULL);
     208                 :            :     }
     209                 :            :     LOGVALUE(MATCH, or_pl->get_termfreq_est());
     210                 :            :     unique_ptr<SynonymPostList> res(new SynonymPostList(or_pl, db, pltree,
     211 [ +  - ][ +  - ]:        801 :                                                         wdf_disjoint));
     212         [ +  - ]:       1602 :     unique_ptr<Xapian::Weight> wt(wt_factory.clone());
     213                 :            : 
     214                 :        801 :     TermFreqs freqs;
     215                 :            :     // Avoid calling get_termfreq_est_using_stats() if the database is empty
     216                 :            :     // so we don't need to special case that repeatedly when implementing it.
     217                 :            :     // FIXME: it would be nicer to handle an empty database higher up, though
     218                 :            :     // we need to catch the case where all the non-empty subdatabases have
     219                 :            :     // failed, so we can't just push this right up to the start of get_mset().
     220         [ +  - ]:        801 :     if (usual(total_stats->collection_size != 0)) {
     221         [ +  - ]:        801 :         freqs = or_pl->get_termfreq_est_using_stats(*total_stats);
     222                 :            :     }
     223                 :            :     wt->init_(*total_stats, qlen, factor,
     224         [ +  - ]:        801 :               freqs.termfreq, freqs.reltermfreq, freqs.collfreq);
     225                 :            : 
     226         [ +  - ]:        801 :     res->set_weight(wt.release());
     227                 :       1622 :     RETURN(res.release());
     228                 :            : }
     229                 :            : 
     230                 :            : PostList *
     231                 :     313837 : LocalSubMatch::open_post_list(const string& term,
     232                 :            :                               Xapian::termcount wqf,
     233                 :            :                               double factor,
     234                 :            :                               bool need_positions,
     235                 :            :                               bool in_synonym,
     236                 :            :                               QueryOptimiser * qopt,
     237                 :            :                               bool lazy_weight)
     238                 :            : {
     239                 :            :     LOGCALL(MATCH, PostList *, "LocalSubMatch::open_post_list", term | wqf | factor | need_positions | qopt | lazy_weight);
     240                 :            : 
     241                 :     313837 :     bool weighted = false;
     242                 :            : 
     243                 :     313837 :     LeafPostList * pl = NULL;
     244         [ +  + ]:     313837 :     if (term.empty()) {
     245                 :            :         Assert(!need_positions);
     246                 :         72 :         pl = db->open_leaf_post_list(term, false);
     247                 :            :     } else {
     248                 :     313765 :         weighted = (factor != 0.0);
     249         [ +  + ]:     313765 :         if (!need_positions) {
     250         [ +  + ]:     623398 :             if ((!weighted && !in_synonym) ||
           [ +  +  +  + ]
                 [ +  + ]
     251                 :     311517 :                 !wt_factory.get_sumpart_needs_wdf_()) {
     252                 :            :                 Xapian::doccount sub_tf;
     253         [ +  - ]:        901 :                 db->get_freqs(term, &sub_tf, NULL);
     254 [ +  - ][ +  + ]:        901 :                 if (sub_tf == db->get_doccount()) {
     255                 :            :                     // If we're not going to use the wdf or term positions, and
     256                 :            :                     // the term indexes all documents, we can replace it with
     257                 :            :                     // the MatchAll postlist, which is especially efficient if
     258                 :            :                     // there are no gaps in the docids.
     259 [ +  - ][ +  - ]:        136 :                     pl = db->open_leaf_post_list(string(), false);
     260                 :            : 
     261                 :            :                     // Set the term name so the postlist looks up the correct
     262                 :            :                     // term frequencies - this is necessary if the weighting
     263                 :            :                     // scheme needs collection frequency or reltermfreq
     264                 :            :                     // (termfreq would be correct anyway since it's just the
     265                 :            :                     // collection size in this case).
     266         [ +  - ]:     311881 :                     pl->set_term(term);
     267                 :            :                 }
     268                 :            :             }
     269                 :            :         }
     270                 :            : 
     271         [ +  + ]:     313765 :         if (!pl) {
     272                 :     313629 :             const LeafPostList * hint = qopt->get_hint_postlist();
     273         [ +  + ]:     313629 :             if (hint)
     274                 :     148428 :                 pl = hint->open_nearby_postlist(term, need_positions);
     275         [ +  + ]:     313629 :             if (!pl) {
     276                 :     190137 :                 pl = db->open_leaf_post_list(term, need_positions);
     277                 :            :             }
     278                 :     313629 :             qopt->set_hint_postlist(pl);
     279                 :            :         }
     280                 :            :     }
     281                 :            : 
     282         [ +  + ]:     313837 :     if (lazy_weight) {
     283                 :            :         // Term came from a wildcard, but we may already have that term in the
     284                 :            :         // query anyway, so check before accumulating its TermFreqs.
     285         [ +  - ]:       1224 :         map<string, TermFreqs>::iterator i = total_stats->termfreqs.find(term);
     286         [ +  + ]:       1224 :         if (i == total_stats->termfreqs.end()) {
     287                 :            :             Xapian::doccount sub_tf;
     288                 :            :             Xapian::termcount sub_cf;
     289         [ +  - ]:       1214 :             db->get_freqs(term, &sub_tf, &sub_cf);
     290 [ +  - ][ +  - ]:       1224 :             total_stats->termfreqs.insert({term, TermFreqs(sub_tf, 0, sub_cf)});
     291                 :            :         }
     292                 :            :     }
     293                 :            : 
     294         [ +  + ]:     313837 :     if (weighted) {
     295                 :     310719 :         Xapian::Weight * wt = wt_factory.clone();
     296         [ +  - ]:     310719 :         if (!lazy_weight) {
     297                 :     310719 :             wt->init_(*total_stats, qlen, term, wqf, factor);
     298                 :     310719 :             total_stats->set_max_part(term, wt->get_maxpart());
     299                 :            :         } else {
     300                 :            :             // Delay initialising the actual weight object, so that we can
     301                 :            :             // gather stats for the terms lazily expanded from a wildcard
     302                 :            :             // (needed for the remote database case).
     303                 :          0 :             wt = new LazyWeight(pl, wt, total_stats, qlen, wqf, factor);
     304                 :            :         }
     305                 :     310719 :         pl->set_termweight(wt);
     306                 :            :     }
     307                 :     313837 :     RETURN(pl);
     308                 :            : }

Generated by: LCOV version 1.11