LCOV - code coverage report
Current view: top level - weight - dphweight.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core 954b5873a738 Lines: 49 58 84.5 %
Date: 2019-06-30 05:20:33 Functions: 9 11 81.8 %
Branches: 20 40 50.0 %

           Branch data     Line data    Source code
       1                 :            : /** @file dphweight.cc
       2                 :            :  * @brief Xapian::DPHWeight class - The DPH weighting scheme of the DFR framework.
       3                 :            :  */
       4                 :            : /* Copyright (C) 2013, 2014 Aarsh Shah
       5                 :            :  * Copyright (C) 2016,2017 Olly Betts
       6                 :            :  *
       7                 :            :  * This program is free software; you can redistribute it and/or
       8                 :            :  * modify it under the terms of the GNU General Public License as
       9                 :            :  * published by the Free Software Foundation; either version 2 of the
      10                 :            :  * License, or (at your option) any later version.
      11                 :            :  *
      12                 :            :  * This program is distributed in the hope that it will be useful
      13                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
      15                 :            :  * GNU General Public License for more details.
      16                 :            :  *
      17                 :            :  * You should have received a copy of the GNU General Public License
      18                 :            :  * along with this program; if not, write to the Free Software
      19                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
      20                 :            :  */
      21                 :            : 
      22                 :            : #include <config.h>
      23                 :            : 
      24                 :            : #include "xapian/weight.h"
      25                 :            : 
      26                 :            : #include "xapian/error.h"
      27                 :            : #include "common/log2.h"
      28                 :            : #include <algorithm>
      29                 :            : #include <cmath>
      30                 :            : 
      31                 :            : using namespace std;
      32                 :            : 
      33                 :            : namespace Xapian {
      34                 :            : 
      35                 :            : DPHWeight *
      36                 :         66 : DPHWeight::clone() const
      37                 :            : {
      38                 :         66 :     return new DPHWeight();
      39                 :            : }
      40                 :            : 
      41                 :            : void
      42                 :         46 : DPHWeight::init(double factor)
      43                 :            : {
      44         [ +  + ]:         46 :     if (factor == 0.0) {
      45                 :            :         // This object is for the term-independent contribution, and that's
      46                 :            :         // always zero for this scheme.
      47                 :         46 :         return;
      48                 :            :     }
      49                 :            : 
      50                 :         23 :     double F = get_collection_freq();
      51                 :         23 :     double N = get_collection_size();
      52                 :         23 :     double wdf_lower = 1.0;
      53                 :         23 :     double wdf_upper = get_wdf_upper_bound();
      54                 :            : 
      55                 :         23 :     double len_upper = get_doclength_upper_bound();
      56                 :            : 
      57                 :         23 :     double min_wdf_to_len = wdf_lower / len_upper;
      58                 :            : 
      59         [ -  + ]:         23 :     if (wdf_upper == 0) {
      60                 :          0 :         upper_bound = 0.0;
      61                 :          0 :         return;
      62                 :            :     }
      63                 :            : 
      64                 :            :     /* Calculate constant value to be used in get_sumpart(). */
      65                 :         23 :     log_constant = get_average_length() * N / F;
      66                 :         23 :     wqf_product_factor = get_wqf() * factor;
      67                 :            : 
      68                 :            :     // Calculate the upper bound on the weight.
      69                 :            : 
      70                 :            :     /* Calculations to decide the values to be used for calculating upper bound. */
      71                 :            :     /* The upper bound of the term appearing in the second log is obtained
      72                 :            :        by taking the minimum and maximum wdf value in the formula as shown. */
      73                 :         23 :     double max_product_1 = wdf_upper * (1.0 - min_wdf_to_len);
      74                 :            :     /* A second upper bound of the term can be obtained by plugging in the
      75                 :            :        upper bound of the length and differentiating the term w.r.t wdf
      76                 :            :        to find the value of wdf at which function attains maximum value. */
      77         [ +  - ]:         23 :     double wdf_var = min(wdf_upper, len_upper / 2.0);
      78                 :         23 :     double max_product_2 = wdf_var * (1.0 - wdf_var / len_upper);
      79                 :            :     /* Take the minimum of the two upper bounds. */
      80         [ +  - ]:         23 :     double max_product = min(max_product_1, max_product_2);
      81                 :            : 
      82                 :            :     // Maximization of the product of wdf and normalized wdf.
      83                 :            :     /* The expression is (wdf * (1.0 - wdf / len) * (1.0 - wdf / len)) /
      84                 :            :                          (wdf + 1.0). */
      85                 :            :     /* Now, assuming len to be len_upper for the purpose of maximization,
      86                 :            :        (d)/(dx) (x * (1 - x / c) * (1 - x / c)) / (x+1) =
      87                 :            :        ((c - x) * (c - x * (2 * x + 3))) / (c ^ 2 * (x + 1) ^ 2)
      88                 :            :        Thus, if (c - x * (2 * x + 3)) is positive, the differentiation
      89                 :            :        value will be positive and hence the function will be an
      90                 :            :        increasing function. By finding the positive root of the equation
      91                 :            :        2 * x ^ 2 + 3 * x - c = 0, we get the value of x(wdf)
      92                 :            :        at which the differentiation value turns to negative from positive,
      93                 :            :        and hence, the function will have maximum value for that value of wdf. */
      94                 :         23 :     double wdf_root = 0.25 * (sqrt(8.0 * len_upper + 9.0) - 3.0);
      95                 :            : 
      96                 :            :     // If wdf_root outside valid range, use nearest value in range.
      97         [ +  + ]:         23 :     if (wdf_root > wdf_upper) {
      98                 :          6 :         wdf_root = wdf_upper;
      99         [ -  + ]:         17 :     } else if (wdf_root < wdf_lower) {
     100                 :          0 :         wdf_root = wdf_lower;
     101                 :            :     }
     102                 :            : 
     103                 :         23 :     double max_wdf_product_normalization = wdf_root / (wdf_root + 1) *
     104                 :         23 :         pow((1 - wdf_root / len_upper), 2.0);
     105                 :            : 
     106                 :            :     double max_weight = max_wdf_product_normalization *
     107                 :         23 :         (log2(log_constant) + (0.5 * log2(2 * M_PI * max_product)));
     108                 :            : 
     109                 :         23 :     upper_bound = wqf_product_factor * max_weight;
     110         [ -  + ]:         23 :     if (rare(upper_bound < 0.0)) upper_bound = 0.0;
     111                 :            : }
     112                 :            : 
     113                 :            : string
     114                 :       1566 : DPHWeight::name() const
     115                 :            : {
     116         [ +  - ]:       1566 :     return "Xapian::DPHWeight";
     117                 :            : }
     118                 :            : 
     119                 :            : string
     120                 :       1559 : DPHWeight::short_name() const
     121                 :            : {
     122         [ +  - ]:       1559 :     return "dph";
     123                 :            : }
     124                 :            : 
     125                 :            : string
     126                 :         10 : DPHWeight::serialise() const
     127                 :            : {
     128                 :         10 :     return string();
     129                 :            : }
     130                 :            : 
     131                 :            : DPHWeight *
     132                 :          8 : DPHWeight::unserialise(const string& s) const
     133                 :            : {
     134         [ +  + ]:          8 :     if (rare(!s.empty()))
     135 [ +  - ][ +  - ]:          1 :         throw Xapian::SerialisationError("Extra data in DPHWeight::unserialise()");
                 [ +  - ]
     136                 :          7 :     return new DPHWeight();
     137                 :            : }
     138                 :            : 
     139                 :            : double
     140                 :         76 : DPHWeight::get_sumpart(Xapian::termcount wdf, Xapian::termcount len,
     141                 :            :                        Xapian::termcount) const
     142                 :            : {
     143 [ +  - ][ +  + ]:         76 :     if (wdf == 0 || wdf == len) return 0.0;
     144                 :            : 
     145                 :         70 :     double wdf_to_len = double(wdf) / len;
     146                 :            : 
     147                 :         70 :     double normalization = pow((1 - wdf_to_len), 2) / (wdf + 1);
     148                 :            : 
     149                 :            :     double wt = normalization *
     150                 :        140 :         (wdf * log2(wdf_to_len * log_constant) +
     151                 :        140 :          (0.5 * log2(2 * M_PI * wdf * (1 - wdf_to_len))));
     152         [ -  + ]:         70 :     if (rare(wt <= 0.0)) return 0.0;
     153                 :            : 
     154                 :         70 :     return wqf_product_factor * wt;
     155                 :            : }
     156                 :            : 
     157                 :            : double
     158                 :         46 : DPHWeight::get_maxpart() const
     159                 :            : {
     160                 :         46 :     return upper_bound;
     161                 :            : }
     162                 :            : 
     163                 :            : double
     164                 :          0 : DPHWeight::get_sumextra(Xapian::termcount, Xapian::termcount) const
     165                 :            : {
     166                 :          0 :     return 0;
     167                 :            : }
     168                 :            : 
     169                 :            : double
     170                 :         23 : DPHWeight::get_maxextra() const
     171                 :            : {
     172                 :         23 :     return 0;
     173                 :            : }
     174                 :            : 
     175                 :            : DPHWeight *
     176                 :          0 : DPHWeight::create_from_parameters(const char * p) const
     177                 :            : {
     178         [ #  # ]:          0 :     if (*p != '\0')
     179 [ #  # ][ #  # ]:          0 :         throw InvalidArgumentError("No parameters are required for DPHWeight");
                 [ #  # ]
     180                 :          0 :     return new Xapian::DPHWeight();
     181                 :            : }
     182                 :            : 
     183                 :            : }

Generated by: LCOV version 1.11