LCOV - code coverage report
Current view: top level - diversify - diversifyinternal.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core 4ba52dacf4fb Lines: 3 3 100.0 %
Date: 2019-05-20 14:58:19 Functions: 2 2 100.0 %
Branches: 3 6 50.0 %

           Branch data     Line data    Source code
       1                 :            : /** @file diversifyinternal.h
       2                 :            :  * @brief Xapian::Diversify internals
       3                 :            :  */
       4                 :            : /* Copyright (C) 2018 Uppinder Chugh
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or
       7                 :            :  * modify it under the terms of the GNU General Public License as
       8                 :            :  * published by the Free Software Foundation; either version 2 of the
       9                 :            :  * License, or (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
      19                 :            :  * USA
      20                 :            :  */
      21                 :            : 
      22                 :            : #ifndef XAPIAN_INCLUDED_DIVERSIFYINTERNAL_H
      23                 :            : #define XAPIAN_INCLUDED_DIVERSIFYINTERNAL_H
      24                 :            : 
      25                 :            : #include <xapian/intrusive_ptr.h>
      26                 :            : 
      27                 :            : #include <vector>
      28                 :            : 
      29                 :            : /** Internal class for Diversify
      30                 :            :  */
      31                 :          8 : class Xapian::Diversify::Internal : public Xapian::Internal::intrusive_base {
      32                 :            :     /// Copies are not allowed
      33                 :            :     Internal(const Internal&) = delete;
      34                 :            : 
      35                 :            :     /// Assignment is not allowed
      36                 :            :     void operator=(const Internal&) = delete;
      37                 :            : 
      38                 :            :     /// Top-k documents of given mset are diversified
      39                 :            :     Xapian::doccount k;
      40                 :            : 
      41                 :            :     /// Number of relevant documents from each cluster used for building topC
      42                 :            :     Xapian::doccount r;
      43                 :            : 
      44                 :            :     /// MPT parameters
      45                 :            :     double lambda, b, sigma_sqr;
      46                 :            : 
      47                 :            :     /// Store each document from given mset as a point
      48                 :            :     std::unordered_map<Xapian::docid, Xapian::Point> points;
      49                 :            : 
      50                 :            :     /// Store the relevance score of each document
      51                 :            :     std::unordered_map<Xapian::docid, double> scores;
      52                 :            : 
      53                 :            :     /// Store pairwise cosine similarities of documents of given mset
      54                 :            :     std::map<std::pair<Xapian::docid, Xapian::docid>, double> pairwise_sim;
      55                 :            : 
      56                 :            :     /// Store docids of top k diversified documents
      57                 :            :     std::vector<Xapian::docid> main_dmset;
      58                 :            : 
      59                 :            :   public:
      60                 :            :     /// Constructor for initialising diversification parameters
      61                 :          4 :     explicit Internal(Xapian::doccount k_,
      62                 :            :                       Xapian::doccount r_,
      63                 :            :                       double lambda_,
      64                 :            :                       double b_,
      65                 :            :                       double sigma_sqr_)
      66 [ +  - ][ +  - ]:          4 :         : k(k_), r(r_), lambda(lambda_), b(b_), sigma_sqr(sigma_sqr_) {}
                 [ +  - ]
      67                 :            : 
      68                 :            :     /** Initialise diversified document set
      69                 :            :      *
      70                 :            :      *  Convert top-k documents of mset into vector of Points, which
      71                 :            :      *  represents the initial diversified document set.
      72                 :            :      *
      73                 :            :      *  @param source   MSet object containing the documents of which
      74                 :            :      *                  top-k are to be diversified
      75                 :            :      */
      76                 :            :     void initialise_points(const Xapian::MSet& source);
      77                 :            : 
      78                 :            :     /** Return a key for a pair of documents
      79                 :            :      *
       80                 :            :      *  Returns a key as a pair of the given document ids
      81                 :            :      *
      82                 :            :      *  @param doc_id   Document id of the document
       83                 :            :      *  @param centroid_idx     Index of the cluster to which the given centroid
       84                 :            :      *                          belongs in the cluster set
      85                 :            :      */
      86                 :            :     std::pair<Xapian::docid, Xapian::docid>
      87                 :            :     get_key(Xapian::docid doc_id, unsigned int centroid_idx);
      88                 :            : 
      89                 :            :     /** Compute pairwise similarities
      90                 :            :      *
      91                 :            :      *  Used for pre-computing pairwise cosine similarities of documents
      92                 :            :      *  of given mset, which is used to speed up evaluate_dmset
      93                 :            :      *
      94                 :            :      *  @param cset     Cluster of given relevant documents
      95                 :            :      */
      96                 :            :     void compute_similarities(const Xapian::ClusterSet& cset);
      97                 :            : 
      98                 :            :     /** Return difference of 'points' and current dmset
      99                 :            :      *
     100                 :            :      *  Return the difference of 'points' and the current diversified
     101                 :            :      *  document match set
     102                 :            :      *
     103                 :            :      *  @param dmset    Document set representing a diversified document set
     104                 :            :      */
     105                 :            :     std::vector<Xapian::docid>
     106                 :            :     compute_diff_dmset(const std::vector<Xapian::docid>& dmset);
     107                 :            : 
     108                 :            :     /** Evaluate a diversified mset
     109                 :            :      *
     110                 :            :      *  Evaluate a diversified mset using MPT algorithm
     111                 :            :      *
      112                 :            :      *  @param dmset    Set of points representing candidate diversified
     113                 :            :      *                  set of documents
     114                 :            :      *  @param cset     Set of clusters of given mset
     115                 :            :      */
     116                 :            :     double evaluate_dmset(const std::vector<Xapian::docid>& dmset,
     117                 :            :                           const Xapian::ClusterSet& cset);
     118                 :            : 
     119                 :            :     /// Return diversified document set from given mset
     120                 :            :     Xapian::DocumentSet get_dmset(const MSet& mset);
     121                 :            : };
     122                 :            : 
     123                 :            : #endif // XAPIAN_INCLUDED_DIVERSIFYINTERNAL_H

Generated by: LCOV version 1.11