LCOV - code coverage report
Current view: top level - tests - api_cluster.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core 954b5873a738 Lines: 88 90 97.8 %
Date: 2019-06-30 05:20:33 Functions: 7 8 87.5 %
Branches: 116 466 24.9 %

           Branch data     Line data    Source code
       1                 :            : /** @file api_cluster.cc
       2                 :            :  *  @brief Cluster API tests
       3                 :            :  */
       4                 :            : /* Copyright (C) 2016 Richhiey Thomas
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or
       7                 :            :  * modify it under the terms of the GNU General Public License as
       8                 :            :  * published by the Free Software Foundation; either version 2 of the
       9                 :            :  * License, or (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
      19                 :            :  * USA
      20                 :            :  */
      21                 :            : 
      22                 :            : #include <config.h>
      23                 :            : 
      24                 :            : #include "api_cluster.h"
      25                 :            : 
      26                 :            : #include <xapian.h>
      27                 :            : 
      28                 :            : #include "apitest.h"
      29                 :            : #include "testsuite.h"
      30                 :            : #include "testutils.h"
      31                 :            : 
      32                 :            : static void
      33                 :          5 : make_stemmed_cluster_db(Xapian::WritableDatabase &db, const std::string &)
      34                 :            : {
      35                 :            :     static const char* const test_strings[] = {
      36                 :            :         "This line is about a cluster. Cluster is important and is everywhere",
      37                 :            :         "We need to search for special cluster. Cluster cluster cluster",
      38                 :            :         "Computer cluster is a special example of a cluster. Used to search fast",
      39                 :            :         "Another example of cluster is a star cluster. Star cluster has a lot of stars"
      40                 :            :     };
      41                 :            : 
      42         [ +  - ]:          5 :     Xapian::TermGenerator indexer;
      43 [ +  - ][ +  - ]:         10 :     Xapian::Stem stemmer("english");
      44         [ +  - ]:          5 :     indexer.set_stemmer(stemmer);
      45 [ +  - ][ +  + ]:         25 :     for (const std::string& document_data : test_strings) {
      46         [ +  - ]:         40 :         Xapian::Document document;
      47         [ +  - ]:         20 :         document.set_data(document_data);
      48         [ +  - ]:         20 :         indexer.set_document(document);
      49 [ +  - ][ +  - ]:         20 :         indexer.index_text(document_data);
      50         [ +  - ]:         20 :         db.add_document(document);
      51                 :         25 :     }
      52                 :          5 : }
      53                 :            : 
      54                 :            : /** Round Robin clusterer:
      55                 :            :  *  A minimal clusterer which assigns documents to clusters as follows:
      56                 :            :  *  the i-th document goes to the (i % k)-th cluster, where k is the number of
      57                 :            :  *  clusters and 0 <= i < N, where N is the number of documents.
      58                 :            :  */
      59         [ -  + ]:         12 : class RoundRobin : public Xapian::Clusterer {
      60                 :            :     /// Number of clusters to be formed by the clusterer
      61                 :            :     unsigned int num_of_clusters;
      62                 :            : 
      63                 :            :   public:
      64                 :            :     /** Constructor
      65                 :            :      *
      66                 :            :      *  @param num_of_clusters_         Number of required clusters
      67                 :            :      */
      68                 :         12 :     explicit RoundRobin(unsigned int num_of_clusters_) : num_of_clusters(num_of_clusters_) {}
      69                 :            : 
      70                 :            :     /** Implements the RoundRobin clustering
      71                 :            :      *
      72                 :            :      *  @param mset    MSet object containing the documents that are to
      73                 :            :      *                 be clustered
      74                 :            :      */
      75                 :            :     Xapian::ClusterSet cluster(const Xapian::MSet &mset);
      76                 :            : 
      77                 :          0 :     std::string get_description() const {
      78         [ #  # ]:          0 :         return "RoundRobin()";
      79                 :            :     }
      80                 :            : };
      81                 :            : 
      82                 :            : Xapian::ClusterSet
      83                 :          6 : RoundRobin::cluster(const Xapian::MSet &mset)
      84                 :            : {
      85         [ +  - ]:          6 :     Xapian::TermListGroup tlg(mset);
      86         [ +  - ]:          6 :     Xapian::ClusterSet cset;
      87                 :         12 :     std::vector<Xapian::Point> points;
      88                 :            : 
      89 [ +  - ][ +  - ]:         30 :     for (Xapian::MSetIterator it = mset.begin(); it != mset.end(); ++it)
                 [ +  + ]
      90 [ +  - ][ +  - ]:         30 :         points.push_back(Xapian::Point(tlg, it.get_document()));
                 [ +  - ]
      91                 :            : 
      92                 :          6 :     unsigned int i = 0;
      93         [ +  + ]:         24 :     while (i < num_of_clusters) {
      94         [ +  - ]:         18 :         Xapian::Cluster cluster_rr;
      95         [ +  - ]:         18 :         cset.add_cluster(cluster_rr);
      96                 :         18 :         i++;
      97                 :         18 :     }
      98                 :            : 
      99                 :          6 :     unsigned int size = points.size();
     100         [ +  + ]:         30 :     for (i = 0; i < size; ++i)
     101         [ +  - ]:         24 :         cset.add_to_cluster(points[i], i % num_of_clusters);
     102                 :            : 
     103                 :         12 :     return cset;
     104                 :            : }
     105                 :            : 
     106                 :            : /** Test for cosine distance
     107                 :            :  *  Cosine distance = 1 - (cosine of the angle between two vectors).
     108                 :            :  *  Thus, if two vectors are equal, the distance between them will be zero
     109                 :            :  *  and if two vectors are unequal, the distance will be 1 >= dist >= 0.
     110                 :            :  */
     111                 :          6 : DEFINE_TESTCASE(cosine_distance1, generated)
     112                 :            : {
     113 [ +  - ][ +  - ]:          6 :     Xapian::Database db = get_database("stemmed_cluster", make_stemmed_cluster_db);
                 [ +  - ]
     114         [ +  - ]:         12 :     Xapian::Enquire enquire(db);
     115 [ +  - ][ +  - ]:          6 :     enquire.set_query(Xapian::Query("cluster"));
                 [ +  - ]
     116                 :            : 
     117         [ +  - ]:         12 :     Xapian::MSet matches = enquire.get_mset(0, 4);
     118         [ +  - ]:         12 :     Xapian::TermListGroup tlg(matches);
     119 [ +  - ][ +  - ]:         12 :     Xapian::Document doc1 = matches[0].get_document();
     120 [ +  - ][ +  - ]:         12 :     Xapian::Document doc2 = matches[1].get_document();
     121         [ +  - ]:         12 :     Xapian::Point x1(tlg, doc1);
     122         [ +  - ]:         12 :     Xapian::Point x2(tlg, doc2);
     123                 :            : 
     124                 :            :     // Check whether the same vector gives zero distance
     125                 :          6 :     Xapian::CosineDistance d;
     126         [ +  - ]:          6 :     double distance = d.similarity(x1, x1);
     127 [ -  + ][ #  # ]:          6 :     TEST_EQUAL(distance, 0);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     128                 :            : 
     129                 :            :     // Check whether two different vectors give a distance such that
     130                 :            :     // 0 < distance <= 1
     131         [ +  - ]:          6 :     distance = d.similarity(x1, x2);
     132 [ -  + ][ #  # ]:          6 :     TEST_REL(distance, >, 0);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     133 [ -  + ][ #  # ]:          6 :     TEST_REL(distance, <=, 1);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     134                 :            : 
     135                 :          6 :     return true;
     136                 :            : }
     137                 :            : 
     138                 :            : /** Round Robin Test
     139                 :            :  *  Test that none of the returned clusters are empty
     140                 :            :  */
     141                 :          6 : DEFINE_TESTCASE(round_robin1, generated)
     142                 :            : {
     143 [ +  - ][ +  - ]:          6 :     Xapian::Database db = get_database("stemmed_cluster", make_stemmed_cluster_db);
                 [ +  - ]
     144         [ +  - ]:         12 :     Xapian::Enquire enq(db);
     145 [ +  - ][ +  - ]:          6 :     enq.set_query(Xapian::Query("cluster"));
                 [ +  - ]
     146         [ +  - ]:         12 :     Xapian::MSet matches = enq.get_mset(0, 4);
     147                 :            : 
     148                 :          6 :     int num_clusters = 3;
     149                 :         12 :     RoundRobin rr(num_clusters);
     150         [ +  - ]:         12 :     Xapian::ClusterSet cset = rr.cluster(matches);
     151         [ +  - ]:          6 :     int size = cset.size();
     152         [ +  + ]:         24 :     for (int i = 0; i < size; ++i) {
     153 [ +  - ][ +  - ]:         18 :         Xapian::DocumentSet d = cset[i].get_documents();
     154 [ +  - ][ -  + ]:         18 :         TEST(d.size() != 0);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     155                 :         18 :     }
     156                 :          6 :     return true;
     157                 :            : }
     158                 :            : 
     159                 :          7 : DEFINE_TESTCASE(stem_stopper1, backend)
     160                 :            : {
     161 [ +  - ][ +  - ]:          7 :     Xapian::Stem stemmer("english");
     162                 :            :     // By default, the stemming strategy used is STEM_SOME
     163         [ +  - ]:         14 :     Xapian::StemStopper stopper(stemmer);
     164         [ +  - ]:         14 :     std::string term = "the";
     165         [ +  - ]:          7 :     stopper.add(term);
     166 [ +  - ][ -  + ]:          7 :     TEST(stopper(term));
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     167 [ +  - ][ +  - ]:          7 :     TEST(stopper('Z' + stemmer(term)));
         [ +  - ][ -  + ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     168         [ +  - ]:          7 :     term = "cluster";
     169 [ +  - ][ -  + ]:          7 :     TEST(!stopper(term));
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     170 [ +  - ][ +  - ]:          7 :     TEST(!stopper('Z' + stemmer(term)));
         [ +  - ][ -  + ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     171                 :            : 
     172         [ +  - ]:         14 :     Xapian::StemStopper stopper_all_z(stemmer, Xapian::StemStopper::STEM_ALL_Z);
     173         [ +  - ]:         14 :     Xapian::StemStopper stopper_all(stemmer, Xapian::StemStopper::STEM_ALL);
     174         [ +  - ]:          7 :     term = "because";
     175         [ +  - ]:          7 :     stopper_all.add(term);
     176         [ +  - ]:          7 :     stopper_all_z.add(term);
     177 [ +  - ][ -  + ]:          7 :     TEST(!stopper_all_z(term));
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     178 [ +  - ][ +  - ]:          7 :     TEST(!stopper_all_z(stemmer(term)));
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     179 [ +  - ][ +  - ]:          7 :     TEST(stopper_all_z('Z' + stemmer(term)));
         [ +  - ][ -  + ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     180 [ +  - ][ -  + ]:          7 :     TEST(!stopper_all(term));
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     181 [ +  - ][ +  - ]:          7 :     TEST(!stopper_all('Z' + stemmer(term)));
         [ +  - ][ -  + ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     182 [ +  - ][ +  - ]:          7 :     TEST(stopper_all(stemmer(term)));
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     183                 :            : 
     184         [ +  - ]:         14 :     Xapian::StemStopper stopper_none(stemmer, Xapian::StemStopper::STEM_NONE);
     185         [ +  - ]:          7 :     term = "and";
     186         [ +  - ]:          7 :     stopper_none.add(term);
     187 [ +  - ][ -  + ]:          7 :     TEST(stopper_none(term));
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     188 [ +  - ][ +  - ]:          7 :     TEST(!stopper_none('Z' + stemmer(term)));
         [ +  - ][ -  + ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     189                 :            : 
     190                 :          7 :     return true;
     191                 :            : }

Generated by: LCOV version 1.11