LCOV - code coverage report
Current view: top level - tests - api_snippets.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core 954b5873a738 Lines: 215 216 99.5 %
Date: 2019-06-30 05:20:33 Functions: 15 15 100.0 %
Branches: 582 2878 20.2 %

           Branch data     Line data    Source code
       1                 :            : /** @file api_snippets.cc
       2                 :            :  * @brief tests snippets
       3                 :            :  */
       4                 :            : /* Copyright 2012 Mihai Bivol
       5                 :            :  * Copyright 2015,2016,2017,2019 Olly Betts
       6                 :            :  *
       7                 :            :  * This program is free software; you can redistribute it and/or
       8                 :            :  * modify it under the terms of the GNU General Public License as
       9                 :            :  * published by the Free Software Foundation; either version 2 of the
      10                 :            :  * License, or (at your option) any later version.
      11                 :            :  *
      12                 :            :  * This program is distributed in the hope that it will be useful,
      13                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15                 :            :  * GNU General Public License for more details.
      16                 :            :  *
      17                 :            :  * You should have received a copy of the GNU General Public License
      18                 :            :  * along with this program; if not, write to the Free Software
      19                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
      20                 :            :  * USA
      21                 :            :  */
      22                 :            : 
      23                 :            : #include <config.h>
      24                 :            : 
      25                 :            : #include "api_snippets.h"
      26                 :            : 
      27                 :            : #include <fstream>
      28                 :            : #include <string>
      29                 :            : 
      30                 :            : #include <xapian.h>
      31                 :            : 
      32                 :            : #include "apitest.h"
      33                 :            : #include "backendmanager_local.h"
      34                 :            : #include "testsuite.h"
      35                 :            : #include "testutils.h"
      36                 :            : 
      37                 :            : using namespace std;
      38                 :            : 
      39                 :            : struct snippet_testcase {
      40                 :            :     const char * input;
      41                 :            :     size_t len;
      42                 :            :     const char * expect;
      43                 :            : };
      44                 :            : 
      45                 :            : /// Test snippets without stemming.
      46                 :          7 : DEFINE_TESTCASE(snippet1, backend) {
      47 [ +  - ][ +  - ]:          7 :     Xapian::Enquire enquire(get_database("apitest_simpledata"));
                 [ +  - ]
      48                 :            :     enquire.set_query(Xapian::Query(Xapian::Query::OP_OR,
      49                 :            :                                     Xapian::Query("rubbish"),
      50 [ +  - ][ +  - ]:          7 :                                     Xapian::Query("mention")));
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
      51         [ +  - ]:         14 :     Xapian::MSet mset = enquire.get_mset(0, 0);
      52                 :            : 
      53                 :            :     static const snippet_testcase testcases[] = {
      54                 :            :         // Test highlighting in full sample.
      55                 :            :         { "Rubbish and junk", 20, "<b>Rubbish</b> and junk" },
      56                 :            :         { "Project R.U.B.B.I.S.H. greenlit", 31, "Project <b>R.U.B.B.I.S.H.</b> greenlit" },
      57                 :            :         { "What a load of rubbish", 100, "What a load of <b>rubbish</b>" },
      58                 :            :         { "Mention rubbish", 100, "<b>Mention</b> <b>rubbish</b>" },
      59                 :            :         { "A mention of rubbish", 100, "A <b>mention</b> of <b>rubbish</b>" },
      60                 :            :         { "Rubbish mention of rubbish", 100, "<b>Rubbish</b> <b>mention</b> of <b>rubbish</b>" },
      61                 :            : 
      62                 :            :         // Test selection of snippet.
      63                 :            :         { "Rubbish and junk", 12, "<b>Rubbish</b> and..." },
      64                 :            :         { "Project R.U.B.B.I.S.H. greenlit", 14, "...<b>R.U.B.B.I.S.H.</b>..." },
      65                 :            :         { "What a load of rubbish", 12, "...of <b>rubbish</b>" },
      66                 :            :         { "What a load of rubbish", 8, "...<b>rubbish</b>" },
      67                 :            :         { "Rubbish mention where the start is better than the rubbish ending", 18, "<b>Rubbish</b> <b>mention</b>..." },
      68                 :            : 
      69                 :            :         // Should prefer "interesting" words for context.
      70                 :            :         { "And of the rubbish document to this", 18, "...<b>rubbish</b> document..." },
      71                 :            :         { "And if they document rubbish to be this", 18, "...document <b>rubbish</b>..." },
      72                 :            :     };
      73                 :            : 
      74         [ +  + ]:         98 :     for (auto i : testcases) {
      75 [ +  - ][ +  - ]:         91 :         TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len), i.expect);
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ -  + ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
      76                 :            :     }
      77                 :            : 
      78                 :          7 :     return true;
      79                 :            : }
      80                 :            : 
      81                 :            : /// Test snippets with stemming.
      82                 :          7 : DEFINE_TESTCASE(snippetstem1, backend) {
      83 [ +  - ][ +  - ]:          7 :     Xapian::Enquire enquire(get_database("apitest_simpledata"));
                 [ +  - ]
      84                 :            :     enquire.set_query(Xapian::Query(Xapian::Query::OP_OR,
      85                 :            :                                     Xapian::Query("rubbish"),
      86 [ +  - ][ +  - ]:          7 :                                     Xapian::Query("Zexampl")));
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
      87         [ +  - ]:         14 :     Xapian::MSet mset = enquire.get_mset(0, 0);
      88                 :            : 
      89                 :            :     // Term Zexampl isn't in the database, but the highlighter should still
      90                 :            :     // handle it.
      91                 :            :     static const snippet_testcase testcases[] = {
      92                 :            :         // "rubbish" isn't stemmed, example is.
      93                 :            :         { "You rubbished my ideas", 24, "You rubbished my ideas" },
      94                 :            :         { "Rubbished all my examples", 20, "...all my <b>examples</b>" },
      95                 :            :         { "Examples of text", 20, "<b>Examples</b> of text" },
      96                 :            :     };
      97                 :            : 
      98 [ +  - ][ +  - ]:         14 :     Xapian::Stem stem("en");
      99         [ +  + ]:         28 :     for (auto i : testcases) {
     100 [ +  - ][ +  - ]:         21 :         TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem), i.expect);
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     101                 :            :     }
     102                 :            : 
     103                 :          7 :     return true;
     104                 :            : }
     105                 :            : 
     106                 :            : /// Test snippets with phrases.
     107                 :          7 : DEFINE_TESTCASE(snippetphrase1, backend) {
     108 [ +  - ][ +  - ]:          7 :     Xapian::Enquire enquire(get_database("apitest_simpledata"));
                 [ +  - ]
     109                 :            :     Xapian::Query q(Xapian::Query::OP_PHRASE,
     110                 :            :                     Xapian::Query("rubbish"),
     111 [ +  - ][ +  - ]:         14 :                     Xapian::Query("mention"));
         [ +  - ][ +  - ]
                 [ +  - ]
     112                 :            :     // Regression test - a phrase with a following sibling query would crash in
     113                 :            :     // the highlighting code.
     114 [ +  - ][ +  - ]:          7 :     enquire.set_query(q &~ Xapian::Query("banana"));
         [ +  - ][ +  - ]
     115         [ +  - ]:         14 :     Xapian::MSet mset = enquire.get_mset(0, 0);
     116                 :            : 
     117                 :            :     static const snippet_testcase testcases[] = {
     118                 :            :         { "A mention of rubbish", 18, "...mention of rubbish" },
     119                 :            :         { "This is a rubbish mention", 20, "...is a <b>rubbish mention</b>" },
     120                 :            :         { "Mention of a rubbish mention of rubbish", 45, "Mention of a <b>rubbish mention</b> of rubbish" },
     121                 :            :         { "Mention of a rubbish mention of rubbish", 18, "...<b>rubbish mention</b> of..." },
     122                 :            :         { "rubbish rubbish mention mention", 45, "rubbish <b>rubbish mention</b> mention" },
     123                 :            :         { "rubbish mention rubbish mention", 45, "<b>rubbish mention</b> <b>rubbish mention</b>" },
     124                 :            :     };
     125                 :            : 
     126 [ +  - ][ +  - ]:         14 :     Xapian::Stem stem("en");
     127         [ +  + ]:         49 :     for (auto i : testcases) {
     128 [ +  - ][ +  - ]:         42 :         TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem), i.expect);
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     129                 :            :     }
     130                 :            : 
     131                 :          7 :     return true;
     132                 :            : }
     133                 :            : 
     134                 :            : /// Index file to a DB with TermGenerator.
     135                 :            : static void
     136                 :          4 : make_tg_db(Xapian::WritableDatabase &db, const string & source)
     137                 :            : {
     138         [ +  - ]:          4 :     string file = test_driver::get_srcdir();
     139         [ +  - ]:          4 :     file += "/testdata/";
     140         [ +  - ]:          4 :     file += source;
     141         [ +  - ]:          4 :     file += ".txt";
     142         [ +  - ]:          8 :     ifstream input;
     143         [ +  - ]:          4 :     input.open(file.c_str());
     144 [ +  - ][ -  + ]:          4 :     if (!input.is_open()) {
     145 [ #  # ][ #  # ]:          0 :         FAIL_TEST("Couldn't open input: " << file);
         [ #  # ][ #  # ]
                 [ #  # ]
     146                 :            :     }
     147                 :            : 
     148         [ +  - ]:          8 :     Xapian::TermGenerator tg;
     149 [ +  - ][ +  - ]:          4 :     tg.set_stemmer(Xapian::Stem("en"));
                 [ +  - ]
     150 [ +  - ][ +  + ]:         32 :     while (!input.eof()) {
     151         [ +  - ]:         28 :         Xapian::Document doc;
     152         [ +  - ]:         28 :         tg.set_document(doc);
     153 [ +  - ][ +  - ]:         56 :         string line, data;
     154                 :            :         while (true) {
     155         [ +  - ]:         68 :             getline(input, line);
     156 [ +  - ][ +  - ]:         68 :             if (find_if(line.begin(), line.end(), C_isnotspace) == line.end())
         [ +  - ][ +  - ]
                 [ +  + ]
     157                 :         28 :                 break;
     158 [ +  - ][ +  - ]:         40 :             tg.index_text(line);
     159 [ +  + ][ +  - ]:         40 :             if (!data.empty()) data += ' ';
     160         [ +  - ]:         40 :             data += line;
     161                 :            :         }
     162         [ +  - ]:         28 :         doc.set_data(data);
     163         [ +  - ]:         28 :         db.add_document(doc);
     164                 :         32 :     }
     165                 :          4 : }
     166                 :            : 
     167                 :            : /// Test snippets in various ways.
     168                 :          6 : DEFINE_TESTCASE(snippetmisc1, generated) {
     169 [ +  - ][ +  - ]:          6 :     Xapian::Database db = get_database("snippet", make_tg_db, "snippet");
                 [ +  - ]
     170         [ +  - ]:         12 :     Xapian::Enquire enquire(db);
     171         [ +  - ]:          6 :     enquire.set_weighting_scheme(Xapian::BoolWeight());
     172 [ +  - ][ +  - ]:         12 :     Xapian::Stem stem("en");
     173                 :            : 
     174                 :            :     static const char * const words[] = { "do", "we", "have" };
     175         [ +  - ]:         12 :     Xapian::Query q(Xapian::Query::OP_PHRASE, words, words + 3);
     176         [ +  - ]:          6 :     enquire.set_query(q);
     177         [ +  - ]:         12 :     Xapian::MSet mset = enquire.get_mset(0, 6);
     178 [ +  - ][ -  + ]:          6 :     TEST_EQUAL(mset.size(), 3);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     179 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 40, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     180                 :            :                        "How much o'brien <b>do we have</b>?  Miles...");
     181 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 40, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     182                 :            :                        "...Unicode: How much o’brien <b>do we have</b>?");
     183 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 32, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     184                 :            :                        "We do have we <b>do we have</b> do we.");
     185                 :            : 
     186 [ +  - ][ +  - ]:          6 :     enquire.set_query(Xapian::Query("Zwelcom") | Xapian::Query("Zmike"));
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
     187 [ +  - ][ +  - ]:          6 :     mset = enquire.get_mset(0, 6);
     188 [ +  - ][ -  + ]:          6 :     TEST_EQUAL(mset.size(), 3);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     189 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 25, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     190                 :            :                        "\"<b>Welcome</b> to <b>Mike's</b>...");
     191 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 5, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     192                 :            :                        "<b>Mike</b>...");
     193 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 10, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     194                 :            :                        "...<b>Mike</b> can...");
     195                 :            : 
     196 [ +  - ][ +  - ]:          6 :     enquire.set_query(Xapian::Query(q.OP_WILDCARD, "m"));
                 [ +  - ]
     197 [ +  - ][ +  - ]:          6 :     mset = enquire.get_mset(0, 6);
     198 [ +  - ][ -  + ]:          6 :     TEST_EQUAL(mset.size(), 5);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     199 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 18, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     200                 :            :                        "...<b>Mike's</b> <b>Mechanical</b>...");
     201 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 80, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     202                 :            :                        "<b>Mike</b> <b>McDonald</b> is a <b>mechanic</b> who enjoys repairing things of a <b>mechanical</b> sort.");
     203 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 102, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     204                 :            :                        "From autos to zip-lines, from tea-lights to x-rays, from sea ships to u-boats - <b>Mike</b> can fix them all.");
     205 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[3].get_document().get_data(), 64, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     206                 :            :                        "How <b>much</b> o'brien do we have?  <b>Miles</b> O'Brien, that's how <b>much</b>.");
     207                 :            :     // The requested length is in bytes, so the "fancy" apostrophe results in
     208                 :            :     // fewer Unicode characters in this sample than the previous one.
     209 [ +  - ][ +  - ]:          6 :     TEST_STRINGS_EQUAL(mset.snippet(mset[4].get_document().get_data(), 64, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     210                 :            :                        "...<b>much</b> o’brien do we have?  <b>Miles</b> O’Brien, that’s how <b>much</b>.");
     211                 :            : 
     212                 :          6 :     return true;
     213                 :            : }
     214                 :            : 
     215                 :            : /// Test snippet term diversity.
     216                 :          7 : DEFINE_TESTCASE(snippet_termcover1, backend) {
     217                 :            :     static const snippet_testcase testcases[] = {
     218                 :            :         // "Zexample" isn't in the database, so should get termweight 0.  Once
     219                 :            :         // max_tw is added on, "rubbish" should have just under twice the
     220                 :            :         // relevance of "example" so clearly should win in a straight fight.
     221                 :            :         { "A rubbish, but a good example", 14, "...<b>rubbish</b>, but a..."},
     222                 :            :         // But a second occurrence of "rubbish" has half the relevance, so
     223                 :            :         // "example" should add slightly more relevance.
     224                 :            :         { "Rubbish and rubbish, and rubbish examples", 22, "...and <b>rubbish</b> <b>examples</b>"},
     225                 :            :         // And again.
     226                 :            :         { "rubbish rubbish example rubbish rubbish", 16, "...<b>example</b> <b>rubbish</b>..." },
     227                 :            :     };
     228                 :            : 
     229 [ +  - ][ +  - ]:          7 :     Xapian::Stem stem("en");
     230                 :            :     // Disable SNIPPET_BACKGROUND_MODEL so we can test the relevance decay
     231                 :            :     // for repeated terms.
     232                 :          7 :     unsigned flags = Xapian::MSet::SNIPPET_EXHAUSTIVE;
     233         [ +  + ]:         28 :     for (auto i : testcases) {
     234 [ +  - ][ +  - ]:         21 :         Xapian::Enquire enquire(get_database("apitest_simpledata"));
                 [ +  - ]
     235                 :            :         enquire.set_query(Xapian::Query(Xapian::Query::OP_OR,
     236                 :            :                     Xapian::Query("rubbish"),
     237 [ +  - ][ +  - ]:         21 :                     Xapian::Query("Zexampl")));
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
     238                 :            : 
     239         [ +  - ]:         42 :         Xapian::MSet mset = enquire.get_mset(0, 0);
     240 [ +  - ][ +  - ]:         21 :         TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem, flags), i.expect);
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     241                 :         21 :     }
     242                 :            : 
     243                 :          7 :     return true;
     244                 :            : }
     245                 :            : 
     246                 :            : /// Test snippet term diversity cases with BoolWeight.
     247                 :          7 : DEFINE_TESTCASE(snippet_termcover2, backend) {
     248                 :            :     // With BoolWeight, all terms have 0 termweight, and so relevance 1.0
     249                 :            :     // (since max_tw is set to 1.0 if it is zero).
     250                 :            :     static const snippet_testcase testcases[] = {
     251                 :            :         // Diversity should pick two different terms in preference.
     252                 :            :         { "rubbish rubbish example rubbish rubbish", 16, "...<b>example</b> <b>rubbish</b>..." },
     253                 :            :         // And again.
     254                 :            :         { "Rubbish and rubbish, and rubbish examples", 22, "...and <b>rubbish</b> <b>examples</b>"},
     255                 :            :         // The last of two equal snippet should win.
     256                 :            :         { "A rubbish, but a good example", 14, "...a good <b>example</b>"},
     257                 :            :     };
     258                 :            : 
     259 [ +  - ][ +  - ]:          7 :     Xapian::Stem stem("en");
     260                 :            :     // Disable SNIPPET_BACKGROUND_MODEL so we can test the relevance decay
     261                 :            :     // for repeated terms.
     262                 :          7 :     unsigned flags = Xapian::MSet::SNIPPET_EXHAUSTIVE;
     263         [ +  + ]:         28 :     for (auto i : testcases) {
     264 [ +  - ][ +  - ]:         21 :         Xapian::Enquire enquire(get_database("apitest_simpledata"));
                 [ +  - ]
     265                 :            :         enquire.set_query(Xapian::Query(Xapian::Query::OP_OR,
     266                 :            :                     Xapian::Query("rubbish"),
     267 [ +  - ][ +  - ]:         21 :                     Xapian::Query("Zexampl")));
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
     268         [ +  - ]:         21 :         enquire.set_weighting_scheme(Xapian::BoolWeight());
     269                 :            : 
     270         [ +  - ]:         42 :         Xapian::MSet mset = enquire.get_mset(0, 0);
     271 [ +  - ][ +  - ]:         21 :         TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem, flags), i.expect);
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     272                 :         21 :     }
     273                 :            : 
     274                 :          7 :     return true;
     275                 :            : }
     276                 :            : 
     277                 :            : /// Test snippet EMPTY_WITHOUT_MATCH flag
     278                 :          7 : DEFINE_TESTCASE(snippet_empty, backend) {
     279 [ +  - ][ +  - ]:          7 :     Xapian::Stem stem("en");
     280                 :            : 
     281 [ +  - ][ +  - ]:         14 :     Xapian::Enquire enquire(get_database("apitest_simpledata"));
                 [ +  - ]
     282                 :            :     enquire.set_query(Xapian::Query(Xapian::Query::OP_OR,
     283                 :            :                       Xapian::Query("rubbish"),
     284 [ +  - ][ +  - ]:          7 :                       Xapian::Query("Zexampl")));
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
     285                 :            : 
     286         [ +  - ]:         14 :     Xapian::MSet mset = enquire.get_mset(0, 0);
     287                 :            : 
     288                 :            :     // A non-matching text
     289                 :          7 :     const char *input = "A string without a match.";
     290                 :          7 :     size_t len = strlen(input);
     291                 :            : 
     292                 :            :     // By default, snippet() returns len bytes of input without markup
     293                 :          7 :     unsigned flags = 0;
     294 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, 0), input);
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     295                 :            : 
     296                 :            :     // force snippet() to return the empty string if no term got matched
     297                 :          7 :     flags |= Xapian::MSet::SNIPPET_EMPTY_WITHOUT_MATCH;
     298 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, flags), "");
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     299                 :            : 
     300                 :            :     // A text with a match
     301                 :          7 :     input = "A rubbish example text";
     302                 :          7 :     len = strlen(input);
     303                 :            : 
     304                 :          7 :     flags = 0;
     305 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, flags),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     306                 :            :                        "A <b>rubbish</b> <b>example</b> text");
     307                 :            : 
     308                 :          7 :     flags |= Xapian::MSet::SNIPPET_EMPTY_WITHOUT_MATCH;
     309 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, len, stem, flags),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     310                 :            :                        "A <b>rubbish</b> <b>example</b> text");
     311                 :            : 
     312                 :          7 :     return true;
     313                 :            : }
     314                 :            : 
     315                 :            : /// Check snippets include certain preceding punctuation.
     316                 :          7 : DEFINE_TESTCASE(snippet_start_nonspace, backend) {
     317 [ +  - ][ +  - ]:          7 :     Xapian::Enquire enquire(get_database("apitest_simpledata"));
                 [ +  - ]
     318 [ +  - ][ +  - ]:          7 :     enquire.set_query(Xapian::Query("foo"));
                 [ +  - ]
     319                 :            : 
     320         [ +  - ]:         14 :     Xapian::MSet mset = enquire.get_mset(0, 0);
     321                 :            : 
     322         [ +  - ]:         14 :     Xapian::Stem stem;
     323                 :            : 
     324                 :          7 :     const char *input = "[xapian-devel] Re: foo";
     325 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     326                 :            :                        "[xapian-devel] Re: <b>foo</b>");
     327                 :            : 
     328                 :          7 :     input = "bar [xapian-devel] Re: foo";
     329 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, 24, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     330                 :            :                        "...[xapian-devel] Re: <b>foo</b>");
     331                 :            : 
     332                 :          7 :     input = "there is a $1000 prize for foo";
     333 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, 20, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     334                 :            :                        "...$1000 prize for <b>foo</b>");
     335                 :            : 
     336                 :          7 :     input = "-1 is less than foo";
     337 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     338                 :            :                        "-1 is less than <b>foo</b>");
     339                 :            : 
     340                 :          7 :     input = "+1 is less than foo";
     341 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     342                 :            :                        "+1 is less than <b>foo</b>");
     343                 :            : 
     344                 :          7 :     input = "/bin/sh is a foo";
     345 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     346                 :            :                        "/bin/sh is a <b>foo</b>");
     347                 :            : 
     348                 :          7 :     input = "'tis pity foo is a bar";
     349 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     350                 :            :                        "'tis pity <b>foo</b> is a bar");
     351                 :            : 
     352                 :          7 :     input = "\"foo bar\" he whispered";
     353 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, 11, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     354                 :            :                        "\"<b>foo</b> bar\" he...");
     355                 :            : 
     356                 :          7 :     input = "\\\\server\\share\\foo is a UNC path";
     357 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     358                 :            :                        "\\\\server\\share\\<b>foo</b> is a UNC path");
     359                 :            : 
     360                 :          7 :     input = "«foo» is a placeholder";
     361 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, 9, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     362                 :            :                        "«<b>foo</b>» is...");
     363                 :            : 
     364                 :          7 :     input = "#include <foo.h> to use libfoo";
     365 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, 12, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     366                 :            :                        "...&lt;<b>foo</b>.h&gt; to...");
     367                 :            : 
     368                 :          7 :     input = "¡foo!";
     369 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     370                 :            :                        "¡<b>foo</b>!");
     371                 :            : 
     372                 :          7 :     input = "¿foo?";
     373 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     374                 :            :                        "¿<b>foo</b>?");
     375                 :            : 
     376                 :          7 :     input = "(foo) test";
     377 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     378                 :            :                        "(<b>foo</b>) test");
     379                 :            : 
     380                 :          7 :     input = "{foo} test";
     381 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     382                 :            :                        "{<b>foo</b>} test");
     383                 :            : 
     384                 :          7 :     input = "`foo` test";
     385 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     386                 :            :                        "`<b>foo</b>` test");
     387                 :            : 
     388                 :          7 :     input = "@foo@ is replaced";
     389 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     390                 :            :                        "@<b>foo</b>@ is replaced");
     391                 :            : 
     392                 :          7 :     input = "%foo is a perl hash";
     393 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     394                 :            :                        "%<b>foo</b> is a perl hash");
     395                 :            : 
     396                 :          7 :     input = "&foo takes the address of foo";
     397 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     398                 :            :                        "&amp;<b>foo</b> takes the address of <b>foo</b>");
     399                 :            : 
     400                 :          7 :     input = "§3.1.4 foo";
     401 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     402                 :            :                        "§3.1.4 <b>foo</b>");
     403                 :            : 
     404                 :          7 :     input = "#foo";
     405 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     406                 :            :                        "#<b>foo</b>");
     407                 :            : 
     408                 :          7 :     input = "~foo~ test";
     409 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     410                 :            :                        "~<b>foo</b>~ test");
     411                 :            : 
     412                 :          7 :     input = "( foo )";
     413 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     414                 :            :                        "<b>foo</b>...");
     415                 :            : 
     416                 :          7 :     input = "(=foo=)";
     417 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     418                 :            :                        "<b>foo</b>...");
     419                 :            : 
     420                 :            :     // Check that excessive non-word characters aren't included.
     421                 :          7 :     input = "((((((foo";
     422 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     423                 :            :                        "<b>foo</b>");
     424                 :            : 
     425                 :            :     // Check we don't include characters that aren't useful.
     426                 :          7 :     input = "bar,foo!";
     427 [ +  - ][ +  - ]:          7 :     TEST_STRINGS_EQUAL(mset.snippet(input, 5, stem),
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     428                 :            :                        "...<b>foo</b>!");
     429                 :            : 
     430                 :          7 :     return true;
     431                 :            : }
     432                 :            : 
     433                 :            : /// Test snippets with small and zero length.
     434                 :          7 : DEFINE_TESTCASE(snippet_small_zerolength, backend) {
     435 [ +  - ][ +  - ]:          7 :     Xapian::Enquire enquire(get_database("apitest_simpledata"));
                 [ +  - ]
     436                 :            :     enquire.set_query(Xapian::Query(Xapian::Query::OP_OR,
     437                 :            :                                     Xapian::Query("rubbish"),
     438 [ +  - ][ +  - ]:          7 :                                     Xapian::Query("mention")));
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
     439         [ +  - ]:         14 :     Xapian::MSet mset = enquire.get_mset(0, 0);
     440                 :            : 
     441                 :            :     static const snippet_testcase testcases[] = {
     442                 :            :         // Test with small length
     443                 :            :         { "mention junk rubbish", 3, "" },
     444                 :            :         { "Project R.U.B.B.I.S.H. greenlit", 5, "" },
     445                 :            :         { "What load rubbish", 3, "" },
     446                 :            :         { "Mention rubbish", 4, "" },
     447                 :            : 
     448                 :            :         // Test with zero length.
     449                 :            :         { "Rubbish and junk", 0, "" },
     450                 :            :         { "Project R.U.B.B.I.S.H. greenlit", 0, "" },
     451                 :            :         { "What a load of rubbish", 0, "" },
     452                 :            :         { "rubbish mention rubbish mention", 0, "" },
     453                 :            :     };
     454                 :            : 
     455         [ +  + ]:         63 :     for (auto i : testcases) {
     456 [ +  - ][ +  - ]:         56 :         TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len), i.expect);
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
         [ +  - ][ -  + ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     457                 :            :     }
     458                 :            : 
     459                 :          7 :     return true;
     460                 :            : }
     461                 :            : 
     462                 :            : /// Test CJK ngrams.
     463                 :          6 : DEFINE_TESTCASE(snippet_cjkngrams, generated) {
     464                 :            :     Xapian::Database db = get_database("snippet_cjkngrams",
     465                 :            :         [](Xapian::WritableDatabase& wdb,
     466                 :         14 :            const string&)
     467                 :            :         {
     468         [ +  - ]:          4 :             Xapian::Document doc;
     469         [ +  - ]:          8 :             Xapian::TermGenerator tg;
     470         [ +  - ]:          4 :             tg.set_flags(Xapian::TermGenerator::FLAG_CJK_NGRAM);
     471         [ +  - ]:          4 :             tg.set_document(doc);
     472 [ +  - ][ +  - ]:          4 :             tg.index_text("明末時已經有香港地方的概念");
                 [ +  - ]
     473         [ +  - ]:          4 :             wdb.add_document(doc);
     474 [ +  - ][ +  - ]:         20 :         });
                 [ +  - ]
     475         [ +  - ]:         12 :     Xapian::Enquire enquire(db);
     476         [ +  - ]:         12 :     Xapian::QueryParser qp;
     477 [ +  - ][ +  - ]:         12 :     auto q = qp.parse_query("已經完成", qp.FLAG_DEFAULT | qp.FLAG_CJK_NGRAM);
                 [ +  - ]
     478         [ +  - ]:          6 :     enquire.set_query(q);
     479                 :            : 
     480         [ +  - ]:         12 :     Xapian::MSet mset = enquire.get_mset(0, 0);
     481                 :            : 
     482         [ +  - ]:         12 :     Xapian::Stem stem;
     483                 :          6 :     const char *input = "明末時已經有香港地方的概念";
     484                 :          6 :     size_t len = strlen(input);
     485                 :            : 
     486                 :          6 :     unsigned flags = Xapian::MSet::SNIPPET_CJK_NGRAM;
     487         [ +  - ]:         12 :     string s;
     488 [ +  - ][ +  - ]:          6 :     s = mset.snippet(input, len, stem, flags, "<b>", "</b>", "...");
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
     489 [ +  - ][ -  + ]:          6 :     TEST_STRINGS_EQUAL(s, "明末時<b>已</b><b>經</b>有香港地方的概念");
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     490                 :            : 
     491 [ +  - ][ +  - ]:          6 :     s = mset.snippet(input, len / 2, stem, flags, "<b>", "</b>", "...");
         [ +  - ][ +  - ]
         [ +  - ][ +  - ]
     492 [ +  - ][ -  + ]:          6 :     TEST_STRINGS_EQUAL(s, "...<b>已</b><b>經</b>有香港地...");
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
     493                 :            : 
     494                 :          6 :     return true;
     495                 :            : }
     496                 :            : 
     497                 :            : /// Test CJK word segmentation.
     498                 :          7 : DEFINE_TESTCASE(snippet_cjkwords, backend) {
     499 [ +  - ][ +  - ]:          7 :     Xapian::Enquire enquire(get_database("apitest_simpledata"));
                 [ +  - ]
     500 [ +  - ][ +  - ]:          7 :     enquire.set_query(Xapian::Query("已經"));
                 [ +  - ]
     501                 :            : 
     502         [ +  - ]:         14 :     Xapian::MSet mset = enquire.get_mset(0, 0);
     503                 :            : 
     504         [ +  - ]:         14 :     Xapian::Stem stem;
     505                 :          7 :     const char *input = "明末時已經有香港地方的概念";
     506                 :          7 :     const char *input2 = "明末時已經有香港地方的概念. Hello!";
     507                 :          7 :     size_t len = strlen(input);
     508                 :            : 
     509                 :          7 :     unsigned cjk_flags = Xapian::MSet::SNIPPET_CJK_WORDS;
     510                 :            : 
     511                 :            : #ifdef USE_ICU
     512                 :            : # define DO_TEST(CODE, RESULT) TEST_STRINGS_EQUAL(CODE, RESULT)
     513                 :            : #else
     514                 :            : # define DO_TEST(CODE, RESULT) \
     515                 :            :     try { \
     516                 :            :         CODE; \
     517                 :            :         FAIL_TEST("No exception thrown, expected FeatureUnavailableError"); \
     518                 :            :     } catch (const Xapian::FeatureUnavailableError& e) { \
     519                 :            :         TEST_STRINGS_EQUAL( \
     520                 :            :             e.get_msg(), \
     521                 :            :             "SNIPPET_CJK_WORDS requires building Xapian to use ICU"); \
     522                 :            :     }
     523                 :            : #endif
     524 [ +  - ][ +  - ]:          7 :     DO_TEST(mset.snippet(input, len, stem, cjk_flags, "<b>", "</b>", "..."),
         [ +  - ][ +  - ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ -  + ]
         [ -  + ][ -  +  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
           #  # ][ +  - ]
     525                 :            :             "明末時<b>已經</b>有香港地方的概念");
     526 [ +  - ][ +  - ]:          7 :     DO_TEST(mset.snippet(input2, len / 2, stem, cjk_flags, "[", "]", "~"),
         [ +  - ][ +  - ]
           [ -  +  #  #  
          #  #  #  #  #  
              # ][ -  + ]
         [ -  + ][ -  +  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
           #  # ][ +  - ]
     527                 :            :             "~時[已經]有香港~");
     528                 :            : #undef DO_TEST
     529                 :            : 
     530                 :          7 :     return true;
     531                 :            : }

Generated by: LCOV version 1.11