LCOV - code coverage report
Current view: top level - api - terminfo.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core eba1a2e3082b Lines: 32 36 88.9 %
Date: 2019-06-13 13:35:36 Functions: 13 13 100.0 %
Branches: 17 20 85.0 %

           Branch data     Line data    Source code
       1                 :            : /** @file terminfo.h
       2                 :            :  * @brief Metadata for a term in a document
       3                 :            :  */
       4                 :            : /* Copyright 2017,2018,2019 Olly Betts
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or
       7                 :            :  * modify it under the terms of the GNU General Public License as
       8                 :            :  * published by the Free Software Foundation; either version 2 of the
       9                 :            :  * License, or (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      19                 :            :  */
      20                 :            : 
      21                 :            : #ifndef XAPIAN_INCLUDED_TERMINFO_H
      22                 :            : #define XAPIAN_INCLUDED_TERMINFO_H
      23                 :            : 
      24                 :            : #include "api/smallvector.h"
      25                 :            : #include <xapian/types.h>
      26                 :            : 
      27                 :            : using namespace std;
      28                 :            : 
      29                 :            : /// Metadata for a term in a document
      30                 :   22388064 : class TermInfo {
      31                 :            :     Xapian::termcount wdf;
      32                 :            : 
      33                 :            :     /** Split point in the position range.
      34                 :            :      *
      35                 :            :      *  To allow more efficient insertion of positions, we support the
      36                 :            :      *  positions being split into two sorted ranges, and if this is the
      37                 :            :      *  case, split will be > 0 and there will be two sorted ranges [0, split)
      38                 :            :      *  and [split, positions.size()).
      39                 :            :      *
      40                 :            :      *  If split is 0, then [0, positions.size()) form a single sorted range.
      41                 :            :      *
      42                 :            :      *  If positions.empty(), then split > 0 indicates that the term has been
      43                 :            :      *  deleted (this allows us to delete terms without invalidating existing
      44                 :            :      *  TermIterator objects).
      45                 :            :      *
      46                 :            :      *  Use type unsigned here to avoid bloating this structure.  More than
      47                 :            :      *  4 billion positions in one document is not sensible (and not possible
      48                 :            :      *  unless termpos is configured to be 64 bit).
      49                 :            :      */
      50                 :            :     mutable unsigned split = 0;
      51                 :            : 
      52                 :            :     /** Positions at which the term occurs.
      53                 :            :      *
      54                 :            :      *  The entries are sorted in strictly increasing order (so duplicate
      55                 :            :      *  entries are not allowed).
      56                 :            :      */
      57                 :            :     mutable Xapian::VecCOW<Xapian::termpos> positions;
      58                 :            : 
      59                 :            :     /** Merge sorted ranges before and after @a split. */
      60                 :            :     void merge() const;
      61                 :            : 
      62                 :            :   public:
      63                 :            :     /** Constructor.
      64                 :            :      *
      65                 :            :      *  @param wdf_   Within-document frequency
      66                 :            :      */
      67                 :    1066112 :     explicit TermInfo(Xapian::termcount wdf_) : wdf(wdf_) {}
      68                 :            : 
      69                 :            :     /** Constructor which also adds an initial position.
      70                 :            :      *
      71                 :            :      *  @param wdf_   Within-document frequency
      72                 :            :      *  @param termpos  Position to add
      73                 :            :      */
      74                 :    5686096 :     TermInfo(Xapian::termcount wdf_, Xapian::termpos termpos) : wdf(wdf_) {
      75         [ +  - ]:    2843048 :         positions.push_back(termpos);
      76                 :    2843048 :     }
      77                 :            : 
      78                 :            :     /// Get a pointer to the positions.
      79                 :    3629775 :     const Xapian::VecCOW<Xapian::termpos>* get_positions() const {
      80         [ +  + ]:    3629775 :         if (split) merge();
      81                 :    3629775 :         return &positions;
      82                 :            :     }
      83                 :            : 
      84                 :        174 :     bool has_positions() const { return !positions.empty(); }
      85                 :            : 
      86                 :    1044354 :     size_t count_positions() const { return positions.size(); }
      87                 :            : 
      88                 :            :     /// Get the within-document frequency.
      89                 :   18596646 :     Xapian::termcount get_wdf() const { return wdf; }
      90                 :            : 
      91                 :            :     /** Increase within-document frequency.
      92                 :            :      *
      93                 :            :      *  @return true if the term was flagged as deleted before the operation.
      94                 :            :      */
      95                 :       1362 :     bool increase_wdf(Xapian::termcount delta) {
      96         [ -  + ]:       1362 :         if (rare(is_deleted())) {
      97                 :          0 :             split = 0;
      98                 :          0 :             wdf = delta;
      99                 :          0 :             return true;
     100                 :            :         }
     101                 :       1362 :         wdf += delta;
     102                 :       1362 :         return false;
     103                 :            :     }
     104                 :            : 
     105                 :            :     /** Decrease within-document frequency.
     106                 :            :      *
      107                 :            :      *  @return true if the adjusted wdf is zero and there are no positions.
     108                 :            :      */
     109                 :         19 :     bool decrease_wdf(Xapian::termcount delta) {
     110                 :            :         // Saturating arithmetic - don't let the wdf go below zero.
     111         [ +  + ]:         19 :         if (wdf >= delta) {
     112                 :         15 :             wdf -= delta;
     113                 :            :         } else {
     114                 :          4 :             wdf = 0;
     115                 :            :         }
     116 [ +  + ][ +  + ]:         19 :         if (wdf == 0 && positions.empty()) {
                 [ +  + ]
     117                 :            :             // Flag term as deleted if no wdf or positions.
     118                 :          6 :             split = 1;
     119                 :          6 :             return true;
     120                 :            :         }
     121                 :         13 :         return false;
     122                 :            :     }
     123                 :            : 
     124                 :         87 :     bool remove() {
     125         [ -  + ]:         87 :         if (is_deleted())
     126                 :          0 :             return false;
     127                 :         87 :         positions.clear();
     128                 :         87 :         split = 1;
     129                 :         87 :         return true;
     130                 :            :     }
     131                 :            : 
     132                 :            :     /** Add a position.
     133                 :            :      *
     134                 :            :      *  If @a termpos is already present, this is a no-op.
     135                 :            :      *
     136                 :            :      *  @param wdf_inc  wdf increment
     137                 :            :      *  @param termpos  Position to add
     138                 :            :      *
     139                 :            :      *  @return true if the term was flagged as deleted before the operation.
     140                 :            :      */
     141                 :            :     bool add_position(Xapian::termcount wdf_inc, Xapian::termpos termpos);
     142                 :            : 
     143                 :            :     /** Append a position.
     144                 :            :      *
      145                 :            :      *  The position must be > the largest currently in the list.
     146                 :            :      */
     147                 :         95 :     void append_position(Xapian::termpos termpos) {
     148                 :         95 :         positions.push_back(termpos);
     149                 :         95 :     }
     150                 :            : 
     151                 :            :     /** Remove a position.
     152                 :            :      *
     153                 :            :      *  @param termpos  Position to remove
     154                 :            :      *
      155                 :            :      *  @return false if @a termpos wasn't present.
     156                 :            :      */
     157                 :            :     bool remove_position(Xapian::termpos termpos);
     158                 :            : 
     159                 :            :     /** Remove a range of positions.
     160                 :            :      *
     161                 :            :      *  @param termpos_first    First position to remove
     162                 :            :      *  @param termpos_last     Last position to remove
     163                 :            :      *
     164                 :            :      *  It's OK if there are no positions in the specified range.
     165                 :            :      *
     166                 :            :      *  @return the number of positions removed.
     167                 :            :      */
     168                 :            :     Xapian::termpos remove_positions(Xapian::termpos termpos_first,
     169                 :            :                                      Xapian::termpos termpos_last);
     170                 :            : 
     171                 :            :     /** Has this term been deleted from this document?
     172                 :            :      *
     173                 :            :      *  We flag entries as deleted instead of actually deleting them to avoid
     174                 :            :      *  invalidating existing TermIterator objects.
     175                 :            :      */
     176 [ +  + ][ +  + ]:   13945572 :     bool is_deleted() const { return positions.empty() && split > 0; }
     177                 :            : };
     178                 :            : 
     179                 :            : #endif // XAPIAN_INCLUDED_TERMINFO_H

Generated by: LCOV version 1.11