LCOV - code coverage report
Current view: top level - api - sortable-serialise.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core 7028d852e609 Lines: 100 101 99.0 %
Date: 2019-02-17 14:59:59 Functions: 3 3 100.0 %
Branches: 77 86 89.5 %

           Branch data     Line data    Source code
       1                 :            : /** @file sortable-serialise.cc
       2                 :            :  * @brief Serialise floating point values to string which sort the same way.
       3                 :            :  */
       4                 :            : /* Copyright (C) 2007,2009,2015,2016 Olly Betts
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or modify
       7                 :            :  * it under the terms of the GNU General Public License as published by
       8                 :            :  * the Free Software Foundation; either version 2 of the License, or
       9                 :            :  * (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      19                 :            :  */
      20                 :            : 
      21                 :            : #include <config.h>
      22                 :            : 
      23                 :            : #include <xapian/queryparser.h>
      24                 :            : 
      25                 :            : // Disable these assertions when building the library as these functions are
      26                 :            : // marked not to throw exceptions in the API headers.  In unittest we define
      27                 :            : // UNITTEST_ASSERT_NOTHROW to set a variable to the exception message and
      28                 :            : // return, then the harness checks if that variable has been set.
      29                 :            : #ifndef XAPIAN_UNITTEST
      30                 :            : # define UNITTEST_ASSERT_NOTHROW(COND,RET)
      31                 :            : #endif
      32                 :            : 
      33                 :            : #include <cfloat>
      34                 :            : #include <cmath>
      35                 :            : #include <cstring>
      36                 :            : 
      37                 :            : #include <string>
      38                 :            : 
      39                 :            : using namespace std;
      40                 :            : 
      41                 :            : #if FLT_RADIX != 2
      42                 :            : # error Code currently assumes FLT_RADIX == 2
      43                 :            : #endif
      44                 :            : 
      45                 :            : #ifdef _MSC_VER
      46                 :            : // Disable warning about negating an unsigned type, which we do deliberately.
      47                 :            : # pragma warning(disable:4146)
      48                 :            : #endif
      49                 :            : 
      50                 :            : size_t
      51                 :     310341 : Xapian::sortable_serialise_(double value, char * buf) XAPIAN_NOEXCEPT
      52                 :            : {
      53                 :            :     double mantissa;
      54                 :            :     int exponent;
      55                 :            : 
      56                 :            :     // Negative infinity.
      57         [ +  + ]:     310341 :     if (value < -DBL_MAX) return 0;
      58                 :            : 
      59                 :     310339 :     mantissa = frexp(value, &exponent);
      60                 :            : 
      61                 :            :     /* Deal with zero specially.
      62                 :            :      *
      63                 :            :      * IEEE representation of doubles uses 11 bits for the exponent, with a
      64                 :            :      * bias of 1023.  We bias this by subtracting 8, and non-IEEE
      65                 :            :      * representations may allow higher exponents, so allow exponents down to
      66                 :            :      * -2039 - if smaller exponents are possible anywhere, we underflow such
      67                 :            :      *  numbers to 0.
      68                 :            :      */
      69 [ +  + ][ -  + ]:     310339 :     if (mantissa == 0.0 || exponent < -2039) {
      70                 :      14998 :         *buf = '\x80';
      71                 :      14998 :         return 1;
      72                 :            :     }
      73                 :            : 
      74                 :     295341 :     bool negative = (mantissa < 0);
      75         [ +  + ]:     295341 :     if (negative) mantissa = -mantissa;
      76                 :            : 
      77                 :            :     // Infinity, or extremely large non-IEEE representation.
      78 [ +  + ][ -  + ]:     295341 :     if (value > DBL_MAX || exponent > 2055) {
      79         [ -  + ]:          2 :         if (negative) {
      80                 :            :             // This can only happen with a non-IEEE representation, because
      81                 :            :             // we've already tested for value < -DBL_MAX
      82                 :          0 :             return 0;
      83                 :            :         } else {
      84                 :          2 :             memset(buf, '\xff', 9);
      85                 :          2 :             return 9;
      86                 :            :         }
      87                 :            :     }
      88                 :            : 
      89                 :            :     // Encoding:
      90                 :            :     //
      91                 :            :     // [ 7 | 6 | 5 | 4 3 2 1 0]
      92                 :            :     //   Sm  Se  Le
      93                 :            :     //
      94                 :            :     // Sm stores the sign of the mantissa: 1 = positive or zero, 0 = negative.
      95                 :            :     // Se stores the sign of the exponent: Sm for positive/zero, !Sm for neg.
      96                 :            :     // Le stores the length of the exponent: !Se for 3 bits, Se for 11 bits.
      97         [ +  + ]:     295339 :     unsigned char next = (negative ? 0 : 0xe0);
      98                 :            : 
      99                 :            :     // Bias the exponent by 8 so that more small integers get short encodings.
     100                 :     295339 :     exponent -= 8;
     101                 :     295339 :     bool exponent_negative = (exponent < 0);
     102         [ +  + ]:     295339 :     if (exponent_negative) {
     103                 :     278623 :         exponent = -exponent;
     104                 :     278623 :         next ^= 0x60;
     105                 :            :     }
     106                 :            : 
     107                 :     295339 :     size_t len = 0;
     108                 :            : 
     109                 :            :     /* We store the exponent in 3 or 11 bits.  If the number is negative, we
     110                 :            :      * flip all the bits of the exponent, since larger negative numbers should
     111                 :            :      * sort first.
     112                 :            :      *
     113                 :            :      * If the exponent is negative, we flip the bits of the exponent, since
     114                 :            :      * larger negative exponents should sort first (unless the number is
     115                 :            :      * negative, in which case they should sort later).
     116                 :            :      */
     117         [ -  + ]:         38 :     UNITTEST_ASSERT_NOTHROW(exponent >= 0, 0);
     118         [ +  + ]:     295339 :     if (exponent < 8) {
     119                 :     285477 :         next ^= 0x20;
     120                 :     285477 :         next |= static_cast<unsigned char>(exponent << 2);
     121         [ +  + ]:     285477 :         if (negative ^ exponent_negative) next ^= 0x1c;
     122                 :            :     } else {
     123         [ -  + ]:         24 :         UNITTEST_ASSERT_NOTHROW((exponent >> 11) == 0, 0);
     124                 :            :         // Put the top 5 bits of the exponent into the lower 5 bits of the
     125                 :            :         // first byte:
     126                 :       9862 :         next |= static_cast<unsigned char>(exponent >> 6);
     127         [ +  + ]:       9862 :         if (negative ^ exponent_negative) next ^= 0x1f;
     128                 :       9862 :         buf[len++] = next;
     129                 :            :         // And the lower 6 bits of the exponent go into the upper 6 bits
     130                 :            :         // of the second byte:
     131                 :       9862 :         next = static_cast<unsigned char>(exponent << 2);
     132         [ +  + ]:       9862 :         if (negative ^ exponent_negative) next ^= 0xfc;
     133                 :            :     }
     134                 :            : 
     135                 :            :     // Convert the 52 (or 53) bits of the mantissa into two 32-bit words.
     136         [ +  + ]:     295339 :     mantissa *= 1 << (negative ? 26 : 27);
     137                 :     295339 :     unsigned word1 = static_cast<unsigned>(mantissa);
     138                 :     295339 :     mantissa -= word1;
     139                 :     295339 :     unsigned word2 = static_cast<unsigned>(mantissa * 4294967296.0); // 1<<32
     140                 :            :     // If the number is positive, the first bit will always be set because 0.5
     141                 :            :     // <= mantissa < 1, unless mantissa is zero, which we handle specially
     142                 :            :     // above).  If the number is negative, we negate the mantissa instead of
     143                 :            :     // flipping all the bits, so in the case of 0.5, the first bit isn't set
     144                 :            :     // so we need to store it explicitly.  But for the cost of one extra
     145                 :            :     // leading bit, we can save several trailing 0xff bytes in lots of common
     146                 :            :     // cases.
     147 [ +  + ][ -  + ]:         38 :     UNITTEST_ASSERT_NOTHROW(negative || (word1 & (1<<26)), 0);
                 [ -  + ]
     148         [ +  + ]:     295339 :     if (negative) {
     149                 :            :         // We negate the mantissa for negative numbers, so that the sort order
     150                 :            :         // is reversed (since larger negative numbers should come first).
     151                 :      63069 :         word1 = -word1;
     152         [ +  + ]:      63069 :         if (word2 != 0) ++word1;
     153                 :      63069 :         word2 = -word2;
     154                 :            :     }
     155                 :            : 
     156                 :     295339 :     word1 &= 0x03ffffff;
     157                 :     295339 :     next |= static_cast<unsigned char>(word1 >> 24);
     158                 :     295339 :     buf[len++] = next;
     159                 :     295339 :     buf[len++] = char(word1 >> 16);
     160                 :     295339 :     buf[len++] = char(word1 >> 8);
     161                 :     295339 :     buf[len++] = char(word1);
     162                 :            : 
     163                 :     295339 :     buf[len++] = char(word2 >> 24);
     164                 :     295339 :     buf[len++] = char(word2 >> 16);
     165                 :     295339 :     buf[len++] = char(word2 >> 8);
     166                 :     295339 :     buf[len++] = char(word2);
     167                 :            : 
     168                 :            :     // Finally, we can chop off any trailing zero bytes.
     169 [ +  - ][ +  + ]:    2212847 :     while (len > 0 && buf[len - 1] == '\0') {
     170                 :    1917508 :         --len;
     171                 :            :     }
     172                 :            : 
     173                 :     310341 :     return len;
     174                 :            : }
     175                 :            : 
     176                 :            : /// Get a number from the character at a given position in a string, returning
     177                 :            : /// 0 if the string isn't long enough.
     178                 :            : static inline unsigned char
     179                 :       3842 : numfromstr(const std::string & str, std::string::size_type pos)
     180                 :            : {
     181         [ +  + ]:       3842 :     return (pos < str.size()) ? static_cast<unsigned char>(str[pos]) : '\0';
     182                 :            : }
     183                 :            : 
     184                 :            : double
     185                 :        932 : Xapian::sortable_unserialise(const std::string & value) XAPIAN_NOEXCEPT
     186                 :            : {
     187                 :            :     // Zero.
     188 [ +  + ][ +  + ]:        932 :     if (value.size() == 1 && value[0] == '\x80') return 0.0;
                 [ +  + ]
     189                 :            : 
     190                 :            :     // Positive infinity.
     191   [ +  +  +  + ]:        945 :     if (value.size() == 9 &&
                 [ +  + ]
     192                 :         14 :         memcmp(value.data(), "\xff\xff\xff\xff\xff\xff\xff\xff\xff", 9) == 0) {
     193                 :            : #ifdef INFINITY
     194                 :            :         // INFINITY is C99.  Oddly, it's of type "float" so sanity check in
     195                 :            :         // case it doesn't cast to double as infinity (apparently some
     196                 :            :         // implementations have this problem).
     197                 :            :         if (double(INFINITY) > HUGE_VAL) return INFINITY;
     198                 :            : #endif
     199                 :          2 :         return HUGE_VAL;
     200                 :            :     }
     201                 :            : 
     202                 :            :     // Negative infinity.
     203         [ +  + ]:        929 :     if (value.empty()) {
     204                 :            : #ifdef INFINITY
     205                 :            :         if (double(INFINITY) > HUGE_VAL) return -INFINITY;
     206                 :            : #endif
     207                 :          2 :         return -HUGE_VAL;
     208                 :            :     }
     209                 :            : 
     210                 :        927 :     unsigned char first = numfromstr(value, 0);
     211                 :        927 :     size_t i = 0;
     212                 :            : 
     213                 :        927 :     first ^= static_cast<unsigned char>(first & 0xc0) >> 1;
     214                 :        927 :     bool negative = !(first & 0x80);
     215                 :        927 :     bool exponent_negative = (first & 0x40);
     216                 :        927 :     bool explen = !(first & 0x20);
     217                 :        927 :     int exponent = first & 0x1f;
     218         [ +  + ]:        927 :     if (!explen) {
     219                 :        897 :         exponent >>= 2;
     220         [ +  + ]:        897 :         if (negative ^ exponent_negative) exponent ^= 0x07;
     221                 :            :     } else {
     222                 :         30 :         first = numfromstr(value, ++i);
     223                 :         30 :         exponent <<= 6;
     224                 :         30 :         exponent |= (first >> 2);
     225         [ +  + ]:         30 :         if (negative ^ exponent_negative) exponent ^= 0x07ff;
     226                 :            :     }
     227                 :            : 
     228                 :            :     unsigned word1;
     229                 :        927 :     word1 = (unsigned(first & 0x03) << 24);
     230                 :        927 :     word1 |= numfromstr(value, ++i) << 16;
     231                 :        927 :     word1 |= numfromstr(value, ++i) << 8;
     232                 :        927 :     word1 |= numfromstr(value, ++i);
     233                 :            : 
     234                 :        927 :     unsigned word2 = 0;
     235         [ +  + ]:        927 :     if (i < value.size()) {
     236                 :         26 :         word2 = numfromstr(value, ++i) << 24;
     237                 :         26 :         word2 |= numfromstr(value, ++i) << 16;
     238                 :         26 :         word2 |= numfromstr(value, ++i) << 8;
     239                 :         26 :         word2 |= numfromstr(value, ++i);
     240                 :            :     }
     241                 :            : 
     242         [ +  + ]:        927 :     if (negative) {
     243                 :         25 :         word1 = -word1;
     244         [ +  + ]:         25 :         if (word2 != 0) ++word1;
     245                 :         25 :         word2 = -word2;
     246         [ -  + ]:         19 :         UNITTEST_ASSERT_NOTHROW((word1 & 0xf0000000) != 0, 0);
     247                 :         25 :         word1 &= 0x03ffffff;
     248                 :            :     }
     249         [ +  + ]:        927 :     if (!negative) word1 |= 1<<26;
     250                 :            : 
     251                 :        927 :     double mantissa = 0;
     252         [ +  + ]:        927 :     if (word2) mantissa = word2 / 4294967296.0; // 1<<32
     253                 :        927 :     mantissa += word1;
     254         [ +  + ]:        927 :     mantissa /= 1 << (negative ? 26 : 27);
     255                 :            : 
     256         [ +  + ]:        927 :     if (exponent_negative) exponent = -exponent;
     257                 :        927 :     exponent += 8;
     258                 :            : 
     259         [ +  + ]:        927 :     if (negative) mantissa = -mantissa;
     260                 :            : 
     261                 :            :     // We use scalbn() since it's equivalent to ldexp() when FLT_RADIX == 2
     262                 :            :     // (which we currently assume), except that ldexp() will set errno if the
     263                 :            :     // result overflows or underflows, which isn't really desirable here.
     264                 :        927 :     return scalbn(mantissa, exponent);
     265                 :            : }

Generated by: LCOV version 1.11