@@ -256,19 +256,25 @@ bool slow_float_parsing(UNUSED const char * src, W writer) {
256256 return INVALID_NUMBER ((const uint8_t *)src);
257257}
258258
259+ template <typename I>
260+ NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later
261+ really_inline bool parse_digit (const char c, I &i) {
262+ const unsigned char digit = static_cast <unsigned char >(c - ' 0' );
263+ if (digit > 9 ) {
264+ return false ;
265+ }
266+ // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication
267+ i = 10 * i + digit; // might overflow, we will handle the overflow later
268+ return true ;
269+ }
270+
259271really_inline bool parse_decimal (UNUSED const uint8_t *const src, const char *&p, uint64_t &i, int64_t &exponent) {
260272 // we continue with the fiction that we have an integer. If the
261273 // floating point number is representable as x * 10^z for some integer
262274 // z that fits in 53 bits, then we will be able to convert back the
263275 // integer into a float in a lossless manner.
264276 const char *const first_after_period = p;
265277
266- unsigned char digit = static_cast <unsigned char >(*p - ' 0' );
267- if (digit > 9 ) { return INVALID_NUMBER (src); } // There must be at least one digit after the .
268- ++p;
269- i = i * 10 + digit; // might overflow + multiplication by 10 is likely
270- // cheaper than arbitrary mult.
271- // we will handle the overflow later
272278#ifdef SWAR_NUMBER_PARSING
273279 // this helps if we have lots of decimals!
274280 // this turns out to be frequent enough.
@@ -277,57 +283,38 @@ really_inline bool parse_decimal(UNUSED const uint8_t *const src, const char *&p
277283 p += 8 ;
278284 }
279285#endif
280- digit = static_cast <unsigned char >(*p - ' 0' );
281- while (digit <= 9 ) {
282- ++p;
283- i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
284- // because we have parse_highprecision_float later.
285- digit = static_cast <unsigned char >(*p - ' 0' );
286- }
286+ // Unrolling the first digit makes a small difference on some implementations (e.g. westmere)
287+ if (parse_digit (*p, i)) { ++p; }
288+ while (parse_digit (*p, i)) { p++; }
287289 exponent = first_after_period - p;
288- return true ;
289- }
290-
291- template <typename I>
292- really_inline bool parse_digit (const char c, I &i) {
293- const unsigned char digit = static_cast <unsigned char >(c - ' 0' );
294- if (digit <= 9 ) {
295- // a multiplication by 10 is cheaper than an arbitrary integer
296- // multiplication
297- i = 10 * i + digit; // might overflow, we will handle the overflow later
298- return true ;
299- } else {
300- return false ;
290+ // Decimal without digits (123.) is illegal
291+ if (exponent == 0 ) {
292+ return INVALID_NUMBER (src);
301293 }
302- }
303- template <typename I>
304- really_inline bool parse_first_digit (const char c, I &i) {
305- const unsigned char digit = static_cast <unsigned char >(c - ' 0' );
306- i = digit;
307- return digit <= 9 ;
294+ return true ;
308295}
309296
310297really_inline bool parse_exponent (UNUSED const uint8_t *const src, const char *&p, int64_t &exponent) {
311- bool neg_exp = false ;
312- if (' -' == *p) {
313- neg_exp = true ;
314- ++p;
315- } else if (' +' == *p) {
316- ++p;
317- }
298+ // Exp Sign: -123.456e[-]78
299+ bool neg_exp = (' -' == *p);
300+ if (neg_exp || ' +' == *p) { p++; } // Skip + as well
301+
302+ // Exponent: -123.456e-[78]
303+ auto start_exp = p;
304+ int64_t exp_number = 0 ;
305+ while (parse_digit (*p, exp_number)) { ++p; }
306+ exponent += (neg_exp ? -exp_number : exp_number);
318307
319- // e[+-] must be followed by a number
320- int64_t exp_number;
321- if (!parse_first_digit (*p, exp_number)) { return INVALID_NUMBER (src); }
322- ++p;
323- if (parse_digit (*p, exp_number)) { ++p; }
324- if (parse_digit (*p, exp_number)) { ++p; }
325- while (parse_digit (*p, exp_number)) {
326- ++p;
327- // we need to check for overflows; we refuse to parse this
328- if (exp_number > 0x100000000 ) { return INVALID_NUMBER (src); }
308+ // If there were no digits, it's an error.
309+ // If there were more than 18 digits, we may have overflowed the integer.
310+ if (unlikely (p == start_exp || p > start_exp+18 )) {
311+ // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow
312+ while (*start_exp == ' 0' ) { start_exp++; }
313+ // 19 digits could overflow int64_t and is kind of absurd anyway. We don't
314+ // support exponents smaller than -9,999,999,999,999,999,999 and bigger
315+ // than 9,999,999,999,999,999,999.
316+ if (p == start_exp || p > start_exp+18 ) { return INVALID_NUMBER (src); }
329317 }
330- exponent += (neg_exp ? -exp_number : exp_number);
331318 return true ;
332319}
333320
0 commit comments