• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

arangodb / velocypack / 3998645281

pending completion
3998645281

Pull #148

github

GitHub
Merge b1e3c924b into 5a28b6413
Pull Request #148: use separate namespace for xxh functions

0 of 5107 relevant lines covered (0.0%)

0.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/Parser.cpp
1
////////////////////////////////////////////////////////////////////////////////
2
/// DISCLAIMER
3
///
4
/// Copyright 2014-2020 ArangoDB GmbH, Cologne, Germany
5
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
6
///
7
/// Licensed under the Apache License, Version 2.0 (the "License");
8
/// you may not use this file except in compliance with the License.
9
/// You may obtain a copy of the License at
10
///
11
///     http://www.apache.org/licenses/LICENSE-2.0
12
///
13
/// Unless required by applicable law or agreed to in writing, software
14
/// distributed under the License is distributed on an "AS IS" BASIS,
15
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
/// See the License for the specific language governing permissions and
17
/// limitations under the License.
18
///
19
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
20
///
21
/// @author Max Neunhoeffer
22
/// @author Jan Steemann
23
////////////////////////////////////////////////////////////////////////////////
24

25
#include "velocypack/velocypack-common.h"
#include "velocypack/Parser.h"
#include "velocypack/Value.h"
#include "velocypack/ValueType.h"
#include "asm-functions.h"

#include <cstdlib>
#include <string>
32

33
using namespace arangodb::velocypack;
34

35
// The following function does the actual parse. It gets bytes
36
// via peek, consume and reset appends the result to the Builder
37
// in *_builderPtr. Errors are reported via an exception.
38
// Behind the scenes it runs two parses, one to collect sizes and
39
// check for parse errors (scan phase) and then one to actually
40
// build the result (build phase).
41

42
ValueLength Parser::parseInternal(bool multi) {
×
43
  // skip over optional BOM
44
  if (_size >= 3 && _start[0] == 0xef && _start[1] == 0xbb &&
×
45
      _start[2] == 0xbf) {
×
46
    // found UTF-8 BOM. simply skip over it
47
    _pos += 3;
×
48
  }
49

50
  ValueLength nr = 0;
×
51
  do {
×
52
    bool haveReported = false;
×
53
    if (!_builderPtr->_stack.empty()) {
×
54
      ValueLength const tos = _builderPtr->_stack.back().startPos;
×
55
      if (_builderPtr->_start[tos] == 0x0b ||
×
56
          _builderPtr->_start[tos] == 0x14) {
×
57
        if (!_builderPtr->_keyWritten) {
×
58
          throw Exception(Exception::BuilderKeyMustBeString);
×
59
        } else {
60
          _builderPtr->_keyWritten = false;
×
61
        }
62
      } else {
63
        _builderPtr->reportAdd();
×
64
        haveReported = true;
×
65
      }
66
    }
67
    try {
68
      parseJson();
×
69
    } catch (...) {
×
70
      if (haveReported) {
×
71
        _builderPtr->cleanupAdd();
×
72
      }
73
      throw;
×
74
    }
75
    nr++;
×
76
    while (_pos < _size && isWhiteSpace(_start[_pos])) {
×
77
      ++_pos;
×
78
    }
79
    if (!multi && _pos != _size) {
×
80
      consume();  // to get error reporting right. return value intentionally
×
81
                  // not checked
82
      throw Exception(Exception::ParseError, "Expecting EOF");
×
83
    }
84
  } while (multi && _pos < _size);
×
85
  return nr;
×
86
}
87

88
// skips over all following whitespace tokens but does not consume the
89
// byte following the whitespace
90
int Parser::skipWhiteSpace(char const* err) {
×
91
  if (VELOCYPACK_UNLIKELY(_pos >= _size)) {
×
92
    throw Exception(Exception::ParseError, err);
×
93
  }
94
  uint8_t c = _start[_pos];
×
95
  if (!isWhiteSpace(c)) {
×
96
    return c;
×
97
  }
98
  if (c == ' ') {
×
99
    if (_pos + 1 >= _size) {
×
100
      _pos++;
×
101
      throw Exception(Exception::ParseError, err);
×
102
    }
103
    c = _start[_pos + 1];
×
104
    if (!isWhiteSpace(c)) {
×
105
      _pos++;
×
106
      return c;
×
107
    }
108
  }
109
  std::size_t remaining = _size - _pos;
×
110
  if (remaining >= 16) {
×
111
    std::size_t count = JSONSkipWhiteSpace(_start + _pos, remaining - 15);
×
112
    _pos += count;
×
113
  }
114
  do {
×
115
    if (!isWhiteSpace(_start[_pos])) {
×
116
      return static_cast<int>(_start[_pos]);
×
117
    }
118
    _pos++;
×
119
  } while (_pos < _size);
×
120
  throw Exception(Exception::ParseError, err);
×
121
}
122

123
void Parser::increaseNesting() {
×
124
  if (++_nesting >= options->nestingLimit) {
×
125
    throw Exception(Exception::TooDeepNesting);
×
126
  }
127
}
×
128

129
void Parser::decreaseNesting() noexcept {
×
130
  VELOCYPACK_ASSERT(_nesting > 0);
×
131
  --_nesting;
×
132
}
×
133

134
// parses a number value
135
void Parser::parseNumber() {
×
136
  std::size_t startPos = _pos;
×
137
  ParsedNumber numberValue;
×
138
  bool negative = false;
×
139
  int i = consume();
×
140
  // We know that a character is coming, and it's a number if it
141
  // starts with '-' or a digit. otherwise it's invalid
142
  if (i == '-') {
×
143
    i = getOneOrThrow("Incomplete number");
×
144
    negative = true;
×
145
  }
146
  if (i < '0' || i > '9') {
×
147
    throw Exception(Exception::ParseError, "Expecting digit");
×
148
  }
149

150
  if (i != '0') {
×
151
    unconsume();
×
152
    scanDigits(numberValue);
×
153
  }
154
  i = consume();
×
155
  if (i < 0 || (i != '.' && i != 'e' && i != 'E')) {
×
156
    if (i >= 0) {
×
157
      unconsume();
×
158
    }
159
    if (!numberValue.isInteger) {
×
160
      if (negative) {
×
161
        _builderPtr->addDouble(-numberValue.doubleValue);
×
162
      } else {
163
        _builderPtr->addDouble(numberValue.doubleValue);
×
164
      }
165
    } else if (negative) {
×
166
      if (numberValue.intValue <= static_cast<uint64_t>(INT64_MAX)) {
×
167
        _builderPtr->addInt(-static_cast<int64_t>(numberValue.intValue));
×
168
      } else if (numberValue.intValue == toUInt64(INT64_MIN)) {
×
169
        _builderPtr->addInt(INT64_MIN);
×
170
      } else {
171
        _builderPtr->addDouble(-static_cast<double>(numberValue.intValue));
×
172
      }
173
    } else {
174
      _builderPtr->addUInt(numberValue.intValue);
×
175
    }
176
    return;
×
177
  }
178

179
  double fractionalPart;
180
  if (i == '.') {
×
181
    // fraction. skip over '.'
182
    i = getOneOrThrow("Incomplete number");
×
183
    if (i < '0' || i > '9') {
×
184
      throw Exception(Exception::ParseError, "Incomplete number");
×
185
    }
186
    unconsume();
×
187
    fractionalPart = scanDigitsFractional();
×
188
    if (negative) {
×
189
      fractionalPart = -numberValue.asDouble() - fractionalPart;
×
190
    } else {
191
      fractionalPart = numberValue.asDouble() + fractionalPart;
×
192
    }
193
    i = consume();
×
194
    if (i < 0) {
×
195
      _builderPtr->addDouble(fractionalPart);
×
196
      return;
×
197
    }
198
  } else {
199
    if (negative) {
×
200
      fractionalPart = -numberValue.asDouble();
×
201
    } else {
202
      fractionalPart = numberValue.asDouble();
×
203
    }
204
  }
205
  if (i != 'e' && i != 'E') {
×
206
    unconsume();
×
207
    // use conventional atof() conversion here, to avoid precision loss
208
    // when interpreting and multiplying the single digits of the input stream
209
    // _builderPtr->addDouble(fractionalPart);
210
    _builderPtr->addDouble(
×
211
        atof(reinterpret_cast<char const*>(_start) + startPos));
×
212
    return;
×
213
  }
214
  i = getOneOrThrow("Incomplete number");
×
215
  negative = false;
×
216
  if (i == '+' || i == '-') {
×
217
    negative = (i == '-');
×
218
    i = getOneOrThrow("Incomplete number");
×
219
  }
220
  if (i < '0' || i > '9') {
×
221
    throw Exception(Exception::ParseError, "Incomplete number");
×
222
  }
223
  unconsume();
×
224
  ParsedNumber exponent;
×
225
  scanDigits(exponent);
×
226
  if (negative) {
×
227
    fractionalPart *= pow(10, -exponent.asDouble());
×
228
  } else {
229
    fractionalPart *= pow(10, exponent.asDouble());
×
230
  }
231
  if (std::isnan(fractionalPart) || !std::isfinite(fractionalPart)) {
×
232
    throw Exception(Exception::NumberOutOfRange);
×
233
  }
234
  // use conventional atof() conversion here, to avoid precision loss
235
  // when interpreting and multiplying the single digits of the input stream
236
  // _builderPtr->addDouble(fractionalPart);
237
  _builderPtr->addDouble(
×
238
      atof(reinterpret_cast<char const*>(_start) + startPos));
×
239
}
240

241
void Parser::parseString() {
×
242
  // When we get here, we have seen a " character and now want to
243
  // find the end of the string and parse the string value to its
244
  // VPack representation. We assume that the string is short and
245
  // insert 8 bytes for the length as soon as we reach 127 bytes
246
  // in the VPack representation.
247
  ValueLength const base = _builderPtr->_pos;
×
248
  _builderPtr->appendByte(0x40);  // correct this later
×
249

250
  bool large = false;          // set to true when we reach 128 bytes
×
251
  uint32_t highSurrogate = 0;  // non-zero if high-surrogate was seen
×
252

253
  while (true) {
254
    std::size_t remainder = _size - _pos;
×
255
    if (remainder >= 16) {
×
256
      _builderPtr->reserve(remainder);
×
257
      std::size_t count;
258
      // Note that the SSE4.2 accelerated string copying functions might
259
      // peek up to 15 bytes over the given end, because they use 128bit
260
      // registers. Therefore, we have to subtract 15 from remainder
261
      // to be on the safe side. Further bytes will be processed below.
262
      if (options->validateUtf8Strings) {
×
263
        count = JSONStringCopyCheckUtf8(_builderPtr->_start + _builderPtr->_pos,
×
264
                                        _start + _pos, remainder - 15);
×
265
      } else {
266
        count = JSONStringCopy(_builderPtr->_start + _builderPtr->_pos,
×
267
                               _start + _pos, remainder - 15);
×
268
      }
269
      _pos += count;
×
270
      _builderPtr->advance(count);
×
271
    }
272
    int i = getOneOrThrow("Unfinished string");
×
273
    if (!large && _builderPtr->_pos - (base + 1) > 126) {
×
274
      large = true;
×
275
      _builderPtr->reserve(8);
×
276
      ValueLength len = _builderPtr->_pos - (base + 1);
×
277
      memmove(_builderPtr->_start + base + 9, _builderPtr->_start + base + 1,
×
278
              checkOverflow(len));
279
      _builderPtr->advance(8);
×
280
    }
281
    switch (i) {
×
282
      case '"':
×
283
        ValueLength len;
284
        if (!large) {
×
285
          len = _builderPtr->_pos - (base + 1);
×
286
          _builderPtr->_start[base] = 0x40 + static_cast<uint8_t>(len);
×
287
          // String is ready
288
        } else {
289
          len = _builderPtr->_pos - (base + 9);
×
290
          _builderPtr->_start[base] = 0xbf;
×
291
          for (ValueLength i = 1; i <= 8; i++) {
×
292
            _builderPtr->_start[base + i] = len & 0xff;
×
293
            len >>= 8;
×
294
          }
295
        }
296
        return;
×
297
      case '\\':
×
298
        // Handle cases or throw error
299
        i = consume();
×
300
        if (VELOCYPACK_UNLIKELY(i < 0)) {
×
301
          throw Exception(Exception::ParseError, "Invalid escape sequence");
×
302
        }
303
        switch (i) {
304
          case '"':
×
305
          case '/':
306
          case '\\':
307
            _builderPtr->appendByte(static_cast<uint8_t>(i));
×
308
            highSurrogate = 0;
×
309
            break;
×
310
          case 'b':
×
311
            _builderPtr->appendByte('\b');
×
312
            highSurrogate = 0;
×
313
            break;
×
314
          case 'f':
×
315
            _builderPtr->appendByte('\f');
×
316
            highSurrogate = 0;
×
317
            break;
×
318
          case 'n':
×
319
            _builderPtr->appendByte('\n');
×
320
            highSurrogate = 0;
×
321
            break;
×
322
          case 'r':
×
323
            _builderPtr->appendByte('\r');
×
324
            highSurrogate = 0;
×
325
            break;
×
326
          case 't':
×
327
            _builderPtr->appendByte('\t');
×
328
            highSurrogate = 0;
×
329
            break;
×
330
          case 'u': {
×
331
            uint32_t v = 0;
×
332
            for (int j = 0; j < 4; j++) {
×
333
              i = consume();
×
334
              if (i < 0) {
×
335
                throw Exception(Exception::ParseError,
×
336
                                "Unfinished \\uXXXX escape sequence");
×
337
              }
338
              if (i >= '0' && i <= '9') {
×
339
                v = (v << 4) + i - '0';
×
340
              } else if (i >= 'a' && i <= 'f') {
×
341
                v = (v << 4) + i - 'a' + 10;
×
342
              } else if (i >= 'A' && i <= 'F') {
×
343
                v = (v << 4) + i - 'A' + 10;
×
344
              } else {
345
                throw Exception(Exception::ParseError,
×
346
                                "Illegal \\uXXXX escape sequence");
×
347
              }
348
            }
349
            if (v < 0x80) {
×
350
              _builderPtr->appendByte(static_cast<uint8_t>(v));
×
351
              highSurrogate = 0;
×
352
            } else if (v < 0x800) {
×
353
              _builderPtr->reserve(2);
×
354
              _builderPtr->appendByteUnchecked(0xc0 + (v >> 6));
×
355
              _builderPtr->appendByteUnchecked(0x80 + (v & 0x3f));
×
356
              highSurrogate = 0;
×
357
            } else if (v >= 0xdc00 && v < 0xe000 && highSurrogate != 0) {
×
358
              // Low surrogate, put the two together:
359
              v = 0x10000 + ((highSurrogate - 0xd800) << 10) + v - 0xdc00;
×
360
              _builderPtr->rollback(3);
×
361
              _builderPtr->reserve(4);
×
362
              _builderPtr->appendByteUnchecked(0xf0 + (v >> 18));
×
363
              _builderPtr->appendByteUnchecked(0x80 + ((v >> 12) & 0x3f));
×
364
              _builderPtr->appendByteUnchecked(0x80 + ((v >> 6) & 0x3f));
×
365
              _builderPtr->appendByteUnchecked(0x80 + (v & 0x3f));
×
366
              highSurrogate = 0;
×
367
            } else {
368
              if (v >= 0xd800 && v < 0xdc00) {
×
369
                // High surrogate:
370
                highSurrogate = v;
×
371
              } else {
372
                highSurrogate = 0;
×
373
              }
374
              _builderPtr->reserve(3);
×
375
              _builderPtr->appendByteUnchecked(0xe0 + (v >> 12));
×
376
              _builderPtr->appendByteUnchecked(0x80 + ((v >> 6) & 0x3f));
×
377
              _builderPtr->appendByteUnchecked(0x80 + (v & 0x3f));
×
378
            }
379
            break;
×
380
          }
381
          default:
×
382
            throw Exception(Exception::ParseError, "Invalid escape sequence");
×
383
        }
384
        break;
×
385
      default:
×
386
        if ((i & 0x80) == 0) {
×
387
          // non-UTF-8 sequence
388
          if (VELOCYPACK_UNLIKELY(i < 0x20)) {
×
389
            // control character
390
            throw Exception(Exception::UnexpectedControlCharacter);
×
391
          }
392
          highSurrogate = 0;
×
393
          _builderPtr->appendByte(static_cast<uint8_t>(i));
×
394
        } else {
395
          if (!options->validateUtf8Strings) {
×
396
            highSurrogate = 0;
×
397
            _builderPtr->appendByte(static_cast<uint8_t>(i));
×
398
          } else {
399
            // multi-byte UTF-8 sequence!
400
            int follow = 0;
×
401
            if ((i & 0xe0) == 0x80) {
×
402
              throw Exception(Exception::InvalidUtf8Sequence);
×
403
            } else if ((i & 0xe0) == 0xc0) {
×
404
              // two-byte sequence
405
              follow = 1;
×
406
            } else if ((i & 0xf0) == 0xe0) {
×
407
              // three-byte sequence
408
              follow = 2;
×
409
            } else if ((i & 0xf8) == 0xf0) {
×
410
              // four-byte sequence
411
              follow = 3;
×
412
            } else {
413
              throw Exception(Exception::InvalidUtf8Sequence);
×
414
            }
415

416
            // validate follow up characters
417
            _builderPtr->reserve(1 + follow);
×
418
            _builderPtr->appendByteUnchecked(static_cast<uint8_t>(i));
×
419
            for (int j = 0; j < follow; ++j) {
×
420
              i = getOneOrThrow("scanString: truncated UTF-8 sequence");
×
421
              if ((i & 0xc0) != 0x80) {
×
422
                throw Exception(Exception::InvalidUtf8Sequence);
×
423
              }
424
              _builderPtr->appendByteUnchecked(static_cast<uint8_t>(i));
×
425
            }
426
            highSurrogate = 0;
×
427
          }
428
        }
429
        break;
×
430
    }
431
  }
×
432
}
433

434
void Parser::parseArray() {
×
435
  _builderPtr->addArray();
×
436

437
  increaseNesting();
×
438

439
  int i = skipWhiteSpace("Expecting item or ']'");
×
440
  if (i == ']') {
×
441
    // empty array
442
    ++_pos;  // the closing ']'
×
443
    decreaseNesting();
×
444
    _builderPtr->close();
×
445
    return;
×
446
  }
447

448
  while (true) {
449
    // parse array element itself
450
    _builderPtr->reportAdd();
×
451
    parseJson();
×
452
    i = skipWhiteSpace("Expecting ',' or ']'");
×
453
    if (i == ']') {
×
454
      // end of array
455
      ++_pos;  // the closing ']'
×
456
      _builderPtr->close();
×
457
      decreaseNesting();
×
458
      return;
×
459
    }
460
    // skip over ','
461
    if (VELOCYPACK_UNLIKELY(i != ',')) {
×
462
      throw Exception(Exception::ParseError, "Expecting ',' or ']'");
×
463
    }
464
    ++_pos;  // the ','
×
465
  }
466

467
  // should never get here
468
  VELOCYPACK_ASSERT(false);
469
}
470

471
void Parser::parseObject() {
×
472
  _builderPtr->addObject();
×
473

474
  increaseNesting();
×
475
  int i = skipWhiteSpace("Expecting item or '}'");
×
476
  if (i == '}') {
×
477
    // empty object
478
    consume();  // the closing '}'. return value intentionally not checked
×
479

480
    if (_nesting != 0 || !options->keepTopLevelOpen) {
×
481
      // only close if we've not been asked to keep top level open
482
      decreaseNesting();
×
483
      _builderPtr->close();
×
484
    }
485
    return;
×
486
  }
487

488
  while (true) {
489
    // always expecting a string attribute name here
490
    if (VELOCYPACK_UNLIKELY(i != '"')) {
×
491
      throw Exception(Exception::ParseError, "Expecting '\"' or '}'");
×
492
    }
493
    // get past the initial '"'
494
    ++_pos;
×
495

496
    _builderPtr->reportAdd();
×
497
    auto const lastPos = _builderPtr->_pos;
×
498
    parseString();
×
499

500
    if (options->attributeTranslator != nullptr) {
×
501
      // check if a translation for the attribute name exists
502
      Slice key(_builderPtr->_start + lastPos);
×
503

504
      if (key.isString()) {
×
505
        uint8_t const* translated =
506
            options->attributeTranslator->translate(key.stringView());
×
507

508
        if (translated != nullptr) {
×
509
          // found translation... now reset position to old key position
510
          // and simply overwrite the existing key with the numeric translation
511
          // id
512
          _builderPtr->resetTo(lastPos);
×
513
          _builderPtr->addUInt(Slice(translated).getUInt());
×
514
        }
515
      }
516
    }
517

518
    i = skipWhiteSpace("Expecting ':'");
×
519
    // always expecting the ':' here
520
    if (VELOCYPACK_UNLIKELY(i != ':')) {
×
521
      throw Exception(Exception::ParseError, "Expecting ':'");
×
522
    }
523
    ++_pos;  // skip over the colon
×
524

525
    parseJson();
×
526

527
    i = skipWhiteSpace("Expecting ',' or '}'");
×
528
    if (i == '}') {
×
529
      // end of object
530
      ++_pos;  // the closing '}'
×
531
      if (_nesting != 1 || !options->keepTopLevelOpen) {
×
532
        // only close if we've not been asked to keep top level open
533
        _builderPtr->close();
×
534
      }
535
      decreaseNesting();
×
536
      return;
×
537
    }
538
    if (VELOCYPACK_UNLIKELY(i != ',')) {
×
539
      throw Exception(Exception::ParseError, "Expecting ',' or '}'");
×
540
    }
541
    // skip over ','
542
    ++_pos;  // the ','
×
543
    i = skipWhiteSpace("Expecting '\"' or '}'");
×
544
  }
×
545

546
  // should never get here
547
  VELOCYPACK_ASSERT(false);
548
}
549

550
void Parser::parseJson() {
×
551
  skipWhiteSpace("Expecting item");  // return value intentionally not checked
×
552

553
  int i = consume();
×
554
  if (i < 0) {
×
555
    return;
×
556
  }
557
  switch (i) {
×
558
    case '{':
×
559
      parseObject();  // this consumes the closing '}' or throws
×
560
      break;
×
561
    case '[':
×
562
      parseArray();  // this consumes the closing ']' or throws
×
563
      break;
×
564
    case 't':
×
565
      parseTrue();  // this consumes "rue" or throws
×
566
      break;
×
567
    case 'f':
×
568
      parseFalse();  // this consumes "alse" or throws
×
569
      break;
×
570
    case 'n':
×
571
      parseNull();  // this consumes "ull" or throws
×
572
      break;
×
573
    case '"':
×
574
      parseString();
×
575
      break;
×
576
    default: {
×
577
      // everything else must be a number or is invalid...
578
      // this includes '-' and '0' to '9'. scanNumber() will
579
      // throw if the input is non-numeric
580
      unconsume();
×
581
      parseNumber();  // this consumes the number or throws
×
582
      break;
×
583
    }
584
  }
585
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc