• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 24648292556

19 Apr 2026 10:53PM UTC coverage: 89.474% (+0.03%) from 89.442%
24648292556

push

github

web-flow
Merge pull request #5536 from randombit/jack/x509-misc

Various PKIX optimizations and bug fixes

106453 of 118977 relevant lines covered (89.47%)

11452293.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.34
/src/lib/utils/parsing.cpp
1
/*
2
* Various string utils and parsing functions
3
* (C) 1999-2007,2013,2014,2015,2018 Jack Lloyd
4
* (C) 2015 Simon Warta (Kullo GmbH)
5
* (C) 2017 René Korthaus, Rohde & Schwarz Cybersecurity
6
*
7
* Botan is released under the Simplified BSD License (see license.txt)
8
*/
9

10
#include <botan/internal/parsing.h>
11

12
#include <botan/exceptn.h>
13
#include <botan/internal/fmt.h>
14
#include <botan/internal/loadstor.h>
15
#include <algorithm>
16
#include <cctype>
17
#include <limits>
18
#include <sstream>
19

20
namespace Botan {
21

22
uint16_t to_uint16(std::string_view str) {
×
23
   const uint32_t x = to_u32bit(str);
×
24

25
   if(x != static_cast<uint16_t>(x)) {
×
26
      throw Invalid_Argument("Integer value exceeds 16 bit range");
×
27
   }
28

29
   return static_cast<uint16_t>(x);
×
30
}
31

32
uint32_t to_u32bit(std::string_view str_view) {
371,387✔
33
   const std::string str(str_view);
371,387✔
34

35
   // std::stoul is not strict enough. Ensure that str is digit only [0-9]*
36
   for(const char chr : str) {
1,148,582✔
37
      if(chr < '0' || chr > '9') {
777,502✔
38
         throw Invalid_Argument("to_u32bit invalid decimal string '" + str + "'");
921✔
39
      }
40
   }
41

42
   const unsigned long int x = std::stoul(str);
371,080✔
43

44
   if constexpr(sizeof(unsigned long int) > 4) {
371,080✔
45
      // x might be uint64
46
      if(x > std::numeric_limits<uint32_t>::max()) {
371,080✔
47
         throw Invalid_Argument("Integer value of " + str + " exceeds 32 bit range");
×
48
      }
49
   }
50

51
   return static_cast<uint32_t>(x);
371,080✔
52
}
371,387✔
53

54
/*
55
* Parse a SCAN-style algorithm name
56
*/
57
std::vector<std::string> parse_algorithm_name(std::string_view scan_name) {
18,573✔
58
   if(scan_name.find('(') == std::string::npos && scan_name.find(')') == std::string::npos) {
18,573✔
59
      return {std::string(scan_name)};
35,096✔
60
   }
61

62
   std::string name(scan_name);
1,025✔
63
   std::string substring;
1,025✔
64
   std::vector<std::string> elems;
1,025✔
65
   size_t level = 0;
1,025✔
66

67
   elems.push_back(name.substr(0, name.find('(')));
2,050✔
68
   name = name.substr(name.find('('));
1,025✔
69

70
   for(auto i = name.begin(); i != name.end(); ++i) {
4,187✔
71
      const char c = *i;
4,187✔
72

73
      if(c == '(') {
4,187✔
74
         ++level;
1,025✔
75
      }
76
      if(c == ')') {
4,187✔
77
         if(level == 1 && i == name.end() - 1) {
1,025✔
78
            if(elems.size() == 1) {
1,025✔
79
               elems.push_back(substring.substr(1));
1,630✔
80
            } else {
81
               elems.push_back(substring);
210✔
82
            }
83
            return elems;
1,025✔
84
         }
85

86
         if(level == 0 || (level == 1 && i != name.end() - 1)) {
×
87
            throw Invalid_Algorithm_Name(scan_name);
×
88
         }
89
         --level;
×
90
      }
91

92
      if(c == ',' && level == 1) {
3,162✔
93
         if(elems.size() == 1) {
210✔
94
            elems.push_back(substring.substr(1));
420✔
95
         } else {
96
            elems.push_back(substring);
×
97
         }
98
         substring.clear();
210✔
99
      } else {
100
         substring += c;
6,114✔
101
      }
102
   }
103

104
   if(!substring.empty()) {
×
105
      throw Invalid_Algorithm_Name(scan_name);
×
106
   }
107

108
   return elems;
×
109
}
36,121✔
110

111
std::vector<std::string> split_on(std::string_view str, char delim) {
213,037✔
112
   std::vector<std::string> elems;
213,037✔
113
   if(str.empty()) {
213,037✔
114
      return elems;
115
   }
116

117
   std::string substr;
212,913✔
118
   for(const char c : str) {
2,791,819✔
119
      if(c == delim) {
2,578,906✔
120
         if(!substr.empty()) {
173,097✔
121
            elems.push_back(substr);
173,058✔
122
         }
123
         substr.clear();
173,097✔
124
      } else {
125
         substr += c;
4,984,715✔
126
      }
127
   }
128

129
   if(substr.empty()) {
212,913✔
130
      throw Invalid_Argument(fmt("Unable to split string '{}", str));
2✔
131
   }
132
   elems.push_back(substr);
212,912✔
133

134
   return elems;
212,912✔
135
}
212,913✔
136

137
/*
* Join a sequence of strings
*
* Concatenates the elements of strs in order, placing a single delim
* character between adjacent elements. An empty vector yields an
* empty string.
*/
std::string string_join(const std::vector<std::string>& strs, char delim) {
   std::string joined;
   bool is_first = true;

   for(const auto& piece : strs) {
      // The delimiter goes before every element except the first
      if(!is_first) {
         joined.push_back(delim);
      }
      joined.append(piece);
      is_first = false;
   }

   return joined;
}
19✔
152

153
/*
154
* Convert a decimal-dotted string to binary IP
155
*/
156
std::optional<uint32_t> string_to_ipv4(std::string_view str) {
5,528✔
157
   // At least 3 dots + 4 1-digit integers
158
   // At most 3 dots + 4 3-digit integers
159
   if(str.size() < 3 + 4 * 1 || str.size() > 3 + 4 * 3) {
5,528✔
160
      return {};
1,728✔
161
   }
162

163
   // the final result
164
   uint32_t ip = 0;
3,800✔
165
   // the number of '.' seen so far
166
   size_t dots = 0;
3,800✔
167
   // accumulates one quad (range 0-255)
168
   uint32_t accum = 0;
3,800✔
169
   // # of digits pushed to accum since last dot
170
   size_t cur_digits = 0;
3,800✔
171

172
   for(const char c : str) {
4,145✔
173
      if(c == '.') {
4,120✔
174
         // . without preceding digit is invalid
175
         if(cur_digits == 0) {
122✔
176
            return {};
6✔
177
         }
178
         dots += 1;
116✔
179
         // too many dots
180
         if(dots > 3) {
116✔
181
            return {};
6✔
182
         }
183

184
         cur_digits = 0;
110✔
185
         ip = (ip << 8) | accum;
110✔
186
         accum = 0;
110✔
187
      } else if(c >= '0' && c <= '9') {
3,998✔
188
         const auto d = static_cast<uint8_t>(c - '0');
246✔
189

190
         // prohibit leading zero in quad (used for octal)
191
         if(cur_digits > 0 && accum == 0) {
246✔
192
            return {};
7✔
193
         }
194
         accum = (accum * 10) + d;
239✔
195

196
         if(accum > 255) {
239✔
197
            return {};
4✔
198
         }
199

200
         cur_digits++;
235✔
201
         BOTAN_ASSERT_NOMSG(cur_digits <= 3);
235✔
202
      } else {
203
         return {};
3,752✔
204
      }
205
   }
206

207
   // no trailing digits?
208
   if(cur_digits == 0) {
25✔
209
      return {};
×
210
   }
211

212
   // insufficient # of dots
213
   if(dots != 3) {
25✔
214
      return {};
×
215
   }
216

217
   ip = (ip << 8) | accum;
25✔
218

219
   return ip;
25✔
220
}
221

222
std::optional<std::array<uint8_t, 16>> string_to_ipv6(std::string_view str) {
345✔
223
   if(str.empty()) {
345✔
224
      return {};
1✔
225
   }
226

227
   // Parsed hex groups, split by whether they appeared before or after a "::".
228
   // If no "::" appears, only `pre` is populated and must reach exactly 8 groups.
229
   std::array<uint16_t, 8> pre{};
344✔
230
   std::array<uint16_t, 8> post{};
344✔
231
   size_t pre_count = 0;
344✔
232
   size_t post_count = 0;
344✔
233
   bool seen_double_colon = false;
344✔
234

235
   auto hex_value = [](char c) -> std::optional<uint8_t> {
1,591✔
236
      if(c >= '0' && c <= '9') {
1,247✔
237
         return c - '0';
440✔
238
      } else if(c >= 'a' && c <= 'f') {
807✔
239
         return 10 + (c - 'a');
160✔
240
      } else if(c >= 'A' && c <= 'F') {
647✔
241
         return 10 + (c - 'A');
59✔
242
      } else {
243
         return {};
588✔
244
      }
245
   };
246

247
   size_t idx = 0;
344✔
248
   bool expect_group = true;  // set after any separator, cleared after a group
344✔
249

250
   while(idx < str.size()) {
966✔
251
      if(str[idx] == ':') {
918✔
252
         if(idx + 1 < str.size() && str[idx + 1] == ':') {
285✔
253
            if(seen_double_colon) {
30✔
254
               return {};  // at most one "::"
5✔
255
            }
256
            seen_double_colon = true;
25✔
257
            idx += 2;
25✔
258
            expect_group = (idx < str.size());
25✔
259
            continue;
25✔
260
         }
261
         // single ':' separator between groups — only valid after a group
262
         if(expect_group) {
255✔
263
            return {};
3✔
264
         }
265
         expect_group = true;
252✔
266
         idx += 1;
252✔
267
         continue;
252✔
268
      }
269

270
      // Parse a hex group of 1..4 digits
271
      uint32_t group = 0;
272
      size_t hex_chars = 0;
273
      while(idx < str.size() && hex_chars < 4) {
1,289✔
274
         const auto digit = hex_value(str[idx]);
1,170✔
275
         if(digit.has_value() == false) {
1,170✔
276
            break;
277
         }
278
         group = (group << 4) | static_cast<uint32_t>(digit.value());
656✔
279
         idx += 1;
656✔
280
         hex_chars += 1;
656✔
281
      }
282
      if(hex_chars == 0) {
633✔
283
         return {};
283✔
284
      }
285
      // If a 5th hex digit follows, the group is oversized.
286
      if(hex_chars == 4 && idx < str.size() && hex_value(str[idx]).has_value()) {
350✔
287
         return {};
3✔
288
      }
289

290
      if(seen_double_colon) {
347✔
291
         if(post_count >= 8) {
24✔
292
            return {};
×
293
         }
294
         post[post_count++] = static_cast<uint16_t>(group);
24✔
295
      } else {
296
         if(pre_count >= 8) {
323✔
297
            return {};
2✔
298
         }
299
         pre[pre_count++] = static_cast<uint16_t>(group);
321✔
300
      }
301
      expect_group = false;
302
   }
303

304
   // Trailing single ':' is invalid
305
   if(expect_group) {
48✔
306
      return {};
4✔
307
   }
308

309
   const size_t total_groups = pre_count + post_count;
44✔
310
   if(seen_double_colon) {
44✔
311
      // "::" has to cover at least one zero group
312
      if(total_groups > 7) {
13✔
313
         return {};
×
314
      }
315
   } else {
316
      if(total_groups != 8) {
31✔
317
         return {};
4✔
318
      }
319
   }
320

321
   std::array<uint8_t, 16> out{};
40✔
322
   for(size_t i = 0; i != pre_count; ++i) {
269✔
323
      out[2 * i] = get_byte<0>(pre[i]);
229✔
324
      out[2 * i + 1] = get_byte<1>(pre[i]);
229✔
325
   }
326
   const size_t gap = 8 - total_groups;
40✔
327
   for(size_t i = 0; i != post_count; ++i) {
54✔
328
      const size_t target = pre_count + gap + i;
14✔
329
      out[2 * target] = get_byte<0>(post[i]);
14✔
330
      out[2 * target + 1] = get_byte<1>(post[i]);
14✔
331
   }
332
   return out;
40✔
333
}
334

335
std::string ipv6_to_string(const std::array<uint8_t, 16>& a) {
10✔
336
   static const char* hex = "0123456789abcdef";
10✔
337

338
   std::string out;
10✔
339
   out.reserve(39);
10✔
340

341
   for(size_t i = 0; i != 16; i += 2) {
90✔
342
      if(i != 0) {
80✔
343
         out.push_back(':');
70✔
344
      }
345
      const uint16_t group = make_uint16(a[i], a[i + 1]);
80✔
346
      bool started = false;
80✔
347
      // Write each nibble omitting leading 0s
348
      for(int s = 12; s >= 0; s -= 4) {
400✔
349
         const auto nibble = (group >> s) & 0xF;
320✔
350
         if(nibble != 0 || started || s == 0) {
320✔
351
            out.push_back(hex[nibble]);
155✔
352
            started = true;
353
         }
354
      }
355
   }
356
   return out;
10✔
357
}
×
358

359
/*
360
* Convert an IP address to decimal-dotted string
361
*/
362
std::string ipv4_to_string(uint32_t ip) {
20✔
363
   uint8_t bits[4];
20✔
364
   store_be(ip, bits);
20✔
365

366
   std::string str;
20✔
367

368
   for(size_t i = 0; i != 4; ++i) {
100✔
369
      if(i > 0) {
80✔
370
         str += ".";
60✔
371
      }
372
      str += std::to_string(bits[i]);
160✔
373
   }
374

375
   return str;
20✔
376
}
×
377

378
/*
* Return a copy of str with alphabetic characters converted to lowercase
*
* Classification and conversion use std::isalpha and std::tolower;
* characters that std::isalpha rejects are copied unchanged.
*/
std::string tolower_string(std::string_view str) {
   std::string result;
   result.reserve(str.size());

   for(const char ch : str) {
      // The <cctype> functions require a value representable as unsigned char
      const int as_uchar = static_cast<unsigned char>(ch);
      const bool is_letter = (std::isalpha(as_uchar) != 0);

      result.push_back(is_letter ? static_cast<char>(std::tolower(as_uchar)) : ch);
   }

   return result;
}
388

389
/*
* Check if a host name is covered by an issued name, which may contain
* a single '*' wildcard
*
* Comparison is case-insensitive for ASCII letters only. The wildcard
* must be the only '*' in the issued name, must appear in its leftmost
* label, cannot stand in for a '.', and must be followed by at least
* two further dots. Returns false for empty inputs, hosts longer than
* 253 characters, hosts containing '*' or "..", hosts ending in '.',
* and issued names containing an embedded null.
*/
bool host_wildcard_match(std::string_view issued, std::string_view host) {
   constexpr auto npos = std::string_view::npos;

   if(host.empty() || issued.empty()) {
      return false;
   }

   /*
   Host side sanity checks: at most 253 chars (maximum valid DNS name),
   no '*' (not a valid DNS character), no trailing dot and no empty label.
   */
   const bool host_ok =
      host.size() <= 253 && host.find('*') == npos && host.back() != '.' && host.find("..") == npos;

   /*
   The wildcard absorbs (host.size() - issued.size() + 1) chars, which must
   be non-negative, so issued can be at most one char longer than host.
   Embedded nulls in the issued name are never accepted.
   */
   const bool issued_ok = issued.size() <= host.size() + 1 && issued.find('\0') == npos;

   if(!host_ok || !issued_ok) {
      return false;
   }

   // ASCII-only case-insensitive char equality, avoids locale overhead from tolower
   const auto same_dns_char = [](char x, char y) -> bool {
      if(x == y) {
         return true;
      }
      const auto fx = static_cast<unsigned char>(x | 0x20);
      const auto fy = static_cast<unsigned char>(y | 0x20);
      return fx == fy && fx >= 'a' && fx <= 'z';
   };

   const auto same_dns_text = [&](std::string_view x, std::string_view y) -> bool {
      return x.size() == y.size() && std::equal(x.begin(), x.end(), y.begin(), same_dns_char);
   };

   // Exact match: accept
   if(same_dns_text(issued, host)) {
      return true;
   }

   // Anything else can only match through exactly one wildcard
   const size_t star = issued.find('*');
   if(star == npos) {
      return false;
   }
   if(issued.find('*', star + 1) != npos) {
      return false;
   }

   /*
   With a single '*' the text on either side of it must match the two ends
   of the host exactly, so the wildcard covers precisely `absorbed` chars
   of the host starting at the wildcard's own offset.
   */
   const size_t absorbed = host.size() - issued.size() + 1;

   if(star + absorbed > host.size()) {  // shouldn't happen
      return false;
   }

   const std::string_view issued_head = issued.substr(0, star);
   const std::string_view issued_tail = issued.substr(star + 1);

   const std::string_view host_head = host.substr(0, star);
   const std::string_view host_covered = host.substr(star, absorbed);
   const std::string_view host_tail = host.substr(star + absorbed);

   // Wildcard can only come in the leftmost component
   if(issued_head.find('.') != npos) {
      return false;
   }

   // The wildcard must not skip over any '.' in the host
   if(host_covered.find('.') != npos) {
      return false;
   }

   if(!same_dns_text(issued_head, host_head) || !same_dns_text(issued_tail, host_tail)) {
      return false;
   }

   // Wildcard issued name must have at least 3 components. The part before
   // the wildcard has no dots, so only the tail needs counting.
   const auto tail_dots = std::count(issued_tail.begin(), issued_tail.end(), '.');
   return tail_dots >= 2;
}
530

531
std::string check_and_canonicalize_dns_name(std::string_view name) {
9,268✔
532
   if(name.size() > 255) {
9,268✔
533
      throw Decoding_Error("DNS name exceeds maximum allowed length");
87✔
534
   }
535

536
   if(name.empty()) {
9,181✔
537
      throw Decoding_Error("DNS name cannot be empty");
4✔
538
   }
539

540
   if(name.starts_with(".") || name.ends_with(".")) {
9,177✔
541
      throw Decoding_Error("DNS name cannot start or end with a dot");
10✔
542
   }
543

544
   /*
545
   * Table mapping uppercase to lowercase and only including values for valid DNS names
546
   * namely A-Z, a-z, 0-9, hyphen, and dot, plus '*' for wildcarding. (RFC 1035)
547
   */
548
   // clang-format off
549
   constexpr uint8_t DNS_CHAR_MAPPING[128] = {
9,167✔
550
      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
551
      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
552
      '\0', '\0', '\0', '\0',  '*', '\0', '\0',  '-',  '.', '\0',  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',  '8',
553
       '9', '\0', '\0', '\0', '\0', '\0', '\0', '\0',  'a',  'b',  'c',  'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',
554
       'l',  'm',  'n',  'o',  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',  'x',  'y',  'z', '\0', '\0', '\0', '\0',
555
      '\0', '\0',  'a',  'b',  'c',  'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',  'p',  'q',
556
       'r',  's',  't',  'u',  'v',  'w',  'x',  'y',  'z', '\0', '\0', '\0', '\0', '\0',
557
   };
558
   // clang-format on
559

560
   std::string canon;
9,167✔
561
   canon.reserve(name.size());
9,167✔
562

563
   // RFC 1035: DNS labels must not exceed 63 characters
564
   size_t current_label_length = 0;
565

566
   for(size_t i = 0; i != name.size(); ++i) {
121,627✔
567
      const char c = name[i];
112,807✔
568

569
      if(c == '.') {
112,807✔
570
         if(i > 0 && name[i - 1] == '.') {
9,353✔
571
            throw Decoding_Error("DNS name contains sequential period chars");
4✔
572
         }
573

574
         if(current_label_length == 0) {
9,349✔
575
            throw Decoding_Error("DNS name contains empty label");
×
576
         }
577
         current_label_length = 0;  // Reset for next label
578
      } else {
579
         current_label_length++;
103,454✔
580

581
         if(current_label_length > 63) {  // RFC 1035 Maximum DNS label length
103,454✔
582
            throw Decoding_Error("DNS name label exceeds maximum length of 63 characters");
1✔
583
         }
584
      }
585

586
      const uint8_t cu = static_cast<uint8_t>(c);
112,802✔
587
      if(cu >= 128) {
112,802✔
588
         throw Decoding_Error("DNS name must not contain any extended ASCII code points");
147✔
589
      }
590
      const uint8_t mapped = DNS_CHAR_MAPPING[cu];
112,655✔
591
      if(mapped == 0) {
112,655✔
592
         throw Decoding_Error("DNS name includes invalid character");
189✔
593
      }
594

595
      if(mapped == '-') {
112,466✔
596
         if(i == 0 || (i > 0 && name[i - 1] == '.')) {
671✔
597
            throw Decoding_Error("DNS name has label with leading hyphen");
3✔
598
         } else if(i == name.size() - 1 || (i < name.size() - 1 && name[i + 1] == '.')) {
668✔
599
            throw Decoding_Error("DNS name has label with trailing hyphen");
3✔
600
         }
601
      }
602
      canon.push_back(static_cast<char>(mapped));
112,460✔
603
   }
604

605
   if(current_label_length == 0) {
8,820✔
606
      throw Decoding_Error("DNS name contains empty label");
×
607
   }
608
   return canon;
8,820✔
609
}
347✔
610

611
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc