• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 26352126234

23 May 2026 07:15PM UTC coverage: 89.383%. Remained the same
26352126234

push

github

randombit
Bump version to 3.13.0-pre

109795 of 122836 relevant lines covered (89.38%)

11207026.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.39
/src/lib/utils/uri/uri.cpp
1
/*
2
* (C) 2026 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6

7
#include <botan/uri.h>
8

9
#include <botan/assert.h>
10
#include <botan/exceptn.h>
11
#include <botan/internal/charset.h>
12
#include <botan/internal/parsing.h>
13

14
namespace Botan {
15

16
namespace {
17

18
/*
* Parse the decimal port component of a URI authority.
*
* Accepts between 1 and 5 ASCII digits (leading zeros are allowed as long as
* the total length stays within 5 characters) whose value lies in the range
* [1, 65535]. Returns an empty optional for an empty string, a string longer
* than 5 characters, any non-digit character, or a value of 0 or above 65535.
*/
std::optional<uint16_t> parse_port(std::string_view s) {
   // Bounding the length both rejects anything longer than "65535" and
   // guarantees the uint32_t accumulator below cannot overflow.
   constexpr size_t max_port_digits = 5;

   if(s.empty() || s.size() > max_port_digits) {
      return {};
   }

   uint32_t port = 0;

   for(const char c : s) {
      // Only plain ASCII digits; no sign, whitespace or locale dependence
      if(c < '0' || c > '9') {
         return {};
      }
      port = port * 10 + static_cast<uint32_t>(c - '0');
   }

   // Port 0 is rejected along with anything that does not fit in 16 bits
   if(port == 0 || port >= 65536) {
      return {};
   }

   return static_cast<uint16_t>(port);
}
48

49
/*
* Check the two characters that follow a '%' in a percent-encoded sequence.
*
* Returns true only if both c1 and c2 are ASCII hex digits (either case) and
* the pair is not "00"; an encoded NUL byte is rejected outright.
*/
bool is_valid_percent_escape(char c1, char c2) {
   auto is_hex_digit = [](char c) {
      return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
   };

   if(!is_hex_digit(c1) || !is_hex_digit(c2)) {
      return false;
   }

   // Proactively reject embedded null (%00)
   if(c1 == '0' && c2 == '0') {
      return false;
   }

   return true;
}
65

66
bool validate_path_query_fragment(std::string_view tail) {
7,274✔
67
   /*
68
   * RFC 3986 syntax for the path/query/fragment of a URI:
69
   *
70
   *   URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
71
   *   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
72
   *   segment       = *pchar
73
   *   path-abempty  = *( "/" segment )
74
   *   query         = *( pchar / "/" / "?" )
75
   *   fragment     =  *( pchar / "/" / "?" )
76
   */
77

78
   constexpr auto is_pchar_or_slash = CharacterValidityTable::alpha_numeric_plus("-._~!$&'()*+,;=:@/");
7,274✔
79

80
   enum class State : uint8_t { Path, Query, Fragment };
7,274✔
81
   State state = State::Path;
7,274✔
82

83
   for(size_t i = 0; i < tail.size(); ++i) {
89,133✔
84
      const char c = tail[i];
81,895✔
85
      if(c == '%') {
81,895✔
86
         if(i + 2 >= tail.size() || !is_valid_percent_escape(tail[i + 1], tail[i + 2])) {
2,455✔
87
            return false;
88
         }
89
         i += 2;
2,448✔
90
         continue;
2,448✔
91
      }
92
      if(c == '?') {
79,440✔
93
         // First '?' transitions from path to query, any further '?' are literal
94
         if(state == State::Path) {
665✔
95
            state = State::Query;
96
         }
97
         continue;
665✔
98
      }
99
      if(c == '#') {
78,775✔
100
         // There is only one '#' fragment delimiter, second '#' is invalid
101
         if(state == State::Fragment) {
18✔
102
            return false;
103
         }
104
         state = State::Fragment;
16✔
105
         continue;
16✔
106
      }
107
      if(!is_pchar_or_slash(c)) {
78,757✔
108
         return false;
109
      }
110
   }
111
   return true;
112
}
113

114
bool validate_userinfo(std::string_view userinfo) {
250✔
115
   constexpr auto is_valid_userinfo_char = CharacterValidityTable::alpha_numeric_plus("-._~!$&'()*+,;=:");
250✔
116

117
   for(size_t i = 0; i < userinfo.size(); ++i) {
469✔
118
      const char c = userinfo[i];
457✔
119
      if(c == '%') {
457✔
120
         if(i + 2 >= userinfo.size() || !is_valid_percent_escape(userinfo[i + 1], userinfo[i + 2])) {
3✔
121
            return false;
122
         }
123
         i += 2;
×
124
         continue;
×
125
      }
126
      if(!is_valid_userinfo_char(c)) {
454✔
127
         return false;
128
      }
129
   }
130
   return true;
131
}
132

133
}  // namespace
134

135
/*
* Order URIs lexicographically by (scheme, authority, path, query, fragment).
* These are the same fields that URI::operator== compares.
*/
std::strong_ordering URI::operator<=>(const URI& other) const {
   return std::tie(m_scheme, m_authority, m_path, m_query, m_fragment) <=>
          std::tie(other.m_scheme, other.m_authority, other.m_path, other.m_query, other.m_fragment);
}
139

140
/*
* Two URIs are equal when their scheme, authority, path, query and fragment
* all compare equal. These are the same fields that URI::operator<=> orders by.
*/
bool URI::operator==(const URI& other) const {
   return m_scheme == other.m_scheme && m_authority == other.m_authority && m_path == other.m_path &&
          m_query == other.m_query && m_fragment == other.m_fragment;
}
144

145
/*
* Order authorities lexicographically by (userinfo, host, port).
*/
std::strong_ordering URI::Authority::operator<=>(const URI::Authority& other) const {
   /*
   Userinfo is compared without normalization; RFC 3986 6.2.2.1:
      When a URI uses components of the generic syntax, the component
      syntax equivalence rules always apply; namely, that the scheme
      and host are case-insensitive and therefore should be normalized
      to lowercase. ... The other generic syntax components are assumed
      to be case-sensitive unless specifically defined otherwise by the
      scheme.
   */
   return std::tie(m_userinfo, m_host, m_port) <=> std::tie(other.m_userinfo, other.m_host, other.m_port);
}
157

158
/*
* Two authorities are equal when their userinfo, host and port all compare
* equal. These are the same fields that Authority::operator<=> orders by.
*/
bool URI::Authority::operator==(const URI::Authority& other) const {
   return m_userinfo == other.m_userinfo && m_host == other.m_host && m_port == other.m_port;
}
161

162
//static
163
std::optional<URI> URI::parse(std::string_view raw) {
8,398✔
164
   // Empty string is not a valid URI
165
   if(raw.empty()) {
8,398✔
166
      return {};
3✔
167
   }
168

169
   // RFC 3986:
170
   // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
171
   constexpr auto is_scheme_cont_char = CharacterValidityTable::alpha_numeric_plus("+-.");
8,395✔
172

173
   const auto is_ascii_alpha = [](char c) -> bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); };
8,395✔
174

175
   // Check the first scheme character
176
   if(!is_ascii_alpha(raw.front())) {
8,395✔
177
      return {};
831✔
178
   }
179

180
   // Scan the rest of the scheme
181
   size_t i = 1;
182
   while(i < raw.size() && is_scheme_cont_char(raw[i])) {
34,482✔
183
      ++i;
26,918✔
184
   }
185
   // Scheme wasn't followed by ':' -> invalid
186
   if(i >= raw.size() || raw[i] != ':') {
7,564✔
187
      return {};
215✔
188
   }
189

190
   // Canonicalize the scheme
191
   const std::string scheme = tolower_string(raw.substr(0, i));
7,349✔
192

193
   // The scheme must be followed by "//" introducing an authority. RFC 5280
194
   // does allow including URIs without an authority ("urn:of:cat:ashes",
195
   // "mailto:root@attacker.com") but they seem like an potential footgun (for
196
   // example a rfc822 name constraint will not apply to a mailto: URL) and
197
   // without any obvious justification to support here.
198

199
   auto rest = raw.substr(i + 1);
7,349✔
200
   if(rest.size() < 2 || rest[0] != '/' || rest[1] != '/') {
7,349✔
201
      return {};
15✔
202
   }
203
   rest.remove_prefix(2);  // Strip off the '//'
7,334✔
204

205
   // Authority runs to the first '/', '?' or '#'. The remaining is `path ? query # fragment`,
206
   // which is validated against the RFC 3986 character set.
207
   const auto end = rest.find_first_of("/?#");
7,334✔
208
   const auto authority = (end == std::string_view::npos) ? rest : rest.substr(0, end);
7,334✔
209
   const auto path_query_fragment = (end == std::string_view::npos) ? std::string_view{} : rest.substr(end);
7,334✔
210

211
   // Parse and validate the authority string (hostname, IPv4, or IPv6 address)
212
   auto parsed_authority = Authority::parse(authority);
7,334✔
213
   if(!parsed_authority.has_value()) {
7,334✔
214
      return {};
60✔
215
   }
216

217
   // Validate any `path ? query # fragment` portions of the URL
218
   if(!validate_path_query_fragment(path_query_fragment)) {
7,274✔
219
      return {};
36✔
220
   }
221

222
   // Split into path / query / fragment. Validation above guarantees at most
223
   // one '#', so the first '#' is the fragment delimiter, and within the
224
   // pre-fragment portion the first '?' (if any) is the query delimiter.
225
   const auto hash = path_query_fragment.find('#');
7,238✔
226
   const auto pre_fragment =
7,238✔
227
      (hash == std::string_view::npos) ? path_query_fragment : path_query_fragment.substr(0, hash);
7,238✔
228
   std::optional<std::string> fragment;
7,238✔
229
   if(hash != std::string_view::npos) {
7,238✔
230
      fragment = std::string(path_query_fragment.substr(hash + 1));
28✔
231
   }
232

233
   const auto qmark = pre_fragment.find('?');
7,238✔
234
   const auto path = (qmark == std::string_view::npos) ? pre_fragment : pre_fragment.substr(0, qmark);
7,238✔
235
   std::optional<std::string> query;
7,238✔
236
   if(qmark != std::string_view::npos) {
7,238✔
237
      query = std::string(pre_fragment.substr(qmark + 1));
918✔
238
   }
239

240
   // Accept
241
   return URI(
51,153✔
242
      std::string(raw), scheme, std::move(*parsed_authority), std::string(path), std::move(query), std::move(fragment));
50,666✔
243
}
21,935✔
244

245
//static
246
std::optional<URI::Authority> URI::Authority::parse(std::string_view raw) {
8,364✔
247
   if(raw.empty()) {
8,364✔
248
      return {};
15✔
249
   }
250

251
   /*
252
   RFC 3986
253
     userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
254

255
   Thus a unencoded '@' is not allowed inside userinfo, and the single '@' splits the
256
   username from the authority. The @ being present at all is significant; an empty
257
   userinfo ("https://@example.com/") is distinct from no userinfo at all.
258
   */
259
   std::optional<std::string> userinfo;
8,349✔
260
   const auto first_at = raw.find('@');
8,349✔
261
   if(first_at != std::string_view::npos) {
8,349✔
262
      if(raw.find('@', first_at + 1) != std::string_view::npos) {
768✔
263
         return {};
756✔
264
      }
265
      const auto userinfo_view = raw.substr(0, first_at);
250✔
266
      if(!validate_userinfo(userinfo_view)) {
250✔
267
         return {};
238✔
268
      }
269
      userinfo = std::string(userinfo_view);
24✔
270
      raw.remove_prefix(first_at + 1);
12✔
271
   }
272

273
   std::string_view host_view;
7,593✔
274
   std::string_view port_str;
7,593✔
275
   std::optional<Host> host;
7,593✔
276

277
   if(!raw.empty() && raw.front() == '[') {
7,593✔
278
      // Bracketed IPv6 literal.
279
      const auto close = raw.find(']');
11✔
280
      if(close == std::string_view::npos) {
11✔
281
         return {};
4✔
282
      }
283
      host_view = raw.substr(1, close - 1);
10✔
284
      if(host_view.empty()) {
10✔
285
         return {};
×
286
      }
287
      const auto after = raw.substr(close + 1);
10✔
288
      if(!after.empty()) {
10✔
289
         if(after.front() != ':') {
5✔
290
            return {};
×
291
         }
292
         port_str = after.substr(1);
5✔
293
      }
294
      auto ipv6 = IPv6Address::from_string(host_view);
10✔
295
      if(!ipv6.has_value()) {
10✔
296
         return {};
3✔
297
      }
298
      host = *ipv6;
14✔
299
   } else {
300
      // host[:port] with no brackets. Only one ':' is allowed (port).
301
      const auto colon = raw.find(':');
7,582✔
302
      if(colon == std::string_view::npos) {
7,582✔
303
         host_view = raw;
7,411✔
304
      } else {
305
         host_view = raw.substr(0, colon);
171✔
306
         port_str = raw.substr(colon + 1);
171✔
307

308
         // Verify the `:` char is the only one that appears
309
         if(port_str.find(':') != std::string::npos) {
171✔
310
            return {};
53✔
311
         }
312
      }
313

314
      if(host_view.empty()) {
7,529✔
315
         return {};
1✔
316
      }
317

318
      // Technically valid per RFC 3986 but likely not something we want to support
319
      if(host_view.ends_with('.')) {
7,528✔
320
         return {};
4✔
321
      }
322

323
      if(auto ipv4 = IPv4Address::from_string(host_view)) {
7,524✔
324
         host = *ipv4;
7,312✔
325
      } else if(auto dns = DNSName::from_string(host_view)) {
7,498✔
326
         host = std::move(*dns);
7,260✔
327
      } else {
328
         return {};
238✔
329
      }
7,498✔
330
   }
331

332
   std::optional<uint16_t> port;
7,293✔
333

334
   if(!port_str.empty()) {
7,293✔
335
      port = parse_port(port_str);
48✔
336
      if(!port.has_value()) {
48✔
337
         return {};
6✔
338
      }
339
   }
340

341
   return Authority(std::string(raw), std::move(userinfo), std::move(*host), port);
21,885✔
342
}
15,978✔
343

344
/*
* Return the string form of the host, produced by calling to_string() on
* whichever alternative m_host currently holds.
*/
std::string URI::Authority::host_to_string() const {
   return std::visit([](const auto& h) -> std::string { return h.to_string(); }, m_host);
}
347

348
/*
* Report which kind of host this authority holds, based on the alternative
* currently stored in m_host: DNS name, IPv4 address or IPv6 address.
*/
URI::Authority::HostKind URI::Authority::host_kind() const {
   if(std::holds_alternative<DNSName>(m_host)) {
      return HostKind::DNS;
   } else if(std::holds_alternative<IPv4Address>(m_host)) {
      return HostKind::IPv4;
   } else if(std::holds_alternative<IPv6Address>(m_host)) {
      return HostKind::IPv6;
   } else {
      // None of the three handled alternatives matched
      BOTAN_ASSERT_UNREACHABLE();
   }
}
359

360
//static
361
std::vector<URI> URI::filter_scheme(std::string_view scheme, std::span<const URI> uris) {
10✔
362
   std::vector<URI> results;
10✔
363

364
   const auto normalized_scheme = tolower_string(scheme);
10✔
365

366
   for(const auto& uri : uris) {
11✔
367
      if(uri.scheme() == normalized_scheme) {
1✔
368
         results.push_back(uri);
1✔
369
      }
370
   }
371

372
   return results;
10✔
373
}
10✔
374

375
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc