• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 27595034679

15 Jun 2026 11:31PM UTC coverage: 91.711% (+2.3%) from 89.427%
27595034679

push

github

randombit
Add volatile specifier to rdseed inline asm annotation

114099 of 124411 relevant lines covered (91.71%)

10688437.1 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.84
/src/lib/utils/uri/uri.cpp
1
/*
2
* (C) 2026 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6

7
#include <botan/uri.h>
8

9
#include <botan/assert.h>
10
#include <botan/exceptn.h>
11
#include <botan/internal/charset.h>
12
#include <botan/internal/parsing.h>
13

14
namespace Botan {
15

16
namespace {
17

18
std::optional<uint16_t> parse_port(std::string_view s) {
56✔
19
   // RFC 3986 port is "*DIGIT" but we reject leading zeros ("host:0080")
20
   if(const auto port = parse_u16(s, /*require_canonical=*/true)) {
56✔
21
      if(*port > 0) {
45✔
22
         return port;
44✔
23
      }
24
   }
25

26
   return {};
12✔
27
}
28

29
/**
* Check the two characters that follow a '%' in a pct-encoded triplet.
*
* @param c1 the first character after the '%'
* @param c2 the second character after the '%'
* @return true if both are ASCII hex digits (either case) and the pair is
*         not "00"; false otherwise
*/
bool is_valid_percent_escape(char c1, char c2) {
   const auto is_hex_digit = [](char c) {
      return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
   };

   if(!is_hex_digit(c1) || !is_hex_digit(c2)) {
      return false;
   }

   // Proactively reject an embedded null (%00)
   return !(c1 == '0' && c2 == '0');
}
45

46
bool validate_path_query_fragment(std::string_view tail) {
7,295✔
47
   /*
48
   * RFC 3986 syntax for the path/query/fragment of a URI:
49
   *
50
   *   URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
51
   *   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
52
   *   segment       = *pchar
53
   *   path-abempty  = *( "/" segment )
54
   *   query         = *( pchar / "/" / "?" )
55
   *   fragment     =  *( pchar / "/" / "?" )
56
   */
57

58
   constexpr auto is_pchar_or_slash = CharacterValidityTable::alpha_numeric_plus("-._~!$&'()*+,;=:@/");
7,295✔
59

60
   enum class State : uint8_t { Path, Query, Fragment };
7,295✔
61
   State state = State::Path;
7,295✔
62

63
   for(size_t i = 0; i < tail.size(); ++i) {
89,999✔
64
      const char c = tail[i];
82,740✔
65
      if(c == '%') {
82,740✔
66
         if(i + 2 >= tail.size() || !is_valid_percent_escape(tail[i + 1], tail[i + 2])) {
2,491✔
67
            return false;
68
         }
69
         i += 2;
2,484✔
70
         continue;
2,484✔
71
      }
72
      if(c == '?') {
80,249✔
73
         // First '?' transitions from path to query, any further '?' are literal
74
         if(state == State::Path) {
681✔
75
            state = State::Query;
76
         }
77
         continue;
681✔
78
      }
79
      if(c == '#') {
79,568✔
80
         // There is only one '#' fragment delimiter, second '#' is invalid
81
         if(state == State::Fragment) {
18✔
82
            return false;
83
         }
84
         state = State::Fragment;
16✔
85
         continue;
16✔
86
      }
87
      if(!is_pchar_or_slash(c)) {
79,550✔
88
         return false;
89
      }
90
   }
91
   return true;
92
}
93

94
bool validate_userinfo(std::string_view userinfo) {
263✔
95
   constexpr auto is_valid_userinfo_char = CharacterValidityTable::alpha_numeric_plus("-._~!$&'()*+,;=:");
263✔
96

97
   for(size_t i = 0; i < userinfo.size(); ++i) {
526✔
98
      const char c = userinfo[i];
509✔
99
      if(c == '%') {
509✔
100
         if(i + 2 >= userinfo.size() || !is_valid_percent_escape(userinfo[i + 1], userinfo[i + 2])) {
4✔
101
            return false;
102
         }
103
         i += 2;
×
104
         continue;
×
105
      }
106
      if(!is_valid_userinfo_char(c)) {
505✔
107
         return false;
108
      }
109
   }
110
   return true;
111
}
112

113
}  // namespace
114

115
/**
* Three-way comparison of two URIs.
*
* Ordering is lexicographic over the parsed components, in the order
* scheme, authority, path, query, fragment.
*/
std::strong_ordering URI::operator<=>(const URI& other) const {
   const auto lhs = std::tie(m_scheme, m_authority, m_path, m_query, m_fragment);
   const auto rhs = std::tie(other.m_scheme, other.m_authority, other.m_path, other.m_query, other.m_fragment);
   return lhs <=> rhs;
}
119

120
/**
* Equality of two URIs: true when scheme, authority, path, query and
* fragment all compare equal. Evaluation stops at the first mismatch.
*/
bool URI::operator==(const URI& other) const {
   return m_scheme == other.m_scheme &&        //
          m_authority == other.m_authority &&  //
          m_path == other.m_path &&            //
          m_query == other.m_query &&          //
          m_fragment == other.m_fragment;
}
124

125
/**
* Three-way comparison of two authorities, lexicographic over
* userinfo, host, port.
*/
std::strong_ordering URI::Authority::operator<=>(const URI::Authority& other) const {
   /*
   Userinfo is compared without normalization; RFC 3986 6.2.2.1:
      When a URI uses components of the generic syntax, the component
      syntax equivalence rules always apply; namely, that the scheme
      and host are case-insensitive and therefore should be normalized
      to lowercase. ... The other generic syntax components are assumed
      to be case-sensitive unless specifically defined otherwise by the
      scheme.
   */
   const auto lhs = std::tie(m_userinfo, m_host, m_port);
   const auto rhs = std::tie(other.m_userinfo, other.m_host, other.m_port);
   return lhs <=> rhs;
}
137

138
/**
* Equality of two authorities: true when userinfo, host and port all
* compare equal. Evaluation stops at the first mismatch.
*/
bool URI::Authority::operator==(const URI::Authority& other) const {
   return m_userinfo == other.m_userinfo &&  //
          m_host == other.m_host &&          //
          m_port == other.m_port;
}
141

142
//static
143
std::optional<URI> URI::parse(std::string_view raw) {
8,421✔
144
   // Empty string is not a valid URI
145
   if(raw.empty()) {
8,421✔
146
      return {};
3✔
147
   }
148

149
   // RFC 3986:
150
   // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
151
   constexpr auto is_scheme_cont_char = CharacterValidityTable::alpha_numeric_plus("+-.");
8,418✔
152

153
   const auto is_ascii_alpha = [](char c) -> bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); };
8,418✔
154

155
   // Check the first scheme character
156
   if(!is_ascii_alpha(raw.front())) {
8,418✔
157
      return {};
816✔
158
   }
159

160
   // Scan the rest of the scheme
161
   size_t i = 1;
162
   while(i < raw.size() && is_scheme_cont_char(raw[i])) {
34,599✔
163
      ++i;
26,997✔
164
   }
165
   // Scheme wasn't followed by ':' -> invalid
166
   if(i >= raw.size() || raw[i] != ':') {
7,602✔
167
      return {};
231✔
168
   }
169

170
   // Canonicalize the scheme
171
   const std::string scheme = tolower_string(raw.substr(0, i));
7,371✔
172

173
   // The scheme must be followed by "//" introducing an authority. RFC 5280
174
   // does allow including URIs without an authority ("urn:of:cat:ashes",
175
   // "mailto:root@attacker.com") but they seem like an potential footgun (for
176
   // example a rfc822 name constraint will not apply to a mailto: URL) and
177
   // without any obvious justification to support here.
178

179
   auto rest = raw.substr(i + 1);
7,371✔
180
   if(rest.size() < 2 || rest[0] != '/' || rest[1] != '/') {
7,371✔
181
      return {};
14✔
182
   }
183
   rest.remove_prefix(2);  // Strip off the '//'
7,357✔
184

185
   // Authority runs to the first '/', '?' or '#'. The remaining is `path ? query # fragment`,
186
   // which is validated against the RFC 3986 character set.
187
   const auto end = rest.find_first_of("/?#");
7,357✔
188
   const auto authority = (end == std::string_view::npos) ? rest : rest.substr(0, end);
7,357✔
189
   const auto path_query_fragment = (end == std::string_view::npos) ? std::string_view{} : rest.substr(end);
7,357✔
190

191
   // Parse and validate the authority string (hostname, IPv4, or IPv6 address)
192
   auto parsed_authority = Authority::parse(authority);
7,357✔
193
   if(!parsed_authority.has_value()) {
7,357✔
194
      return {};
62✔
195
   }
196

197
   // Validate any `path ? query # fragment` portions of the URL
198
   if(!validate_path_query_fragment(path_query_fragment)) {
7,295✔
199
      return {};
36✔
200
   }
201

202
   // Split into path / query / fragment. Validation above guarantees at most
203
   // one '#', so the first '#' is the fragment delimiter, and within the
204
   // pre-fragment portion the first '?' (if any) is the query delimiter.
205
   const auto hash = path_query_fragment.find('#');
7,259✔
206
   const auto pre_fragment =
7,259✔
207
      (hash == std::string_view::npos) ? path_query_fragment : path_query_fragment.substr(0, hash);
7,259✔
208
   std::optional<std::string> fragment;
7,259✔
209
   if(hash != std::string_view::npos) {
7,259✔
210
      fragment = std::string(path_query_fragment.substr(hash + 1));
28✔
211
   }
212

213
   const auto qmark = pre_fragment.find('?');
7,259✔
214
   const auto path = (qmark == std::string_view::npos) ? pre_fragment : pre_fragment.substr(0, qmark);
7,259✔
215
   std::optional<std::string> query;
7,259✔
216
   if(qmark != std::string_view::npos) {
7,259✔
217
      query = std::string(pre_fragment.substr(qmark + 1));
934✔
218
   }
219

220
   // Accept
221
   return URI(
51,308✔
222
      std::string(raw), scheme, std::move(*parsed_authority), std::string(path), std::move(query), std::move(fragment));
50,813✔
223
}
22,001✔
224

225
//static
226
std::optional<URI::Authority> URI::Authority::parse(std::string_view raw) {
8,394✔
227
   if(raw.empty()) {
8,394✔
228
      return {};
15✔
229
   }
230

231
   // Capture the full input now; the userinfo prefix is stripped from
232
   // `raw` below, and m_raw must reflect the original
233
   const std::string original_input(raw);
8,379✔
234

235
   /*
236
   RFC 3986
237
     userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
238

239
   Thus a unencoded '@' is not allowed inside userinfo, and the single '@' splits the
240
   username from the authority. The @ being present at all is significant; an empty
241
   userinfo ("https://@example.com/") is distinct from no userinfo at all.
242
   */
243
   std::optional<std::string> userinfo;
8,379✔
244
   const auto first_at = raw.find('@');
8,379✔
245
   if(first_at != std::string_view::npos) {
8,379✔
246
      if(raw.find('@', first_at + 1) != std::string_view::npos) {
768✔
247
         return {};
751✔
248
      }
249
      const auto userinfo_view = raw.substr(0, first_at);
263✔
250
      if(!validate_userinfo(userinfo_view)) {
263✔
251
         return {};
246✔
252
      }
253
      userinfo = std::string(userinfo_view);
34✔
254
      raw.remove_prefix(first_at + 1);
17✔
255
   }
256

257
   std::string_view host_view;
7,628✔
258
   std::string_view port_str;
7,628✔
259
   std::optional<Host> host;
7,628✔
260

261
   if(!raw.empty() && raw.front() == '[') {
7,628✔
262
      // Bracketed IPv6 literal.
263
      const auto close = raw.find(']');
13✔
264
      if(close == std::string_view::npos) {
13✔
265
         return {};
5✔
266
      }
267
      host_view = raw.substr(1, close - 1);
11✔
268
      if(host_view.empty()) {
11✔
269
         return {};
×
270
      }
271
      const auto after = raw.substr(close + 1);
11✔
272
      if(!after.empty()) {
11✔
273
         if(after.front() != ':') {
6✔
274
            return {};
×
275
         }
276
         port_str = after.substr(1);
6✔
277
      }
278
      auto ipv6 = IPv6Address::from_string(host_view);
11✔
279
      if(!ipv6.has_value()) {
11✔
280
         return {};
3✔
281
      }
282
      host = *ipv6;
16✔
283
   } else {
284
      // host[:port] with no brackets. Only one ':' is allowed (port).
285
      const auto colon = raw.find(':');
7,615✔
286
      if(colon == std::string_view::npos) {
7,615✔
287
         host_view = raw;
7,429✔
288
      } else {
289
         host_view = raw.substr(0, colon);
186✔
290
         port_str = raw.substr(colon + 1);
186✔
291

292
         // Verify the `:` char is the only one that appears
293
         if(port_str.find(':') != std::string::npos) {
186✔
294
            return {};
61✔
295
         }
296
      }
297

298
      if(host_view.empty()) {
7,554✔
299
         return {};
1✔
300
      }
301

302
      // Technically valid per RFC 3986 but likely not something we want to support
303
      if(host_view.ends_with('.')) {
7,553✔
304
         return {};
4✔
305
      }
306

307
      if(auto ipv4 = IPv4Address::from_string(host_view)) {
7,549✔
308
         host = *ipv4;
7,342✔
309
      } else if(auto dns = DNSName::from_string(host_view)) {
7,522✔
310
         host = std::move(*dns);
7,288✔
311
      } else {
312
         return {};
234✔
313
      }
7,522✔
314
   }
315

316
   std::optional<uint16_t> port;
7,323✔
317

318
   if(!port_str.empty()) {
7,323✔
319
      port = parse_port(port_str);
56✔
320
      if(!port.has_value()) {
56✔
321
         return {};
12✔
322
      }
323
   }
324

325
   return Authority(original_input, std::move(userinfo), std::move(*host), port);
21,965✔
326
}
16,007✔
327

328
/**
* Render the host as text.
*
* @return the result of to_string() on whichever alternative m_host
*         currently holds
*/
std::string URI::Authority::host_to_string() const {
   const auto render = [](const auto& h) -> std::string { return h.to_string(); };
   return std::visit(render, m_host);
}
331

332
/**
* Report which kind of host this authority holds.
*
* @return HostKind::DNS, HostKind::IPv4 or HostKind::IPv6 according to the
*         alternative currently stored in m_host
*/
URI::Authority::HostKind URI::Authority::host_kind() const {
   if(std::holds_alternative<DNSName>(m_host)) {
      return HostKind::DNS;
   }
   if(std::holds_alternative<IPv4Address>(m_host)) {
      return HostKind::IPv4;
   }
   if(std::holds_alternative<IPv6Address>(m_host)) {
      return HostKind::IPv6;
   }

   // None of the alternatives checked above matched
   BOTAN_ASSERT_UNREACHABLE();
}
343

344
//static
345
std::vector<URI> URI::filter_scheme(std::string_view scheme, std::span<const URI> uris) {
10✔
346
   std::vector<URI> results;
10✔
347

348
   const auto normalized_scheme = tolower_string(scheme);
10✔
349

350
   for(const auto& uri : uris) {
11✔
351
      if(uri.scheme() == normalized_scheme) {
1✔
352
         results.push_back(uri);
1✔
353
      }
354
   }
355

356
   return results;
10✔
357
}
10✔
358

359
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc