• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 26995937053

04 Jun 2026 09:38PM UTC coverage: 89.394% (-2.3%) from 91.672%
26995937053

push

github

web-flow
Merge pull request #5642 from randombit/jack/prefetch-in-ks

Improve prefetching for table based implementations

110588 of 123708 relevant lines covered (89.39%)

11056434.37 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.18
/src/lib/utils/uri/uri.cpp
1
/*
2
* (C) 2026 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6

7
#include <botan/uri.h>
8

9
#include <botan/assert.h>
10
#include <botan/exceptn.h>
11
#include <botan/internal/charset.h>
12
#include <botan/internal/parsing.h>
13

14
namespace Botan {
15

16
namespace {
17

18
std::optional<uint16_t> parse_port(std::string_view s) {
48✔
19
   if(const auto port = parse_u16(s)) {
48✔
20
      if(*port > 0) {
42✔
21
         return port;
42✔
22
      }
23
   }
24

25
   return {};
6✔
26
}
27

28
/**
* Check the two characters following a '%' in a percent-encoded sequence.
*
* @param c1 the first character after the '%'
* @param c2 the second character after the '%'
* @return true if both characters are ASCII hex digits (either case) and the
*         pair is not "00"; false otherwise
*/
bool is_valid_percent_escape(char c1, char c2) {
   auto is_hex_digit = [](char c) {
      return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
   };

   if(!is_hex_digit(c1) || !is_hex_digit(c2)) {
      return false;
   }

   // Proactively reject embedded null (%00)
   if(c1 == '0' && c2 == '0') {
      return false;
   }

   return true;
}
44

45
bool validate_path_query_fragment(std::string_view tail) {
7,293✔
46
   /*
47
   * RFC 3986 syntax for the path/query/fragment of a URI:
48
   *
49
   *   URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
50
   *   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
51
   *   segment       = *pchar
52
   *   path-abempty  = *( "/" segment )
53
   *   query         = *( pchar / "/" / "?" )
54
   *   fragment     =  *( pchar / "/" / "?" )
55
   */
56

57
   constexpr auto is_pchar_or_slash = CharacterValidityTable::alpha_numeric_plus("-._~!$&'()*+,;=:@/");
7,293✔
58

59
   enum class State : uint8_t { Path, Query, Fragment };
7,293✔
60
   State state = State::Path;
7,293✔
61

62
   for(size_t i = 0; i < tail.size(); ++i) {
89,992✔
63
      const char c = tail[i];
82,735✔
64
      if(c == '%') {
82,735✔
65
         if(i + 2 >= tail.size() || !is_valid_percent_escape(tail[i + 1], tail[i + 2])) {
2,491✔
66
            return false;
67
         }
68
         i += 2;
2,484✔
69
         continue;
2,484✔
70
      }
71
      if(c == '?') {
80,244✔
72
         // First '?' transitions from path to query, any further '?' are literal
73
         if(state == State::Path) {
681✔
74
            state = State::Query;
75
         }
76
         continue;
681✔
77
      }
78
      if(c == '#') {
79,563✔
79
         // There is only one '#' fragment delimiter, second '#' is invalid
80
         if(state == State::Fragment) {
18✔
81
            return false;
82
         }
83
         state = State::Fragment;
16✔
84
         continue;
16✔
85
      }
86
      if(!is_pchar_or_slash(c)) {
79,545✔
87
         return false;
88
      }
89
   }
90
   return true;
91
}
92

93
bool validate_userinfo(std::string_view userinfo) {
252✔
94
   constexpr auto is_valid_userinfo_char = CharacterValidityTable::alpha_numeric_plus("-._~!$&'()*+,;=:");
252✔
95

96
   for(size_t i = 0; i < userinfo.size(); ++i) {
466✔
97
      const char c = userinfo[i];
450✔
98
      if(c == '%') {
450✔
99
         if(i + 2 >= userinfo.size() || !is_valid_percent_escape(userinfo[i + 1], userinfo[i + 2])) {
3✔
100
            return false;
101
         }
102
         i += 2;
×
103
         continue;
×
104
      }
105
      if(!is_valid_userinfo_char(c)) {
447✔
106
         return false;
107
      }
108
   }
109
   return true;
110
}
111

112
}  // namespace
113

114
/**
* Three-way comparison: lexicographic over scheme, authority, path, query and
* fragment, in that order.
*/
std::strong_ordering URI::operator<=>(const URI& other) const {
   return std::tie(m_scheme, m_authority, m_path, m_query, m_fragment) <=>
          std::tie(other.m_scheme, other.m_authority, other.m_path, other.m_query, other.m_fragment);
}
118

119
/**
* Equality over the same fields, in the same order, as operator<=>:
* scheme, authority, path, query and fragment.
*/
bool URI::operator==(const URI& other) const {
   return std::tie(m_scheme, m_authority, m_path, m_query, m_fragment) ==
          std::tie(other.m_scheme, other.m_authority, other.m_path, other.m_query, other.m_fragment);
}
123

124
/**
* Three-way comparison: lexicographic over userinfo, host and port, in that
* order.
*/
std::strong_ordering URI::Authority::operator<=>(const URI::Authority& other) const {
   /*
   Userinfo is compared without normalization; RFC 3986 6.2.2.1:
      When a URI uses components of the generic syntax, the component
      syntax equivalence rules always apply; namely, that the scheme
      and host are case-insensitive and therefore should be normalized
      to lowercase. ... The other generic syntax components are assumed
      to be case-sensitive unless specifically defined otherwise by the
      scheme.
   */
   return std::tie(m_userinfo, m_host, m_port) <=> std::tie(other.m_userinfo, other.m_host, other.m_port);
}
136

137
/**
* Equality over the same fields, in the same order, as operator<=>:
* userinfo, host and port.
*/
bool URI::Authority::operator==(const URI::Authority& other) const {
   return std::tie(m_userinfo, m_host, m_port) == std::tie(other.m_userinfo, other.m_host, other.m_port);
}
140

141
//static
142
std::optional<URI> URI::parse(std::string_view raw) {
8,417✔
143
   // Empty string is not a valid URI
144
   if(raw.empty()) {
8,417✔
145
      return {};
3✔
146
   }
147

148
   // RFC 3986:
149
   // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
150
   constexpr auto is_scheme_cont_char = CharacterValidityTable::alpha_numeric_plus("+-.");
8,414✔
151

152
   const auto is_ascii_alpha = [](char c) -> bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); };
8,414✔
153

154
   // Check the first scheme character
155
   if(!is_ascii_alpha(raw.front())) {
8,414✔
156
      return {};
824✔
157
   }
158

159
   // Scan the rest of the scheme
160
   size_t i = 1;
161
   while(i < raw.size() && is_scheme_cont_char(raw[i])) {
34,601✔
162
      ++i;
27,011✔
163
   }
164
   // Scheme wasn't followed by ':' -> invalid
165
   if(i >= raw.size() || raw[i] != ':') {
7,590✔
166
      return {};
223✔
167
   }
168

169
   // Canonicalize the scheme
170
   const std::string scheme = tolower_string(raw.substr(0, i));
7,367✔
171

172
   // The scheme must be followed by "//" introducing an authority. RFC 5280
173
   // does allow including URIs without an authority ("urn:of:cat:ashes",
174
   // "mailto:root@attacker.com") but they seem like an potential footgun (for
175
   // example a rfc822 name constraint will not apply to a mailto: URL) and
176
   // without any obvious justification to support here.
177

178
   auto rest = raw.substr(i + 1);
7,367✔
179
   if(rest.size() < 2 || rest[0] != '/' || rest[1] != '/') {
7,367✔
180
      return {};
14✔
181
   }
182
   rest.remove_prefix(2);  // Strip off the '//'
7,353✔
183

184
   // Authority runs to the first '/', '?' or '#'. The remaining is `path ? query # fragment`,
185
   // which is validated against the RFC 3986 character set.
186
   const auto end = rest.find_first_of("/?#");
7,353✔
187
   const auto authority = (end == std::string_view::npos) ? rest : rest.substr(0, end);
7,353✔
188
   const auto path_query_fragment = (end == std::string_view::npos) ? std::string_view{} : rest.substr(end);
7,353✔
189

190
   // Parse and validate the authority string (hostname, IPv4, or IPv6 address)
191
   auto parsed_authority = Authority::parse(authority);
7,353✔
192
   if(!parsed_authority.has_value()) {
7,353✔
193
      return {};
60✔
194
   }
195

196
   // Validate any `path ? query # fragment` portions of the URL
197
   if(!validate_path_query_fragment(path_query_fragment)) {
7,293✔
198
      return {};
36✔
199
   }
200

201
   // Split into path / query / fragment. Validation above guarantees at most
202
   // one '#', so the first '#' is the fragment delimiter, and within the
203
   // pre-fragment portion the first '?' (if any) is the query delimiter.
204
   const auto hash = path_query_fragment.find('#');
7,257✔
205
   const auto pre_fragment =
7,257✔
206
      (hash == std::string_view::npos) ? path_query_fragment : path_query_fragment.substr(0, hash);
7,257✔
207
   std::optional<std::string> fragment;
7,257✔
208
   if(hash != std::string_view::npos) {
7,257✔
209
      fragment = std::string(path_query_fragment.substr(hash + 1));
28✔
210
   }
211

212
   const auto qmark = pre_fragment.find('?');
7,257✔
213
   const auto path = (qmark == std::string_view::npos) ? pre_fragment : pre_fragment.substr(0, qmark);
7,257✔
214
   std::optional<std::string> query;
7,257✔
215
   if(qmark != std::string_view::npos) {
7,257✔
216
      query = std::string(pre_fragment.substr(qmark + 1));
934✔
217
   }
218

219
   // Accept
220
   return URI(
51,294✔
221
      std::string(raw), scheme, std::move(*parsed_authority), std::string(path), std::move(query), std::move(fragment));
50,799✔
222
}
21,991✔
223

224
//static
225
std::optional<URI::Authority> URI::Authority::parse(std::string_view raw) {
8,383✔
226
   if(raw.empty()) {
8,383✔
227
      return {};
15✔
228
   }
229

230
   /*
231
   RFC 3986
232
     userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
233

234
   Thus a unencoded '@' is not allowed inside userinfo, and the single '@' splits the
235
   username from the authority. The @ being present at all is significant; an empty
236
   userinfo ("https://@example.com/") is distinct from no userinfo at all.
237
   */
238
   std::optional<std::string> userinfo;
8,368✔
239
   const auto first_at = raw.find('@');
8,368✔
240
   if(first_at != std::string_view::npos) {
8,368✔
241
      if(raw.find('@', first_at + 1) != std::string_view::npos) {
772✔
242
         return {};
756✔
243
      }
244
      const auto userinfo_view = raw.substr(0, first_at);
252✔
245
      if(!validate_userinfo(userinfo_view)) {
252✔
246
         return {};
236✔
247
      }
248
      userinfo = std::string(userinfo_view);
32✔
249
      raw.remove_prefix(first_at + 1);
16✔
250
   }
251

252
   std::string_view host_view;
7,612✔
253
   std::string_view port_str;
7,612✔
254
   std::optional<Host> host;
7,612✔
255

256
   if(!raw.empty() && raw.front() == '[') {
7,612✔
257
      // Bracketed IPv6 literal.
258
      const auto close = raw.find(']');
11✔
259
      if(close == std::string_view::npos) {
11✔
260
         return {};
4✔
261
      }
262
      host_view = raw.substr(1, close - 1);
10✔
263
      if(host_view.empty()) {
10✔
264
         return {};
×
265
      }
266
      const auto after = raw.substr(close + 1);
10✔
267
      if(!after.empty()) {
10✔
268
         if(after.front() != ':') {
5✔
269
            return {};
×
270
         }
271
         port_str = after.substr(1);
5✔
272
      }
273
      auto ipv6 = IPv6Address::from_string(host_view);
10✔
274
      if(!ipv6.has_value()) {
10✔
275
         return {};
3✔
276
      }
277
      host = *ipv6;
14✔
278
   } else {
279
      // host[:port] with no brackets. Only one ':' is allowed (port).
280
      const auto colon = raw.find(':');
7,601✔
281
      if(colon == std::string_view::npos) {
7,601✔
282
         host_view = raw;
7,434✔
283
      } else {
284
         host_view = raw.substr(0, colon);
167✔
285
         port_str = raw.substr(colon + 1);
167✔
286

287
         // Verify the `:` char is the only one that appears
288
         if(port_str.find(':') != std::string::npos) {
167✔
289
            return {};
63✔
290
         }
291
      }
292

293
      if(host_view.empty()) {
7,538✔
294
         return {};
×
295
      }
296

297
      // Technically valid per RFC 3986 but likely not something we want to support
298
      if(host_view.ends_with('.')) {
7,538✔
299
         return {};
4✔
300
      }
301

302
      if(auto ipv4 = IPv4Address::from_string(host_view)) {
7,534✔
303
         host = *ipv4;
7,331✔
304
      } else if(auto dns = DNSName::from_string(host_view)) {
7,508✔
305
         host = std::move(*dns);
7,279✔
306
      } else {
307
         return {};
229✔
308
      }
7,508✔
309
   }
310

311
   std::optional<uint16_t> port;
7,312✔
312

313
   if(!port_str.empty()) {
7,312✔
314
      port = parse_port(port_str);
48✔
315
      if(!port.has_value()) {
48✔
316
         return {};
6✔
317
      }
318
   }
319

320
   return Authority(std::string(raw), std::move(userinfo), std::move(*host), port);
21,942✔
321
}
16,028✔
322

323
/**
* @return the result of calling to_string() on whichever host alternative is
*         currently held
*/
std::string URI::Authority::host_to_string() const {
   return std::visit([](const auto& h) -> std::string { return h.to_string(); }, m_host);
}
326

327
/**
* @return which kind of host is stored: HostKind::DNS for a DNSName,
*         HostKind::IPv4 for an IPv4Address, HostKind::IPv6 for an IPv6Address
*/
URI::Authority::HostKind URI::Authority::host_kind() const {
   if(std::holds_alternative<DNSName>(m_host)) {
      return HostKind::DNS;
   } else if(std::holds_alternative<IPv4Address>(m_host)) {
      return HostKind::IPv4;
   } else if(std::holds_alternative<IPv6Address>(m_host)) {
      return HostKind::IPv6;
   } else {
      // No other alternative is handled by this function
      BOTAN_ASSERT_UNREACHABLE();
   }
}
338

339
//static
340
std::vector<URI> URI::filter_scheme(std::string_view scheme, std::span<const URI> uris) {
10✔
341
   std::vector<URI> results;
10✔
342

343
   const auto normalized_scheme = tolower_string(scheme);
10✔
344

345
   for(const auto& uri : uris) {
11✔
346
      if(uri.scheme() == normalized_scheme) {
1✔
347
         results.push_back(uri);
1✔
348
      }
349
   }
350

351
   return results;
10✔
352
}
10✔
353

354
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc