• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 28311706783

27 Jun 2026 04:51PM UTC coverage: 89.346% (-0.006%) from 89.352%
28311706783

push

github

web-flow
Merge pull request #5703 from randombit/jack/alg-id-param-validation

Validate AlgorithmIdentifier parameters on decode

112110 of 125479 relevant lines covered (89.35%)

10963156.46 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.59
/src/lib/utils/uri/uri.cpp
1
/*
2
* (C) 2026 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6

7
#include <botan/uri.h>
8

9
#include <botan/assert.h>
10
#include <botan/exceptn.h>
11
#include <botan/internal/charset.h>
12
#include <botan/internal/parsing.h>
13

14
namespace Botan {
15

16
namespace {
17

18
std::optional<uint16_t> parse_port(std::string_view s) {
56✔
19
   // RFC 3986 port is "*DIGIT" but we reject leading zeros ("host:0080")
20
   if(const auto port = parse_u16(s, /*require_canonical=*/true)) {
56✔
21
      if(*port > 0) {
45✔
22
         return port;
44✔
23
      }
24
   }
25

26
   return {};
12✔
27
}
28

29
/**
* Check the two characters that follow a '%' in a percent-encoded triplet.
*
* @param c1 first character after the '%'
* @param c2 second character after the '%'
* @return true if both characters are ASCII hex digits (either case) and
*         the pair is not "00"
*/
bool is_valid_percent_escape(char c1, char c2) {
   const auto is_hex = [](char c) {
      const bool digit = ('0' <= c && c <= '9');
      const bool lower = ('a' <= c && c <= 'f');
      const bool upper = ('A' <= c && c <= 'F');
      return digit || lower || upper;
   };

   const bool both_hex = is_hex(c1) && is_hex(c2);

   // %00 is well-formed hex but is refused proactively, since it would
   // decode to an embedded null
   const bool encodes_null = (c1 == '0' && c2 == '0');

   return both_hex && !encodes_null;
}
45

46
bool validate_path_query_fragment(std::string_view tail) {
7,116✔
47
   /*
48
   * RFC 3986 syntax for the path/query/fragment of a URI:
49
   *
50
   *   URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
51
   *   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
52
   *   segment       = *pchar
53
   *   path-abempty  = *( "/" segment )
54
   *   query         = *( pchar / "/" / "?" )
55
   *   fragment     =  *( pchar / "/" / "?" )
56
   */
57

58
   constexpr auto is_pchar_or_slash = CharacterValidityTable::alpha_numeric_plus("-._~!$&'()*+,;=:@/");
7,116✔
59

60
   enum class State : uint8_t { Path, Query, Fragment };
7,116✔
61
   State state = State::Path;
7,116✔
62

63
   for(size_t i = 0; i < tail.size(); ++i) {
90,479✔
64
      const char c = tail[i];
83,405✔
65
      if(c == '%') {
83,405✔
66
         if(i + 2 >= tail.size() || !is_valid_percent_escape(tail[i + 1], tail[i + 2])) {
2,560✔
67
            return false;
68
         }
69
         i += 2;
2,553✔
70
         continue;
2,553✔
71
      }
72
      if(c == '?') {
80,845✔
73
         // First '?' transitions from path to query, any further '?' are literal
74
         if(state == State::Path) {
730✔
75
            state = State::Query;
76
         }
77
         continue;
730✔
78
      }
79
      if(c == '#') {
80,115✔
80
         // There is only one '#' fragment delimiter, second '#' is invalid
81
         if(state == State::Fragment) {
19✔
82
            return false;
83
         }
84
         state = State::Fragment;
17✔
85
         continue;
17✔
86
      }
87
      if(!is_pchar_or_slash(c)) {
80,096✔
88
         return false;
89
      }
90
   }
91
   return true;
92
}
93

94
bool validate_userinfo(std::string_view userinfo) {
263✔
95
   constexpr auto is_valid_userinfo_char = CharacterValidityTable::alpha_numeric_plus("-._~!$&'()*+,;=:");
263✔
96

97
   for(size_t i = 0; i < userinfo.size(); ++i) {
508✔
98
      const char c = userinfo[i];
491✔
99
      if(c == '%') {
491✔
100
         if(i + 2 >= userinfo.size() || !is_valid_percent_escape(userinfo[i + 1], userinfo[i + 2])) {
4✔
101
            return false;
102
         }
103
         i += 2;
×
104
         continue;
×
105
      }
106
      if(!is_valid_userinfo_char(c)) {
487✔
107
         return false;
108
      }
109
   }
110
   return true;
111
}
112

113
}  // namespace
114

115
/**
* Three-way comparison of two URIs.
*
* Fields are compared lexicographically in this order: scheme, whether the
* raw text carries an authority at all, the parsed authority, path, query,
* fragment. The "has authority" flag comes from raw_authority(), so it can
* differ between two URIs even when neither has a parsed authority.
*/
std::strong_ordering URI::operator<=>(const URI& other) const {
   const bool lhs_has_authority = raw_authority().has_value();
   const bool rhs_has_authority = other.raw_authority().has_value();

   const auto lhs = std::tie(m_scheme, lhs_has_authority, m_authority, m_path, m_query, m_fragment);
   const auto rhs =
      std::tie(other.m_scheme, rhs_has_authority, other.m_authority, other.m_path, other.m_query, other.m_fragment);

   return lhs <=> rhs;
}
123

124
/**
* Equality of two URIs.
*
* Two URIs are equal when scheme, presence of an authority in the raw
* text, parsed authority, path, query and fragment all match. The checks
* run in that order and stop at the first mismatch.
*/
bool URI::operator==(const URI& other) const {
   if(m_scheme != other.m_scheme) {
      return false;
   }

   if(raw_authority().has_value() != other.raw_authority().has_value()) {
      return false;
   }

   if(!(m_authority == other.m_authority)) {
      return false;
   }

   if(m_path != other.m_path) {
      return false;
   }

   if(m_query != other.m_query) {
      return false;
   }

   return m_fragment == other.m_fragment;
}
129

130
std::optional<std::string_view> URI::raw_authority() const {
591✔
131
   const auto colon = m_raw.find(':');
591✔
132
   BOTAN_ASSERT_NOMSG(colon != std::string::npos);
591✔
133

134
   const size_t rest_offset = colon + 1;
591✔
135
   if(m_raw.size() < rest_offset + 2 || m_raw[rest_offset] != '/' || m_raw[rest_offset + 1] != '/') {
591✔
136
      return std::nullopt;
22✔
137
   }
138

139
   const size_t authority_start = rest_offset + 2;
569✔
140
   const auto authority_end = m_raw.find_first_of("/?#", authority_start);
569✔
141
   const size_t authority_len =
1,138✔
142
      (authority_end == std::string::npos) ? std::string::npos : authority_end - authority_start;
569✔
143
   return std::string_view(m_raw).substr(authority_start, authority_len);
569✔
144
}
145

146
/**
* Three-way comparison of two authorities, ordered lexicographically by
* userinfo, then host, then port.
*/
std::strong_ordering URI::Authority::operator<=>(const URI::Authority& other) const {
   /*
   The userinfo is compared exactly as stored, with no normalization.
   RFC 3986 6.2.2.1:
      When a URI uses components of the generic syntax, the component
      syntax equivalence rules always apply; namely, that the scheme
      and host are case-insensitive and therefore should be normalized
      to lowercase. ... The other generic syntax components are assumed
      to be case-sensitive unless specifically defined otherwise by the
      scheme.
   */
   const auto lhs = std::tie(m_userinfo, m_host, m_port);
   const auto rhs = std::tie(other.m_userinfo, other.m_host, other.m_port);

   return lhs <=> rhs;
}
158

159
/**
* Equality of two authorities: userinfo, host and port must all match.
* The fields are compared in that order.
*/
bool URI::Authority::operator==(const URI::Authority& other) const {
   // Tuple equality compares element by element and stops at the first mismatch
   return std::tie(m_userinfo, m_host, m_port) == std::tie(other.m_userinfo, other.m_host, other.m_port);
}
162

163
//static
164
std::optional<URI> URI::parse(std::string_view raw) {
8,213✔
165
   // Empty string is not a valid URI
166
   if(raw.empty()) {
8,213✔
167
      return {};
3✔
168
   }
169

170
   // RFC 3986:
171
   // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
172
   constexpr auto is_scheme_cont_char = CharacterValidityTable::alpha_numeric_plus("+-.");
8,210✔
173

174
   const auto is_ascii_alpha = [](char c) -> bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); };
8,210✔
175

176
   // Check the first scheme character
177
   if(!is_ascii_alpha(raw.front())) {
8,210✔
178
      return {};
830✔
179
   }
180

181
   // Scan the rest of the scheme
182
   size_t i = 1;
183
   while(i < raw.size() && is_scheme_cont_char(raw[i])) {
33,737✔
184
      ++i;
26,357✔
185
   }
186
   // Scheme wasn't followed by ':' -> invalid
187
   if(i >= raw.size() || raw[i] != ':') {
7,380✔
188
      return {};
215✔
189
   }
190

191
   // Canonicalize the scheme
192
   const std::string scheme = tolower_string(raw.substr(0, i));
7,165✔
193

194
   auto rest = raw.substr(i + 1);
7,165✔
195

196
   std::optional<Authority> parsed_authority;
7,165✔
197
   std::string_view path_query_fragment;
7,165✔
198

199
   if(rest.starts_with("//")) {
7,165✔
200
      rest.remove_prefix(2);  // Strip off the '//'
7,140✔
201

202
      // Authority runs to the first '/', '?' or '#'. The remaining is `path ? query # fragment`,
203
      // which is validated against the RFC 3986 character set.
204
      const auto end = rest.find_first_of("/?#");
7,140✔
205
      const auto authority = (end == std::string_view::npos) ? rest : rest.substr(0, end);
7,140✔
206
      path_query_fragment = (end == std::string_view::npos) ? std::string_view{} : rest.substr(end);
7,140✔
207

208
      // Parse and validate non-empty authority strings (hostname, IPv4, or IPv6 address)
209
      if(!authority.empty()) {
7,140✔
210
         parsed_authority = Authority::parse(authority);
14,236✔
211
         if(!parsed_authority.has_value()) {
7,118✔
212
            return {};
49✔
213
         }
214
      }
215
   } else {
216
      path_query_fragment = rest;
25✔
217
   }
218

219
   // Validate any `path ? query # fragment` portions of the URL
220
   if(!validate_path_query_fragment(path_query_fragment)) {
7,116✔
221
      return {};
42✔
222
   }
223

224
   // Split into path / query / fragment. Validation above guarantees at most
225
   // one '#', so the first '#' is the fragment delimiter, and within the
226
   // pre-fragment portion the first '?' (if any) is the query delimiter.
227
   const auto hash = path_query_fragment.find('#');
7,074✔
228
   const auto pre_fragment =
7,074✔
229
      (hash == std::string_view::npos) ? path_query_fragment : path_query_fragment.substr(0, hash);
7,074✔
230
   std::optional<std::string> fragment;
7,074✔
231
   if(hash != std::string_view::npos) {
7,074✔
232
      fragment = std::string(path_query_fragment.substr(hash + 1));
30✔
233
   }
234

235
   const auto qmark = pre_fragment.find('?');
7,074✔
236
   const auto path = (qmark == std::string_view::npos) ? pre_fragment : pre_fragment.substr(0, qmark);
7,074✔
237
   std::optional<std::string> query;
7,074✔
238
   if(qmark != std::string_view::npos) {
7,074✔
239
      query = std::string(pre_fragment.substr(qmark + 1));
966✔
240
   }
241

242
   // Accept
243
   return URI(
57,064✔
244
      std::string(raw), scheme, std::move(parsed_authority), std::string(path), std::move(query), std::move(fragment));
49,518✔
245
}
14,254✔
246

247
//static
248
std::optional<URI::Authority> URI::Authority::parse(std::string_view raw) {
8,155✔
249
   if(raw.empty()) {
8,155✔
250
      return {};
2✔
251
   }
252

253
   // Capture the full input now; the userinfo prefix is stripped from
254
   // `raw` below, and m_raw must reflect the original
255
   const std::string original_input(raw);
8,153✔
256

257
   /*
258
   RFC 3986
259
     userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
260

261
   Thus a unencoded '@' is not allowed inside userinfo, and the single '@' splits the
262
   username from the authority. The @ being present at all is significant; an empty
263
   userinfo ("https://@example.com/") is distinct from no userinfo at all.
264
   */
265
   std::optional<std::string> userinfo;
8,153✔
266
   const auto first_at = raw.find('@');
8,153✔
267
   if(first_at != std::string_view::npos) {
8,153✔
268
      if(raw.find('@', first_at + 1) != std::string_view::npos) {
774✔
269
         return {};
757✔
270
      }
271
      const auto userinfo_view = raw.substr(0, first_at);
263✔
272
      if(!validate_userinfo(userinfo_view)) {
263✔
273
         return {};
246✔
274
      }
275
      userinfo = std::string(userinfo_view);
34✔
276
      raw.remove_prefix(first_at + 1);
17✔
277
   }
278

279
   std::string_view host_view;
7,396✔
280
   std::string_view port_str;
7,396✔
281
   std::optional<Host> host;
7,396✔
282

283
   if(!raw.empty() && raw.front() == '[') {
7,396✔
284
      // Bracketed IPv6 literal.
285
      const auto close = raw.find(']');
12✔
286
      if(close == std::string_view::npos) {
12✔
287
         return {};
4✔
288
      }
289
      host_view = raw.substr(1, close - 1);
11✔
290
      if(host_view.empty()) {
11✔
291
         return {};
×
292
      }
293
      const auto after = raw.substr(close + 1);
11✔
294
      if(!after.empty()) {
11✔
295
         if(after.front() != ':') {
6✔
296
            return {};
×
297
         }
298
         port_str = after.substr(1);
6✔
299
      }
300
      auto ipv6 = IPv6Address::from_string(host_view);
11✔
301
      if(!ipv6.has_value()) {
11✔
302
         return {};
3✔
303
      }
304
      host = *ipv6;
16✔
305
   } else {
306
      // host[:port] with no brackets. Only one ':' is allowed (port).
307
      const auto colon = raw.find(':');
7,384✔
308
      if(colon == std::string_view::npos) {
7,384✔
309
         host_view = raw;
7,212✔
310
      } else {
311
         host_view = raw.substr(0, colon);
172✔
312
         port_str = raw.substr(colon + 1);
172✔
313

314
         // Verify the `:` char is the only one that appears
315
         if(port_str.find(':') != std::string::npos) {
172✔
316
            return {};
61✔
317
         }
318
      }
319

320
      if(host_view.empty()) {
7,323✔
321
         return {};
×
322
      }
323

324
      // Technically valid per RFC 3986 but likely not something we want to support
325
      if(host_view.ends_with('.')) {
7,323✔
326
         return {};
4✔
327
      }
328

329
      if(auto ipv4 = IPv4Address::from_string(host_view)) {
7,319✔
330
         host = *ipv4;
7,115✔
331
      } else if(auto dns = DNSName::from_string(host_view)) {
7,292✔
332
         host = std::move(*dns);
7,061✔
333
      } else {
334
         return {};
231✔
335
      }
7,292✔
336
   }
337

338
   std::optional<uint16_t> port;
7,096✔
339

340
   if(!port_str.empty()) {
7,096✔
341
      port = parse_port(port_str);
56✔
342
      if(!port.has_value()) {
56✔
343
         return {};
12✔
344
      }
345
   }
346

347
   return Authority(original_input, std::move(userinfo), std::move(*host), port);
21,280✔
348
}
15,549✔
349

350
/**
* Render the host as text by calling to_string() on whichever alternative
* the host variant currently holds.
*/
std::string URI::Authority::host_to_string() const {
   const auto stringify = [](const auto& host) -> std::string { return host.to_string(); };
   return std::visit(stringify, m_host);
}
353

354
/**
* Report which kind of host this authority holds, based on the active
* alternative of the host variant: DNS name, IPv4 address or IPv6 address.
*/
URI::Authority::HostKind URI::Authority::host_kind() const {
   if(std::holds_alternative<DNSName>(m_host)) {
      return HostKind::DNS;
   }

   if(std::holds_alternative<IPv4Address>(m_host)) {
      return HostKind::IPv4;
   }

   if(std::holds_alternative<IPv6Address>(m_host)) {
      return HostKind::IPv6;
   }

   // None of the three checked alternatives is active
   BOTAN_ASSERT_UNREACHABLE();
}
365

366
//static
367
std::vector<URI> URI::filter_scheme(std::string_view scheme, std::span<const URI> uris) {
10✔
368
   std::vector<URI> results;
10✔
369

370
   const auto normalized_scheme = tolower_string(scheme);
10✔
371

372
   for(const auto& uri : uris) {
11✔
373
      if(uri.scheme() == normalized_scheme && uri.authority().has_value()) {
1✔
374
         results.push_back(uri);
1✔
375
      }
376
   }
377

378
   return results;
10✔
379
}
10✔
380

381
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc