• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 28322807206

28 Jun 2026 12:50PM UTC coverage: 89.342% (-0.004%) from 89.346%
28322807206

push

github

web-flow
Merge pull request #5709 from randombit/jack/anvil-brainpool

Add brainpool back to the TLS-Anvil policy file

112110 of 125484 relevant lines covered (89.34%)

11030670.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.89
/src/lib/utils/charset.cpp
1
/*
* Character Set Handling
* (C) 1999-2007,2021 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
7

8
#include <botan/internal/charset.h>
9

10
#include <botan/exceptn.h>
11
#include <botan/internal/loadstor.h>
12

13
namespace Botan {
14

15
namespace {
16

17
void append_utf8_for(std::string& s, uint32_t c) {
21,715✔
18
   if(c >= 0xD800 && c < 0xE000) {
21,715✔
19
      throw Decoding_Error("Invalid Unicode character");
2✔
20
   }
21

22
   if(c <= 0x7F) {
21,713✔
23
      const uint8_t b0 = static_cast<uint8_t>(c);
21,199✔
24
      s.push_back(static_cast<char>(b0));
21,199✔
25
   } else if(c <= 0x7FF) {
514✔
26
      const uint8_t b0 = 0xC0 | static_cast<uint8_t>(c >> 6);
200✔
27
      const uint8_t b1 = 0x80 | static_cast<uint8_t>(c & 0x3F);
200✔
28
      s.push_back(static_cast<char>(b0));
200✔
29
      s.push_back(static_cast<char>(b1));
200✔
30
   } else if(c <= 0xFFFF) {
314✔
31
      const uint8_t b0 = 0xE0 | static_cast<uint8_t>(c >> 12);
292✔
32
      const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
292✔
33
      const uint8_t b2 = 0x80 | static_cast<uint8_t>(c & 0x3F);
292✔
34
      s.push_back(static_cast<char>(b0));
292✔
35
      s.push_back(static_cast<char>(b1));
292✔
36
      s.push_back(static_cast<char>(b2));
292✔
37
   } else if(c <= 0x10FFFF) {
22✔
38
      const uint8_t b0 = 0xF0 | static_cast<uint8_t>(c >> 18);
1✔
39
      const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 12) & 0x3F);
1✔
40
      const uint8_t b2 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
1✔
41
      const uint8_t b3 = 0x80 | static_cast<uint8_t>(c & 0x3F);
1✔
42
      s.push_back(static_cast<char>(b0));
1✔
43
      s.push_back(static_cast<char>(b1));
1✔
44
      s.push_back(static_cast<char>(b2));
1✔
45
      s.push_back(static_cast<char>(b3));
1✔
46
   } else {
47
      throw Decoding_Error("Invalid Unicode character");
21✔
48
   }
49
}
21,692✔
50

51
}  // namespace
52

53
uint32_t next_utf8_codepoint(std::string_view utf8, size_t& pos) {
676,078✔
54
   auto read_continuation = [&]() -> uint32_t {
678,922✔
55
      if(pos >= utf8.size()) {
2,844✔
56
         throw Decoding_Error("Invalid UTF-8 sequence");
5✔
57
      }
58
      const uint8_t b = static_cast<uint8_t>(utf8[pos++]);
2,839✔
59
      if((b & 0xC0) != 0x80) {
2,839✔
60
         throw Decoding_Error("Invalid UTF-8 sequence");
8✔
61
      }
62
      return b & 0x3F;
2,831✔
63
   };
676,078✔
64

65
   if(pos >= utf8.size()) {
676,078✔
66
      throw Decoding_Error("Invalid UTF-8 sequence");
×
67
   }
68
   const uint8_t lead = static_cast<uint8_t>(utf8[pos++]);
676,078✔
69
   uint32_t c = 0;
676,078✔
70

71
   if(lead <= 0x7F) {
676,078✔
72
      c = lead;
673,296✔
73
   } else if((lead & 0xE0) == 0xC0) {
2,782✔
74
      c = (lead & 0x1F) << 6;
2,701✔
75
      c |= read_continuation();
2,701✔
76
      if(c < 0x80) {
2,694✔
77
         throw Decoding_Error("Overlong UTF-8 sequence");
1✔
78
      }
79
   } else if((lead & 0xF0) == 0xE0) {
81✔
80
      c = (lead & 0x0F) << 12;
53✔
81
      c |= read_continuation() << 6;
53✔
82
      c |= read_continuation();
53✔
83
      if(c < 0x800) {
50✔
84
         throw Decoding_Error("Overlong UTF-8 sequence");
3✔
85
      }
86
   } else if((lead & 0xF8) == 0xF0) {
28✔
87
      c = (lead & 0x07) << 18;
13✔
88
      c |= read_continuation() << 12;
13✔
89
      c |= read_continuation() << 6;
12✔
90
      c |= read_continuation();
12✔
91
      if(c < 0x10000) {
10✔
92
         throw Decoding_Error("Overlong UTF-8 sequence");
3✔
93
      }
94
   } else {
95
      throw Decoding_Error("Invalid UTF-8 sequence");
15✔
96
   }
97

98
   if(c > 0x10FFFF) {
676,043✔
99
      throw Decoding_Error("UTF-8 sequence encodes value outside Unicode range");
3✔
100
   }
101
   if(c >= 0xD800 && c < 0xE000) {
676,040✔
102
      throw Decoding_Error("UTF-8 sequence encodes surrogate code point");
3✔
103
   }
104

105
   return c;
676,037✔
106
}
107

108
bool is_valid_utf8(std::string_view utf8) {
38,486✔
109
   try {
38,486✔
110
      size_t pos = 0;
38,486✔
111
      while(pos < utf8.size()) {
687,150✔
112
         const uint32_t c = next_utf8_codepoint(utf8, pos);
648,683✔
113
         BOTAN_UNUSED(c);
114
      }
115
   } catch(Decoding_Error&) {
19✔
116
      return false;
19✔
117
   }
19✔
118
   return true;
38,467✔
119
}
120

121
/*
* Convert a UCS-2 string to UTF-8
*
* The input is read as a sequence of 16 bit values using load_be. Throws
* Decoding_Error if the input length is odd, or if append_utf8_for rejects
* one of the values (which it does for surrogates).
*/
std::string ucs2_to_utf8(std::span<const uint8_t> ucs2) {
   if(ucs2.size() % 2 != 0) {
      throw Decoding_Error("Invalid length for UCS-2 string");
   }

   const size_t chars = ucs2.size() / 2;

   std::string s;
   // Each input character produces at least one output byte
   s.reserve(chars);

   for(size_t i = 0; i != chars; ++i) {
      const uint32_t c = load_be<uint16_t>(ucs2.data(), i);
      append_utf8_for(s, c);
   }

   return s;
}
1✔
136

137
std::vector<uint8_t> utf8_to_ucs2(std::string_view utf8) {
44✔
138
   std::vector<uint8_t> out;
44✔
139
   out.reserve(utf8.size() * 2);
44✔
140

141
   size_t pos = 0;
44✔
142
   while(pos < utf8.size()) {
252✔
143
      const uint32_t c = next_utf8_codepoint(utf8, pos);
221✔
144
      if(c > 0xFFFF) {
209✔
145
         throw Decoding_Error("Cannot encode character in UCS-2");
1✔
146
      }
147
      const uint16_t val = static_cast<uint16_t>(c);
208✔
148
      out.push_back(get_byte<0>(val));
208✔
149
      out.push_back(get_byte<1>(val));
208✔
150
   }
151

152
   return out;
31✔
153
}
13✔
154

155
/*
* Convert a UCS-4 string to UTF-8
*
* The input is read as a sequence of 32 bit values using load_be. Throws
* Decoding_Error if the input length is not a multiple of 4, or if
* append_utf8_for rejects one of the values (surrogates and values above
* 0x10FFFF).
*/
std::string ucs4_to_utf8(std::span<const uint8_t> ucs4) {
   if(ucs4.size() % 4 != 0) {
      throw Decoding_Error("Invalid length for UCS-4 string");
   }

   const size_t chars = ucs4.size() / 4;

   std::string s;
   // Each input character produces at least one output byte
   s.reserve(chars);

   for(size_t i = 0; i != chars; ++i) {
      const uint32_t c = load_be<uint32_t>(ucs4.data(), i);
      append_utf8_for(s, c);
   }

   return s;
}
22✔
170

171
std::vector<uint8_t> utf8_to_ucs4(std::string_view utf8) {
21✔
172
   std::vector<uint8_t> out;
21✔
173
   out.reserve(utf8.size() * 4);
21✔
174

175
   size_t pos = 0;
21✔
176
   while(pos < utf8.size()) {
54✔
177
      const uint32_t val = next_utf8_codepoint(utf8, pos);
43✔
178
      out.push_back(get_byte<0>(val));
33✔
179
      out.push_back(get_byte<1>(val));
33✔
180
      out.push_back(get_byte<2>(val));
33✔
181
      out.push_back(get_byte<3>(val));
33✔
182
   }
183

184
   return out;
11✔
185
}
10✔
186

187
/*
188
* Convert from ISO 8859-1 to UTF-8
189
*/
190
std::string latin1_to_utf8(std::span<const uint8_t> chars) {
1,082✔
191
   std::string s;
1,082✔
192
   for(const uint8_t b : chars) {
21,226✔
193
      append_utf8_for(s, static_cast<uint32_t>(b));
20,144✔
194
   }
195
   return s;
1,082✔
196
}
×
197

198
/*
* Return true if c is an ASCII control character: a value below 0x20, or
* 0x7F. Values of 0x80 and above are not considered control characters.
*/
bool is_ascii_control_char(char c) {
   // Compare as unsigned so that a signed char with the high bit set is not below 0x20
   const uint8_t b = static_cast<uint8_t>(c);
   return b < 0x20 || b == 0x7F;
}
202

203
/*
* Return true if cp is below 0x20 or lies in the range 0x7F through 0x9F
* inclusive.
*/
bool is_unicode_control_char(uint32_t cp) {
   const bool low_range = (cp < 0x20);
   const bool high_range = (cp >= 0x7F && cp <= 0x9F);
   return low_range || high_range;
}
206

207
std::string escape_control_chars(std::string_view utf8) {
427✔
208
   std::string out;
427✔
209
   out.reserve(utf8.size());
427✔
210

211
   const auto append_hex_escape = [&](uint8_t b) {
453✔
212
      out += "\\x";
26✔
213
      out += nibble_to_hex(b >> 4);
52✔
214
      out += nibble_to_hex(b);
52✔
215
   };
453✔
216

217
   size_t pos = 0;
427✔
218
   while(pos < utf8.size()) {
18,707✔
219
      const size_t start = pos;
18,280✔
220

221
      uint32_t cp = 0;
18,280✔
222
      try {
18,280✔
223
         cp = next_utf8_codepoint(utf8, pos);
18,280✔
224
      } catch(const Decoding_Error&) {
×
225
         // Not valid UTF-8: escape the offending byte and resume
226
         append_hex_escape(static_cast<uint8_t>(utf8[start]));
×
227
         pos = start + 1;
×
228
         continue;
×
229
      }
×
230

231
      if(is_unicode_control_char(cp)) {
18,280✔
232
         for(size_t i = start; i < pos; ++i) {
49✔
233
            append_hex_escape(static_cast<uint8_t>(utf8[i]));
26✔
234
         }
235
      } else {
236
         out.append(utf8.substr(start, pos - start));
36,514✔
237
      }
238
   }
239

240
   return out;
427✔
241
}
×
242

243
std::string format_char_for_display(char c) {
84✔
244
   std::string out;
84✔
245
   out += '\'';
84✔
246

247
   if(c == '\t') {
84✔
248
      out += "\\t";
15✔
249
   } else if(c == '\n') {
69✔
250
      out += "\\n";
15✔
251
   } else if(c == '\r') {
54✔
252
      out += "\\r";
15✔
253
   } else if(is_ascii_control_char(c) || static_cast<uint8_t>(c) >= 0x80) {
39✔
254
      const auto b = static_cast<uint8_t>(c);
17✔
255
      out += "\\x";
17✔
256
      out += nibble_to_hex(b >> 4);
34✔
257
      out += nibble_to_hex(b);
34✔
258
   } else {
259
      out += c;
22✔
260
   }
261

262
   out += '\'';
84✔
263

264
   return out;
84✔
265
}
×
266

267
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc