• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 26995937053

04 Jun 2026 09:38PM UTC coverage: 89.394% (-2.3%) from 91.672%
26995937053

push

github

web-flow
Merge pull request #5642 from randombit/jack/prefetch-in-ks

Improve prefetching for table based implementations

110588 of 123708 relevant lines covered (89.39%)

11056434.37 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.74
/src/lib/stream/chacha/chacha.cpp
1
/*
2
* ChaCha
3
* (C) 2014,2018,2023 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7

8
#include <botan/internal/chacha.h>
9

10
#include <botan/exceptn.h>
11
#include <botan/internal/fmt.h>
12
#include <botan/internal/loadstor.h>
13
#include <botan/internal/rotate.h>
14

15
#if defined(BOTAN_HAS_CPUID)
16
   #include <botan/internal/cpuid.h>
17
#endif
18

19
namespace Botan {
20

21
namespace {
22

23
/*
* Keystream limit, in bytes, for the 96-bit nonce variant.
*
* RFC 8439 defines ChaCha with 96-bit nonces by stealing one of the
* words used for the block counter, which leaves a 32-bit counter:
* 2^32 blocks of 64 bytes each, i.e. 2^38 bytes (256 GiB). With
* 64-bit nonces the block counter is also 64 bits and practically
* not exhaustible, so no cap is tracked for that case.
*/
constexpr uint64_t chacha_96bit_nonce_cap = static_cast<uint64_t>(64) << 32;
29

30
inline void chacha_quarter_round(uint32_t& a, uint32_t& b, uint32_t& c, uint32_t& d) {
5,708,000✔
31
   a += b;
5,708,000✔
32
   d ^= a;
5,708,000✔
33
   d = rotl<16>(d);
5,708,000✔
34
   c += d;
5,708,000✔
35
   b ^= c;
5,708,000✔
36
   b = rotl<12>(b);
5,708,000✔
37
   a += b;
5,708,000✔
38
   d ^= a;
5,708,000✔
39
   d = rotl<8>(d);
5,708,000✔
40
   c += d;
5,708,000✔
41
   b ^= c;
5,708,000✔
42
   b = rotl<7>(b);
5,708,000✔
43
}
5,708,000✔
44

45
/*
46
* Generate HChaCha cipher stream (for XChaCha IV setup)
47
*/
48
void hchacha(uint32_t output[8], const uint32_t input[16], size_t rounds) {
71,350✔
49
   BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
71,350✔
50

51
   uint32_t x00 = input[0];
71,350✔
52
   uint32_t x01 = input[1];
71,350✔
53
   uint32_t x02 = input[2];
71,350✔
54
   uint32_t x03 = input[3];
71,350✔
55
   uint32_t x04 = input[4];
71,350✔
56
   uint32_t x05 = input[5];
71,350✔
57
   uint32_t x06 = input[6];
71,350✔
58
   uint32_t x07 = input[7];
71,350✔
59
   uint32_t x08 = input[8];
71,350✔
60
   uint32_t x09 = input[9];
71,350✔
61
   uint32_t x10 = input[10];
71,350✔
62
   uint32_t x11 = input[11];
71,350✔
63
   uint32_t x12 = input[12];
71,350✔
64
   uint32_t x13 = input[13];
71,350✔
65
   uint32_t x14 = input[14];
71,350✔
66
   uint32_t x15 = input[15];
71,350✔
67

68
   for(size_t i = 0; i != rounds / 2; ++i) {
784,850✔
69
      chacha_quarter_round(x00, x04, x08, x12);
713,500✔
70
      chacha_quarter_round(x01, x05, x09, x13);
713,500✔
71
      chacha_quarter_round(x02, x06, x10, x14);
713,500✔
72
      chacha_quarter_round(x03, x07, x11, x15);
713,500✔
73

74
      chacha_quarter_round(x00, x05, x10, x15);
713,500✔
75
      chacha_quarter_round(x01, x06, x11, x12);
713,500✔
76
      chacha_quarter_round(x02, x07, x08, x13);
713,500✔
77
      chacha_quarter_round(x03, x04, x09, x14);
713,500✔
78
   }
79

80
   output[0] = x00;
71,350✔
81
   output[1] = x01;
71,350✔
82
   output[2] = x02;
71,350✔
83
   output[3] = x03;
71,350✔
84
   output[4] = x12;
71,350✔
85
   output[5] = x13;
71,350✔
86
   output[6] = x14;
71,350✔
87
   output[7] = x15;
71,350✔
88
}
71,350✔
89

90
}  // namespace
91

92
/*
* Construct a ChaCha instance with the requested round count.
*
* Only 8, 12 and 20 rounds are accepted; any other value is rejected
* through BOTAN_ARG_CHECK.
*/
ChaCha::ChaCha(size_t rounds) :
      m_rounds(rounds) {
   BOTAN_ARG_CHECK(m_rounds == 8 || m_rounds == 12 || m_rounds == 20,
                   "ChaCha only supports 8, 12 or 20 rounds");
}
21,567✔
95

96
/*
* Number of 64-byte blocks the keystream buffer is sized for.
*
* Probes the compiled-in implementations from widest to narrowest:
* 16 with AVX-512, 8 with AVX2, otherwise 4.
*/
size_t ChaCha::parallelism() {
#if defined(BOTAN_HAS_CHACHA_AVX512)
   const bool avx512_usable = CPUID::has(CPUID::Feature::AVX512);
   if(avx512_usable) {
      return 16;
   }
#endif

#if defined(BOTAN_HAS_CHACHA_AVX2)
   const bool avx2_usable = CPUID::has(CPUID::Feature::AVX2);
   if(avx2_usable) {
      return 8;
   }
#endif

   // Also the value used when no wide SIMD variant is available
   return 4;
}
111

112
/*
* Name of the implementation that will be used on this CPU.
*
* Returns whatever CPUID::check yields for the first available feature
* among AVX-512, AVX2 and SIMD_4X32 (each only if compiled in), or
* "base" when none of them applies.
*/
std::string ChaCha::provider() const {
#if defined(BOTAN_HAS_CHACHA_AVX512)
   auto avx512_impl = CPUID::check(CPUID::Feature::AVX512);
   if(avx512_impl) {
      return *avx512_impl;
   }
#endif

#if defined(BOTAN_HAS_CHACHA_AVX2)
   auto avx2_impl = CPUID::check(CPUID::Feature::AVX2);
   if(avx2_impl) {
      return *avx2_impl;
   }
#endif

#if defined(BOTAN_HAS_CHACHA_SIMD32)
   auto simd32_impl = CPUID::check(CPUID::Feature::SIMD_4X32);
   if(simd32_impl) {
      return *simd32_impl;
   }
#endif

   return "base";
}
133

134
void ChaCha::chacha(uint8_t output[], size_t output_blocks, uint32_t state[16], size_t rounds) {
219,513✔
135
   BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
219,513✔
136

137
#if defined(BOTAN_HAS_CHACHA_AVX512)
138
   if(CPUID::has(CPUID::Feature::AVX512)) {
219,513✔
139
      while(output_blocks >= 16) {
×
140
         ChaCha::chacha_avx512_x16(output, state, rounds);
×
141
         output += 16 * 64;
×
142
         output_blocks -= 16;
×
143
      }
144
   }
145
#endif
146

147
#if defined(BOTAN_HAS_CHACHA_AVX2)
148
   if(CPUID::has(CPUID::Feature::AVX2)) {
219,513✔
149
      while(output_blocks >= 8) {
335,328✔
150
         ChaCha::chacha_avx2_x8(output, state, rounds);
167,664✔
151
         output += 8 * 64;
167,664✔
152
         output_blocks -= 8;
167,664✔
153
      }
154
   }
155
#endif
156

157
#if defined(BOTAN_HAS_CHACHA_SIMD32)
158
   if(CPUID::has(CPUID::Feature::SIMD_4X32)) {
219,513✔
159
      while(output_blocks >= 4) {
271,362✔
160
         ChaCha::chacha_simd32_x4(output, state, rounds);
51,849✔
161
         output += 4 * 64;
51,849✔
162
         output_blocks -= 4;
51,849✔
163
      }
164
   }
165
#endif
166

167
   // TODO interleave rounds
168
   for(size_t i = 0; i != output_blocks; ++i) {
219,513✔
169
      uint32_t x00 = state[0];
×
170
      uint32_t x01 = state[1];
×
171
      uint32_t x02 = state[2];
×
172
      uint32_t x03 = state[3];
×
173
      uint32_t x04 = state[4];
×
174
      uint32_t x05 = state[5];
×
175
      uint32_t x06 = state[6];
×
176
      uint32_t x07 = state[7];
×
177
      uint32_t x08 = state[8];
×
178
      uint32_t x09 = state[9];
×
179
      uint32_t x10 = state[10];
×
180
      uint32_t x11 = state[11];
×
181
      uint32_t x12 = state[12];
×
182
      uint32_t x13 = state[13];
×
183
      uint32_t x14 = state[14];
×
184
      uint32_t x15 = state[15];
×
185

186
      for(size_t r = 0; r != rounds / 2; ++r) {
×
187
         chacha_quarter_round(x00, x04, x08, x12);
×
188
         chacha_quarter_round(x01, x05, x09, x13);
×
189
         chacha_quarter_round(x02, x06, x10, x14);
×
190
         chacha_quarter_round(x03, x07, x11, x15);
×
191

192
         chacha_quarter_round(x00, x05, x10, x15);
×
193
         chacha_quarter_round(x01, x06, x11, x12);
×
194
         chacha_quarter_round(x02, x07, x08, x13);
×
195
         chacha_quarter_round(x03, x04, x09, x14);
×
196
      }
197

198
      x00 += state[0];
×
199
      x01 += state[1];
×
200
      x02 += state[2];
×
201
      x03 += state[3];
×
202
      x04 += state[4];
×
203
      x05 += state[5];
×
204
      x06 += state[6];
×
205
      x07 += state[7];
×
206
      x08 += state[8];
×
207
      x09 += state[9];
×
208
      x10 += state[10];
×
209
      x11 += state[11];
×
210
      x12 += state[12];
×
211
      x13 += state[13];
×
212
      x14 += state[14];
×
213
      x15 += state[15];
×
214

215
      store_le(x00, output + 64 * i + 4 * 0);
×
216
      store_le(x01, output + 64 * i + 4 * 1);
×
217
      store_le(x02, output + 64 * i + 4 * 2);
×
218
      store_le(x03, output + 64 * i + 4 * 3);
×
219
      store_le(x04, output + 64 * i + 4 * 4);
×
220
      store_le(x05, output + 64 * i + 4 * 5);
×
221
      store_le(x06, output + 64 * i + 4 * 6);
×
222
      store_le(x07, output + 64 * i + 4 * 7);
×
223
      store_le(x08, output + 64 * i + 4 * 8);
×
224
      store_le(x09, output + 64 * i + 4 * 9);
×
225
      store_le(x10, output + 64 * i + 4 * 10);
×
226
      store_le(x11, output + 64 * i + 4 * 11);
×
227
      store_le(x12, output + 64 * i + 4 * 12);
×
228
      store_le(x13, output + 64 * i + 4 * 13);
×
229
      store_le(x14, output + 64 * i + 4 * 14);
×
230
      store_le(x15, output + 64 * i + 4 * 15);
×
231

232
      state[12]++;
×
233
      if(state[12] == 0) {
×
234
         state[13] += 1;
×
235
      }
236
   }
237
}
219,513✔
238

239
/*
240
* Combine cipher stream with message
241
*/
242
void ChaCha::cipher_bytes(const uint8_t in[], uint8_t out[], size_t length) {
529,377✔
243
   assert_key_material_set();
529,377✔
244

245
   if(m_iv_length == 12) {
528,723✔
246
      if(length > m_bytes_remaining) {
132,269✔
247
         throw Invalid_State("ChaCha 96-bit nonce keystream exhausted");
1✔
248
      }
249
      m_bytes_remaining -= length;
132,268✔
250
   }
251

252
   while(length >= m_buffer.size() - m_position) {
537,288✔
253
      const size_t available = m_buffer.size() - m_position;
8,566✔
254

255
      xor_buf(out, in, &m_buffer[m_position], available);
8,566✔
256
      chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
8,566✔
257

258
      length -= available;
8,566✔
259
      in += available;
8,566✔
260
      out += available;
8,566✔
261
      m_position = 0;
8,566✔
262
   }
263

264
   xor_buf(out, in, &m_buffer[m_position], length);
528,722✔
265

266
   m_position += length;
528,722✔
267
}
528,722✔
268

269
void ChaCha::generate_keystream(uint8_t out[], size_t length) {
232,293✔
270
   assert_key_material_set();
232,293✔
271

272
   if(m_iv_length == 12) {
232,293✔
273
      if(length > m_bytes_remaining) {
84,199✔
274
         throw Invalid_State("ChaCha 96-bit nonce keystream exhausted");
1✔
275
      }
276
      m_bytes_remaining -= length;
84,198✔
277
   }
278

279
   while(length >= m_buffer.size() - m_position) {
256,392✔
280
      const size_t available = m_buffer.size() - m_position;
24,100✔
281

282
      // TODO: this could write directly to the output buffer
283
      // instead of bouncing it through m_buffer first
284
      copy_mem(out, &m_buffer[m_position], available);
24,100✔
285
      chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
24,100✔
286

287
      length -= available;
24,100✔
288
      out += available;
24,100✔
289
      m_position = 0;
24,100✔
290
   }
291

292
   copy_mem(out, &m_buffer[m_position], length);
232,292✔
293

294
   m_position += length;
232,292✔
295
}
232,292✔
296

297
void ChaCha::initialize_state() {
185,024✔
298
   static const uint32_t TAU[] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
185,024✔
299

300
   static const uint32_t SIGMA[] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
185,024✔
301

302
   m_state[4] = m_key[0];
185,024✔
303
   m_state[5] = m_key[1];
185,024✔
304
   m_state[6] = m_key[2];
185,024✔
305
   m_state[7] = m_key[3];
185,024✔
306

307
   if(m_key.size() == 4) {
185,024✔
308
      m_state[0] = TAU[0];
210✔
309
      m_state[1] = TAU[1];
210✔
310
      m_state[2] = TAU[2];
210✔
311
      m_state[3] = TAU[3];
210✔
312

313
      m_state[8] = m_key[0];
210✔
314
      m_state[9] = m_key[1];
210✔
315
      m_state[10] = m_key[2];
210✔
316
      m_state[11] = m_key[3];
210✔
317
   } else {
318
      m_state[0] = SIGMA[0];
184,814✔
319
      m_state[1] = SIGMA[1];
184,814✔
320
      m_state[2] = SIGMA[2];
184,814✔
321
      m_state[3] = SIGMA[3];
184,814✔
322

323
      m_state[8] = m_key[4];
184,814✔
324
      m_state[9] = m_key[5];
184,814✔
325
      m_state[10] = m_key[6];
184,814✔
326
      m_state[11] = m_key[7];
184,814✔
327
   }
328

329
   m_state[12] = 0;
185,024✔
330
   m_state[13] = 0;
185,024✔
331
   m_state[14] = 0;
185,024✔
332
   m_state[15] = 0;
185,024✔
333

334
   m_position = 0;
185,024✔
335
}
185,024✔
336

337
bool ChaCha::has_keying_material() const {
974,752✔
338
   return !m_state.empty();
974,752✔
339
}
340

341
/*
* Reported buffer size in bytes: always 64, the size of one ChaCha
* block, regardless of how many blocks the internal buffer holds.
*/
size_t ChaCha::buffer_size() const {
   constexpr size_t one_block_bytes = 64;
   return one_block_bytes;
}
344

345
/*
346
* ChaCha Key Schedule
347
*/
348
void ChaCha::key_schedule(std::span<const uint8_t> key) {
29,796✔
349
   m_key.resize(key.size() / 4);
29,796✔
350
   load_le<uint32_t>(m_key.data(), key.data(), m_key.size());
29,796✔
351

352
   m_state.resize(16);
29,796✔
353

354
   const size_t chacha_block = 64;
29,796✔
355
   m_buffer.resize(parallelism() * chacha_block);
29,796✔
356

357
   set_iv(nullptr, 0);
29,796✔
358
}
29,796✔
359

360
/*
* Default IV length in bytes: 24, the length that set_iv_bytes
* processes through HChaCha.
*/
size_t ChaCha::default_iv_length() const {
   constexpr size_t extended_nonce_bytes = 24;
   return extended_nonce_bytes;
}
363

364
/*
* Accepted key lengths.
* NOTE(review): the arguments are presumably (minimum, maximum, step)
* in bytes, i.e. 16 or 32 byte keys - confirm against
* Key_Length_Specification.
*/
Key_Length_Specification ChaCha::key_spec() const {
   constexpr size_t shortest = 16;
   constexpr size_t longest = 32;
   constexpr size_t step = 16;
   return Key_Length_Specification(shortest, longest, step);
}
367

368
std::unique_ptr<StreamCipher> ChaCha::new_object() const {
552✔
369
   return std::make_unique<ChaCha>(m_rounds);
552✔
370
}
371

372
/*
* IV lengths (in bytes) accepted by set_iv_bytes: 0, 8, 12 or 24.
*/
bool ChaCha::valid_iv_length(size_t iv_len) const {
   switch(iv_len) {
      case 0:
      case 8:
      case 12:
      case 24:
         return true;
      default:
         return false;
   }
}
375

376
void ChaCha::set_iv_bytes(const uint8_t iv[], size_t length) {
185,675✔
377
   assert_key_material_set();
185,675✔
378

379
   if(!valid_iv_length(length)) {
185,351✔
380
      throw Invalid_IV_Length(name(), length);
654✔
381
   }
382

383
   initialize_state();
185,024✔
384

385
   if(length == 0) {
185,024✔
386
      // Treat zero length IV same as an all-zero IV
387
      m_state[14] = 0;
29,799✔
388
      m_state[15] = 0;
29,799✔
389
   } else if(length == 8) {
155,225✔
390
      m_state[14] = load_le<uint32_t>(iv, 0);
442✔
391
      m_state[15] = load_le<uint32_t>(iv, 1);
442✔
392
   } else if(length == 12) {
154,783✔
393
      m_state[13] = load_le<uint32_t>(iv, 0);
83,433✔
394
      m_state[14] = load_le<uint32_t>(iv, 1);
83,433✔
395
      m_state[15] = load_le<uint32_t>(iv, 2);
83,433✔
396
   } else if(length == 24) {
71,350✔
397
      m_state[12] = load_le<uint32_t>(iv, 0);
71,350✔
398
      m_state[13] = load_le<uint32_t>(iv, 1);
71,350✔
399
      m_state[14] = load_le<uint32_t>(iv, 2);
71,350✔
400
      m_state[15] = load_le<uint32_t>(iv, 3);
71,350✔
401

402
      secure_vector<uint32_t> hc(8);
71,350✔
403
      hchacha(hc.data(), m_state.data(), m_rounds);
71,350✔
404

405
      m_state[4] = hc[0];
71,350✔
406
      m_state[5] = hc[1];
71,350✔
407
      m_state[6] = hc[2];
71,350✔
408
      m_state[7] = hc[3];
71,350✔
409
      m_state[8] = hc[4];
71,350✔
410
      m_state[9] = hc[5];
71,350✔
411
      m_state[10] = hc[6];
71,350✔
412
      m_state[11] = hc[7];
71,350✔
413
      m_state[12] = 0;
71,350✔
414
      m_state[13] = 0;
71,350✔
415
      m_state[14] = load_le<uint32_t>(iv, 4);
71,350✔
416
      m_state[15] = load_le<uint32_t>(iv, 5);
71,350✔
417
   }
71,350✔
418

419
   m_iv_length = length;
185,024✔
420
   m_state13_post_iv = m_state[13];
185,024✔
421
   if(length == 12) {
185,024✔
422
      m_bytes_remaining = chacha_96bit_nonce_cap;
83,433✔
423
   }
424

425
   chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
185,024✔
426
   m_position = 0;
185,024✔
427
}
185,024✔
428

429
void ChaCha::clear() {
7,169✔
430
   zap(m_key);
7,169✔
431
   zap(m_state);
7,169✔
432
   zap(m_buffer);
7,169✔
433
   m_position = 0;
7,169✔
434
   m_iv_length = 0;
7,169✔
435
   m_state13_post_iv = 0;
7,169✔
436
   m_bytes_remaining = 0;
7,169✔
437
}
7,169✔
438

439
std::optional<uint64_t> ChaCha::remaining_keystream_bytes() const {
7✔
440
   if(!has_keying_material() || m_iv_length != 12) {
7✔
441
      return std::nullopt;
3✔
442
   }
443
   return m_bytes_remaining;
4✔
444
}
445

446
std::string ChaCha::name() const {
2,615✔
447
   return fmt("ChaCha({})", m_rounds);
2,615✔
448
}
449

450
void ChaCha::seek(uint64_t offset) {
2,152✔
451
   assert_key_material_set();
2,152✔
452

453
   const uint64_t block = offset / 64;
1,825✔
454

455
   if(m_iv_length == 12) {
1,825✔
456
      // 96 bit nonce implies a 32-bit counter; prevent seeking beyond that
457
      if((block >> 32) != 0) {
1,664✔
458
         throw Invalid_Argument("ChaCha::seek with 96-bit nonce limited to 2^32 blocks (256 GiB)");
2✔
459
      }
460
      m_state[12] = static_cast<uint32_t>(block);
1,662✔
461
      m_state[13] = m_state13_post_iv;
1,662✔
462
      m_bytes_remaining = chacha_96bit_nonce_cap - offset;
1,662✔
463
   } else {
464
      // 64-bit block counter spanning state words 12 and 13.
465
      m_state[12] = static_cast<uint32_t>(block);
161✔
466
      m_state[13] = m_state13_post_iv + static_cast<uint32_t>(block >> 32);
161✔
467
   }
468

469
   chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
1,823✔
470
   m_position = offset % 64;
1,823✔
471
}
1,823✔
472
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc