• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 21712952425

05 Feb 2026 01:16PM UTC coverage: 90.076% (+0.005%) from 90.071%
21712952425

Pull #5287

github

web-flow
Merge 1b320c06e into 8c9623340
Pull Request #5287: Split out BufferSlicer and BufferStuffer to their own headers

102242 of 113507 relevant lines covered (90.08%)

11534589.25 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.24
/src/lib/mac/poly1305/poly1305.cpp
1
/*
2
* Derived from poly1305-donna-64.h by Andrew Moon <liquidsun@gmail.com>
3
* in https://github.com/floodyberry/poly1305-donna
4
*
5
* (C) 2014 Andrew Moon
6
* (C) 2014,2025,2026 Jack Lloyd
7
*
8
* Botan is released under the Simplified BSD License (see license.txt)
9
*/
10

11
#include <botan/internal/poly1305.h>
12

13
#include <botan/internal/ct_utils.h>
14
#include <botan/internal/donna128.h>
15
#include <botan/internal/loadstor.h>
16

17
#if defined(BOTAN_HAS_POLY1305_AVX2) || defined(BOTAN_HAS_POLY1305_AVX512)
18
   #include <botan/internal/cpuid.h>
19
#endif
20

21
namespace Botan {
22

23
namespace {
24

25
// The state vector holds 64-bit words in this order:
//    pad (2 words) || accumulator h (3 words) || r (3 words) || r^2 (3 words) || ...
// Powers of r sit at the tail so that higher powers can be appended on demand.
constexpr size_t PAD_BASE = 0;           // pad[0..1]
constexpr size_t H_BASE = PAD_BASE + 2;  // h[0..2], the accumulator
constexpr size_t R_BASE = H_BASE + 3;    // r^1 at [0..2], r^2 at [3..5], r^3 at [6..8], ...
30

31
// Multiply two values in radix 2^44 representation mod (2^130 - 5)
32
// h = a * b mod p
33
BOTAN_FORCE_INLINE void poly1305_mul_44(uint64_t& h0,
131,458✔
34
                                        uint64_t& h1,
35
                                        uint64_t& h2,
36
                                        uint64_t a0,
37
                                        uint64_t a1,
38
                                        uint64_t a2,
39
                                        uint64_t b0,
40
                                        uint64_t b1,
41
                                        uint64_t b2) {
42
   constexpr uint64_t M44 = 0xFFFFFFFFFFF;
131,458✔
43
   constexpr uint64_t M42 = 0x3FFFFFFFFFF;
131,458✔
44

45
#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
46
   typedef donna128 uint128_t;
47
#endif
48

49
   const uint64_t s1 = b1 * 20;
131,458✔
50
   const uint64_t s2 = b2 * 20;
131,458✔
51

52
   const uint128_t d0 = uint128_t(a0) * b0 + uint128_t(a1) * s2 + uint128_t(a2) * s1;
131,458✔
53
   const uint64_t c0 = carry_shift(d0, 44);
131,458✔
54

55
   const uint128_t d1 = uint128_t(a0) * b1 + uint128_t(a1) * b0 + uint128_t(a2) * s2 + c0;
131,458✔
56
   const uint64_t c1 = carry_shift(d1, 44);
131,458✔
57

58
   const uint128_t d2 = uint128_t(a0) * b2 + uint128_t(a1) * b1 + uint128_t(a2) * b0 + c1;
131,458✔
59
   const uint64_t c2 = carry_shift(d2, 42);
131,458✔
60

61
   h0 = (d0 & M44) + c2 * 5;
131,458✔
62
   h1 = (d1 & M44) + (h0 >> 44);
131,458✔
63
   h0 &= M44;
131,458✔
64
   h2 = d2 & M42;
131,458✔
65
}
66

67
// Extend powers of r from current max to target
68
void poly1305_extend_powers(secure_vector<uint64_t>& X, size_t target_powers) {
129,152✔
69
   const size_t current_powers = (X.size() - 5) / 3;
129,152✔
70

71
   if(current_powers >= target_powers) {
129,152✔
72
      return;
73
   }
74

75
   // Load r^1 for multiplication
76
   const uint64_t r0 = X[R_BASE + 0];
129,124✔
77
   const uint64_t r1 = X[R_BASE + 1];
129,124✔
78
   const uint64_t r2 = X[R_BASE + 2];
129,124✔
79

80
   X.resize(5 + target_powers * 3);
129,124✔
81

82
   // Compute r^(current+1) through r^target
83
   for(size_t i = current_powers + 1; i <= target_powers; ++i) {
260,582✔
84
      const size_t offset = R_BASE + (i - 1) * 3;
131,458✔
85
      poly1305_mul_44(
131,458✔
86
         X[offset + 0], X[offset + 1], X[offset + 2], X[offset - 3], X[offset - 2], X[offset - 1], r0, r1, r2);
131,458✔
87
   }
88
}
89

90
// Initialize Poly1305 state and precompute powers of r
91
void poly1305_init(secure_vector<uint64_t>& X, const uint8_t key[32]) {
126,790✔
92
   X.clear();
126,790✔
93
   X.reserve(2 + 3 + 2 * 3);
126,790✔
94
   X.resize(2 + 3 + 3);
126,790✔
95

96
   /* Save pad for later (first 2 slots) */
97
   X[PAD_BASE + 0] = load_le<uint64_t>(key, 2);
126,790✔
98
   X[PAD_BASE + 1] = load_le<uint64_t>(key, 3);
126,790✔
99

100
   /* h = 0 (accumulator, next 3 slots) */
101
   X[H_BASE + 0] = 0;
126,790✔
102
   X[H_BASE + 1] = 0;
126,790✔
103
   X[H_BASE + 2] = 0;
126,790✔
104

105
   /* r &= 0xffffffc0ffffffc0ffffffc0fffffff (clamping) */
106
   const uint64_t t0 = load_le<uint64_t>(key, 0);
126,790✔
107
   const uint64_t t1 = load_le<uint64_t>(key, 1);
126,790✔
108

109
   const uint64_t r0 = (t0) & 0xffc0fffffff;
126,790✔
110
   const uint64_t r1 = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
126,790✔
111
   const uint64_t r2 = ((t1 >> 24)) & 0x00ffffffc0f;
126,790✔
112

113
   // Store r^1
114
   X[R_BASE + 0] = r0;
126,790✔
115
   X[R_BASE + 1] = r1;
126,790✔
116
   X[R_BASE + 2] = r2;
126,790✔
117

118
   poly1305_extend_powers(X, 2);
126,790✔
119
}
126,790✔
120

121
// Process a single block: h = (h + m) * r mod p
122
BOTAN_FORCE_INLINE void poly1305_block_single(uint64_t& h0,
370,068✔
123
                                              uint64_t& h1,
124
                                              uint64_t& h2,
125
                                              uint64_t r0,
126
                                              uint64_t r1,
127
                                              uint64_t r2,
128
                                              uint64_t s1,
129
                                              uint64_t s2,
130
                                              const uint8_t* m,
131
                                              uint64_t hibit) {
132
   constexpr uint64_t M44 = 0xFFFFFFFFFFF;
370,068✔
133
   constexpr uint64_t M42 = 0x3FFFFFFFFFF;
370,068✔
134

135
#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
136
   typedef donna128 uint128_t;
137
#endif
138

139
   const uint64_t t0 = load_le<uint64_t>(m, 0);
740,136✔
140
   const uint64_t t1 = load_le<uint64_t>(m, 1);
370,068✔
141

142
   h0 += (t0 & M44);
370,068✔
143
   h1 += ((t0 >> 44) | (t1 << 20)) & M44;
370,068✔
144
   h2 += ((t1 >> 24) & M42) | hibit;
370,068✔
145

146
   const uint128_t d0 = uint128_t(h0) * r0 + uint128_t(h1) * s2 + uint128_t(h2) * s1;
370,068✔
147
   const uint64_t c0 = carry_shift(d0, 44);
370,068✔
148

149
   const uint128_t d1 = uint128_t(h0) * r1 + uint128_t(h1) * r0 + uint128_t(h2) * s2 + c0;
370,068✔
150
   const uint64_t c1 = carry_shift(d1, 44);
370,068✔
151

152
   const uint128_t d2 = uint128_t(h0) * r2 + uint128_t(h1) * r1 + uint128_t(h2) * r0 + c1;
370,068✔
153
   const uint64_t c2 = carry_shift(d2, 42);
370,068✔
154

155
   h0 = (d0 & M44) + c2 * 5;
370,068✔
156
   h1 = (d1 & M44) + (h0 >> 44);
370,068✔
157
   h0 &= M44;
370,068✔
158
   h2 = d2 & M42;
370,068✔
159
}
160

161
// Process two blocks in parallel: h = ((h + m0) * r + m1) * r = (h + m0) * r^2 + m1 * r
162
// The multiplications by r^2 and r are independent, enabling ILP
163
BOTAN_FORCE_INLINE void poly1305_block_pair(uint64_t& h0,
71,855✔
164
                                            uint64_t& h1,
165
                                            uint64_t& h2,
166
                                            uint64_t r0,
167
                                            uint64_t r1,
168
                                            uint64_t r2,
169
                                            uint64_t s1,
170
                                            uint64_t s2,
171
                                            uint64_t rr0,
172
                                            uint64_t rr1,
173
                                            uint64_t rr2,
174
                                            uint64_t ss1,
175
                                            uint64_t ss2,
176
                                            const uint8_t* m,
177
                                            uint64_t hibit) {
178
   constexpr uint64_t M44 = 0xFFFFFFFFFFF;
71,855✔
179
   constexpr uint64_t M42 = 0x3FFFFFFFFFF;
71,855✔
180

181
#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
182
   typedef donna128 uint128_t;
183
#endif
184

185
   // Load first block (will be multiplied by r^2)
186
   const uint64_t m0_t0 = load_le<uint64_t>(m, 0);
143,710✔
187
   const uint64_t m0_t1 = load_le<uint64_t>(m, 1);
71,855✔
188

189
   // Load second block (will be multiplied by r)
190
   const uint64_t m1_t0 = load_le<uint64_t>(m + 16, 0);
71,855✔
191
   const uint64_t m1_t1 = load_le<uint64_t>(m + 16, 1);
71,855✔
192

193
   // Add first block to h
194
   h0 += (m0_t0 & M44);
71,855✔
195
   h1 += ((m0_t0 >> 44) | (m0_t1 << 20)) & M44;
71,855✔
196
   h2 += ((m0_t1 >> 24) & M42) | hibit;
71,855✔
197

198
   // Convert second block to limbs
199
   const uint64_t b0 = (m1_t0 & M44);
71,855✔
200
   const uint64_t b1 = ((m1_t0 >> 44) | (m1_t1 << 20)) & M44;
71,855✔
201
   const uint64_t b2 = ((m1_t1 >> 24) & M42) | hibit;
71,855✔
202

203
   // Compute (h + m0) * r^2 + m1 * r
204
   const uint128_t d0 = uint128_t(h0) * rr0 + uint128_t(h1) * ss2 + uint128_t(h2) * ss1 + uint128_t(b0) * r0 +
71,855✔
205
                        uint128_t(b1) * s2 + uint128_t(b2) * s1;
71,855✔
206
   const uint64_t c0 = carry_shift(d0, 44);
71,855✔
207

208
   const uint128_t d1 = uint128_t(h0) * rr1 + uint128_t(h1) * rr0 + uint128_t(h2) * ss2 + uint128_t(b0) * r1 +
71,855✔
209
                        uint128_t(b1) * r0 + uint128_t(b2) * s2 + c0;
71,855✔
210
   const uint64_t c1 = carry_shift(d1, 44);
71,855✔
211

212
   const uint128_t d2 = uint128_t(h0) * rr2 + uint128_t(h1) * rr1 + uint128_t(h2) * rr0 + uint128_t(b0) * r2 +
71,855✔
213
                        uint128_t(b1) * r1 + uint128_t(b2) * r0 + c1;
71,855✔
214
   const uint64_t c2 = carry_shift(d2, 42);
71,855✔
215

216
   h0 = (d0 & M44) + c2 * 5;
71,855✔
217
   h1 = (d1 & M44) + (h0 >> 44);
71,855✔
218
   h0 &= M44;
71,855✔
219
   h2 = d2 & M42;
71,855✔
220
}
221

222
// Absorb `blocks` consecutive 16-byte blocks starting at m into the accumulator in X.
// Blocks are consumed two at a time where possible; an odd trailing block is handled alone.
// With is_final set, no 2^128 bit is added to the block value (the caller is expected
// to have placed the terminator byte inside the block itself).
void poly1305_blocks(secure_vector<uint64_t>& X, const uint8_t* m, size_t blocks, bool is_final = false) {
   // Bit 40 of the top limb has weight 2^(88 + 40) = 2^128
   const uint64_t hibit = static_cast<uint64_t>(is_final ? 0 : 1) << 40;

   // r, together with its limbs pre-multiplied by 20
   const uint64_t r0 = X[R_BASE + 0];
   const uint64_t r1 = X[R_BASE + 1];
   const uint64_t r2 = X[R_BASE + 2];
   const uint64_t s1 = r1 * 20;
   const uint64_t s2 = r2 * 20;

   // r^2 (stored directly after r), together with its limbs pre-multiplied by 20
   const uint64_t rr0 = X[R_BASE + 3];
   const uint64_t rr1 = X[R_BASE + 4];
   const uint64_t rr2 = X[R_BASE + 5];
   const uint64_t ss1 = rr1 * 20;
   const uint64_t ss2 = rr2 * 20;

   // Work on a local copy of the accumulator
   uint64_t h0 = X[H_BASE + 0];
   uint64_t h1 = X[H_BASE + 1];
   uint64_t h2 = X[H_BASE + 2];

   for(; blocks >= 2; blocks -= 2, m += 32) {
      poly1305_block_pair(h0, h1, h2, r0, r1, r2, s1, s2, rr0, rr1, rr2, ss1, ss2, m, hibit);
   }

   // At most one block is left over
   if(blocks != 0) {
      poly1305_block_single(h0, h1, h2, r0, r1, r2, s1, s2, m, hibit);
   }

   // Write the accumulator back
   X[H_BASE + 0] = h0;
   X[H_BASE + 1] = h1;
   X[H_BASE + 2] = h2;
}
390,947✔
262

263
// Produce the tag: reduce the accumulator fully mod p = 2^130 - 5, add the pad,
// and write the low 128 bits of the sum to mac as 16 little-endian bytes.
// Every word of X is zeroed before returning (its size is left as is).
void poly1305_finish(secure_vector<uint64_t>& X, uint8_t mac[16]) {
   constexpr uint64_t M44 = 0xFFFFFFFFFFF;
   constexpr uint64_t M42 = 0x3FFFFFFFFFF;

   /* fully carry h */
   uint64_t h0 = X[H_BASE + 0];
   uint64_t h1 = X[H_BASE + 1];
   uint64_t h2 = X[H_BASE + 2];

   uint64_t c = (h1 >> 44);
   h1 &= M44;
   h2 += c;
   c = (h2 >> 42);
   h2 &= M42;
   h0 += c * 5;
   c = (h0 >> 44);
   h0 &= M44;
   h1 += c;
   c = (h1 >> 44);
   h1 &= M44;
   h2 += c;
   c = (h2 >> 42);
   h2 &= M42;
   h0 += c * 5;
   c = (h0 >> 44);
   h0 &= M44;
   h1 += c;

   /* compute g = h + 5 - 2^130 = h - p, tracking the carry out of every limb */
   uint64_t g0 = h0 + 5;
   c = (g0 >> 44);
   g0 &= M44;
   uint64_t g1 = h1 + c;
   c = (g1 >> 44);
   g1 &= M44;
   uint64_t g2 = h2 + c;
   // Carry out of bit 130 of h + 5: set exactly when h + 5 >= 2^130, i.e. when h >= p.
   // The selection below has to use this carry; the carry out of limb 1 alone is also
   // set for values with h1 all ones and h0 >= 2^44 - 5 that are still smaller than p.
   c = (g2 >> 42);
   g2 &= M42;

   /* select h if h < p, or h + -p if h >= p */
   const auto c_mask = CT::Mask<uint64_t>::expand(c);
   h0 = c_mask.select(g0, h0);
   h1 = c_mask.select(g1, h1);
   h2 = c_mask.select(g2, h2);

   /* h = (h + pad) */
   const uint64_t t0 = X[PAD_BASE + 0];
   const uint64_t t1 = X[PAD_BASE + 1];

   h0 += ((t0)&M44);
   c = (h0 >> 44);
   h0 &= M44;
   h1 += (((t0 >> 44) | (t1 << 20)) & M44) + c;
   c = (h1 >> 44);
   h1 &= M44;
   h2 += (((t1 >> 24)) & M42) + c;
   h2 &= M42;

   /* mac = h % (2^128): repack the 44/44/42-bit limbs into two 64-bit words */
   h0 = ((h0) | (h1 << 44));
   h1 = ((h1 >> 20) | (h2 << 24));

   store_le(mac, h0, h1);

   /* zero out the state */
   clear_mem(X.data(), X.size());
}
112,762✔
328

329
}  // namespace
330

331
void Poly1305::clear() {
7,186✔
332
   zap(m_poly);
7,186✔
333
   m_buffer.clear();
7,186✔
334
}
7,186✔
335

336
bool Poly1305::has_keying_material() const {
1,215,121✔
337
   // Minimum size: pad(2) + accum(3) + r(3) + r^2(3) = 11
338
   return m_poly.size() >= 11;
1,215,121✔
339
}
340

341
// Install a new key: discard buffered input, then rebuild the state from the key bytes.
void Poly1305::key_schedule(std::span<const uint8_t> key) {
   m_buffer.clear();

   const uint8_t* key_bytes = key.data();
   poly1305_init(m_poly, key_bytes);
}
126,790✔
346

347
// Name of the implementation in use: the value reported by CPUID::check for the first
// compiled-in SIMD feature it reports (AVX512 is tried before AVX2), otherwise "base".
std::string Poly1305::provider() const {
#if defined(BOTAN_HAS_POLY1305_AVX512)
   {
      const auto avx512 = CPUID::check(CPUID::Feature::AVX512);
      if(avx512) {
         return *avx512;
      }
   }
#endif

#if defined(BOTAN_HAS_POLY1305_AVX2)
   {
      const auto avx2 = CPUID::check(CPUID::Feature::AVX2);
      if(avx2) {
         return *avx2;
      }
   }
#endif

   return "base";
}
362

363
// Absorb more message bytes. Input passes through m_buffer so that the block
// functions only ever see whole 16-byte blocks; large aligned runs go to the
// AVX512 / AVX2 code when it is compiled in and the CPU reports the feature.
void Poly1305::add_data(std::span<const uint8_t> input) {
   assert_key_material_set();

   BufferSlicer in(input);

   while(!in.empty()) {
      // A block handed back here is absorbed on its own
      // (presumably one completed from previously buffered bytes - confirm against the buffer class)
      if(const auto buffered_block = m_buffer.handle_unaligned_data(in)) {
         poly1305_blocks(m_poly, buffered_block->data(), 1);
      }

      if(!m_buffer.in_alignment()) {
         continue;
      }

      const auto [bulk, bulk_blocks] = m_buffer.aligned_data_to_process(in);
      if(!(bulk_blocks > 0)) {
         continue;
      }

      const uint8_t* cursor = bulk.data();
      size_t pending = bulk_blocks;

#if defined(BOTAN_HAS_POLY1305_AVX512)
      if(pending >= 8 * 3 && CPUID::has(CPUID::Feature::AVX512)) {
         // r^3 through r^8 are only computed once the AVX512 path is first taken
         poly1305_extend_powers(m_poly, 8);
         const size_t done = poly1305_avx512_blocks(m_poly, cursor, pending);
         cursor += done * 16;
         pending -= done;
      }
#endif

#if defined(BOTAN_HAS_POLY1305_AVX2)
      if(pending >= 4 * 6 && CPUID::has(CPUID::Feature::AVX2)) {
         // r^3 and r^4 are only computed once the AVX2 path is first taken
         poly1305_extend_powers(m_poly, 4);
         const size_t done = poly1305_avx2_blocks(m_poly, cursor, pending);
         cursor += done * 16;
         pending -= done;
      }
#endif

      // Blocks not taken by a SIMD path go through the scalar code
      if(pending > 0) {
         poly1305_blocks(m_poly, cursor, pending);
      }
   }
}
1,077,387✔
406

407
// Finish the MAC: absorb any partial trailing block, write the tag to out,
// then discard the key state and the buffer.
void Poly1305::final_result(std::span<uint8_t> out) {
   assert_key_material_set();

   // Leftover bytes get a 0x01 terminator and zero fill, and are absorbed as a
   // final block (is_final = true)
   if(!m_buffer.in_alignment()) {
      const uint8_t terminator = 0x01;
      m_buffer.append({&terminator, 1});
      m_buffer.fill_up_with_zeros();

      const auto last_block = m_buffer.consume();
      poly1305_blocks(m_poly, last_block.data(), 1, true);
   }

   poly1305_finish(m_poly, out.data());

   // With the state emptied, a new key has to be set before the next message
   m_poly.clear();
   m_buffer.clear();
}
112,762✔
422

423
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc