• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 13535848071

26 Feb 2025 03:22AM UTC coverage: 91.695% (-0.001%) from 91.696%
13535848071

push

github

web-flow
Merge pull request #4718 from randombit/jack/split-cpuid

Make cpuid module optional

95832 of 104512 relevant lines covered (91.69%)

11266034.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.49
/src/lib/block/aes/aes.cpp
1
/*
2
* (C) 1999-2010,2015,2017,2018,2020 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6

7
#include <botan/internal/aes.h>
8

9
#include <botan/internal/bit_ops.h>
10
#include <botan/internal/bswap.h>
11
#include <botan/internal/ct_utils.h>
12
#include <botan/internal/loadstor.h>
13
#include <botan/internal/rotate.h>
14

15
#if defined(BOTAN_HAS_CPUID)
16
   #include <botan/internal/cpuid.h>
17
#endif
18

19
namespace Botan {
20

21
#if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI)
22
   #define BOTAN_HAS_HW_AES_SUPPORT
23
#endif
24

25
/*
26
* One of three AES implementation strategies is used to get a constant time
27
* implementation which is immune to common cache/timing based side channels:
28
*
29
* - If AES hardware support is available (AES-NI, POWER8, Aarch64) use that
30
*
31
* - If 128-bit SIMD with byte shuffles is available (SSSE3, NEON, or Altivec),
32
*   use the vperm technique published by Mike Hamburg at CHES 2009.
33
*
34
* - If no hardware or SIMD support, fall back to a constant time bitsliced
35
*   implementation. This uses 32-bit words resulting in 2 blocks being processed
36
*   in parallel. Moving to 4 blocks (with 64-bit words) would approximately
37
*   double performance on 64-bit CPUs. Likewise moving to 128 bit SIMD would
38
*   again approximately double performance vs 64-bit. However the assumption is
39
*   that most 64-bit CPUs either have hardware AES or SIMD shuffle support and
40
*   that the majority of users falling back to this code will be 32-bit cores.
41
*   If this assumption proves to be unsound, the bitsliced code can easily be
42
*   extended to operate on either 32 or 64 bit words depending on the native
43
*   wordsize of the target processor.
44
*
45
* Useful references
46
*
47
* - "Accelerating AES with Vector Permute Instructions" Mike Hamburg
48
*   https://www.shiftleft.org/papers/vector_aes/vector_aes.pdf
49
*
50
* - "Faster and Timing-Attack Resistant AES-GCM" Käsper and Schwabe
51
*   https://eprint.iacr.org/2009/129.pdf
52
*
53
* - "A new combinational logic minimization technique with applications to cryptology."
54
*   Boyar and Peralta https://eprint.iacr.org/2009/191.pdf
55
*
56
* - "A depth-16 circuit for the AES S-box" Boyar and Peralta
57
*    https://eprint.iacr.org/2011/332.pdf
58
*
59
* - "A Very Compact S-box for AES" Canright
60
*   https://www.iacr.org/archive/ches2005/032.pdf
61
*   https://core.ac.uk/download/pdf/36694529.pdf (extended)
62
*/
63

64
namespace {
65

66
/*
67
This is an AES sbox circuit which can execute in bitsliced mode up to 32x in
68
parallel.
69

70
The circuit is from the "Circuit Minimization Team" group
71
http://www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html
72
http://www.cs.yale.edu/homes/peralta/CircuitStuff/SLP_AES_113.txt
73

74
This circuit has size 113 and depth 27. In software it is much faster than
75
circuits which are considered faster for hardware purposes (where circuit depth
76
is the critical constraint), because unlike in hardware, on common CPUs we can
77
only execute - at best - 3 or 4 logic operations per cycle. So a smaller circuit
78
is superior. On an x86-64 machine this circuit is about 15% faster than the
79
circuit of size 128 and depth 16 given in "A depth-16 circuit for the AES S-box".
80

81
Another circuit for AES Sbox of size 102 and depth 24 is described in "New
82
Circuit Minimization Techniques for Smaller and Faster AES SBoxes"
83
[https://eprint.iacr.org/2019/802] however it relies on "non-standard" gates
84
like MUX, NOR, NAND, etc and so in practice in bitsliced software, its size is
85
actually a bit larger than this circuit, as few CPUs have such instructions and
86
otherwise they must be emulated using a sequence of available bit operations.
87
*/
88
/*
* Apply the AES S-box circuit in place to eight 32-bit words.
*
* V[0..7] are read once into U0..U7, pushed through a fixed network of
* XOR, AND and NOT operations, and the eight results S0..S7 are written
* back to V[0..7]. There are no branches and no table lookups; every bit
* position of the words is processed independently by the same gates.
*
* NOTE(review): the caller SE_word() in this file places bit 7 of each
* byte in V[0] and bit 0 in V[7]; the same ordering is presumably expected
* from every caller - confirm before reusing elsewhere.
*/
void AES_SBOX(uint32_t V[8]) {
98,095✔
89
   // Snapshot the inputs so the outputs can be stored back into V at the end
   const uint32_t U0 = V[0];
98,095✔
90
   const uint32_t U1 = V[1];
98,095✔
91
   const uint32_t U2 = V[2];
98,095✔
92
   const uint32_t U3 = V[3];
98,095✔
93
   const uint32_t U4 = V[4];
98,095✔
94
   const uint32_t U5 = V[5];
98,095✔
95
   const uint32_t U6 = V[6];
98,095✔
96
   const uint32_t U7 = V[7];
98,095✔
97

98
   // XOR-only combinations of the inputs (y1..y21, with t0/t1 as temporaries)
   const uint32_t y14 = U3 ^ U5;
98,095✔
99
   const uint32_t y13 = U0 ^ U6;
98,095✔
100
   const uint32_t y9 = U0 ^ U3;
98,095✔
101
   const uint32_t y8 = U0 ^ U5;
98,095✔
102
   const uint32_t t0 = U1 ^ U2;
98,095✔
103
   const uint32_t y1 = t0 ^ U7;
98,095✔
104
   const uint32_t y4 = y1 ^ U3;
98,095✔
105
   const uint32_t y12 = y13 ^ y14;
98,095✔
106
   const uint32_t y2 = y1 ^ U0;
98,095✔
107
   const uint32_t y5 = y1 ^ U6;
98,095✔
108
   const uint32_t y3 = y5 ^ y8;
98,095✔
109
   const uint32_t t1 = U4 ^ y12;
98,095✔
110
   const uint32_t y15 = t1 ^ U5;
98,095✔
111
   const uint32_t y20 = t1 ^ U1;
98,095✔
112
   const uint32_t y6 = y15 ^ U7;
98,095✔
113
   const uint32_t y10 = y15 ^ t0;
98,095✔
114
   const uint32_t y11 = y20 ^ y9;
98,095✔
115
   const uint32_t y7 = U7 ^ y11;
98,095✔
116
   const uint32_t y17 = y10 ^ y11;
98,095✔
117
   const uint32_t y19 = y10 ^ y8;
98,095✔
118
   const uint32_t y16 = t0 ^ y11;
98,095✔
119
   const uint32_t y21 = y13 ^ y16;
98,095✔
120
   const uint32_t y18 = U0 ^ y16;
98,095✔
121
   // Mixed AND/XOR network over the y values (t2..t45)
   const uint32_t t2 = y12 & y15;
98,095✔
122
   const uint32_t t3 = y3 & y6;
98,095✔
123
   const uint32_t t4 = t3 ^ t2;
98,095✔
124
   const uint32_t t5 = y4 & U7;
98,095✔
125
   const uint32_t t6 = t5 ^ t2;
98,095✔
126
   const uint32_t t7 = y13 & y16;
98,095✔
127
   const uint32_t t8 = y5 & y1;
98,095✔
128
   const uint32_t t9 = t8 ^ t7;
98,095✔
129
   const uint32_t t10 = y2 & y7;
98,095✔
130
   const uint32_t t11 = t10 ^ t7;
98,095✔
131
   const uint32_t t12 = y9 & y11;
98,095✔
132
   const uint32_t t13 = y14 & y17;
98,095✔
133
   const uint32_t t14 = t13 ^ t12;
98,095✔
134
   const uint32_t t15 = y8 & y10;
98,095✔
135
   const uint32_t t16 = t15 ^ t12;
98,095✔
136
   const uint32_t t17 = t4 ^ y20;
98,095✔
137
   const uint32_t t18 = t6 ^ t16;
98,095✔
138
   const uint32_t t19 = t9 ^ t14;
98,095✔
139
   const uint32_t t20 = t11 ^ t16;
98,095✔
140
   const uint32_t t21 = t17 ^ t14;
98,095✔
141
   const uint32_t t22 = t18 ^ y19;
98,095✔
142
   const uint32_t t23 = t19 ^ y21;
98,095✔
143
   const uint32_t t24 = t20 ^ y18;
98,095✔
144
   const uint32_t t25 = t21 ^ t22;
98,095✔
145
   const uint32_t t26 = t21 & t23;
98,095✔
146
   const uint32_t t27 = t24 ^ t26;
98,095✔
147
   const uint32_t t28 = t25 & t27;
98,095✔
148
   const uint32_t t29 = t28 ^ t22;
98,095✔
149
   const uint32_t t30 = t23 ^ t24;
98,095✔
150
   const uint32_t t31 = t22 ^ t26;
98,095✔
151
   const uint32_t t32 = t31 & t30;
98,095✔
152
   const uint32_t t33 = t32 ^ t24;
98,095✔
153
   const uint32_t t34 = t23 ^ t33;
98,095✔
154
   const uint32_t t35 = t27 ^ t33;
98,095✔
155
   const uint32_t t36 = t24 & t35;
98,095✔
156
   const uint32_t t37 = t36 ^ t34;
98,095✔
157
   const uint32_t t38 = t27 ^ t36;
98,095✔
158
   const uint32_t t39 = t29 & t38;
98,095✔
159
   const uint32_t t40 = t25 ^ t39;
98,095✔
160
   const uint32_t t41 = t40 ^ t37;
98,095✔
161
   const uint32_t t42 = t29 ^ t33;
98,095✔
162
   const uint32_t t43 = t29 ^ t40;
98,095✔
163
   const uint32_t t44 = t33 ^ t37;
98,095✔
164
   const uint32_t t45 = t42 ^ t41;
98,095✔
165
   // Eighteen AND products (z0..z17) of the t values with the y values / U7
   const uint32_t z0 = t44 & y15;
98,095✔
166
   const uint32_t z1 = t37 & y6;
98,095✔
167
   const uint32_t z2 = t33 & U7;
98,095✔
168
   const uint32_t z3 = t43 & y16;
98,095✔
169
   const uint32_t z4 = t40 & y1;
98,095✔
170
   const uint32_t z5 = t29 & y7;
98,095✔
171
   const uint32_t z6 = t42 & y11;
98,095✔
172
   const uint32_t z7 = t45 & y17;
98,095✔
173
   const uint32_t z8 = t41 & y10;
98,095✔
174
   const uint32_t z9 = t44 & y12;
98,095✔
175
   const uint32_t z10 = t37 & y3;
98,095✔
176
   const uint32_t z11 = t33 & y4;
98,095✔
177
   const uint32_t z12 = t43 & y13;
98,095✔
178
   const uint32_t z13 = t40 & y5;
98,095✔
179
   const uint32_t z14 = t29 & y2;
98,095✔
180
   const uint32_t z15 = t42 & y9;
98,095✔
181
   const uint32_t z16 = t45 & y14;
98,095✔
182
   const uint32_t z17 = t41 & y8;
98,095✔
183
   // XOR-only recombination of the z values into S0..S7; the three
   // complements (tc18, S1, S2) are the only NOT gates in the circuit
   const uint32_t tc1 = z15 ^ z16;
98,095✔
184
   const uint32_t tc2 = z10 ^ tc1;
98,095✔
185
   const uint32_t tc3 = z9 ^ tc2;
98,095✔
186
   const uint32_t tc4 = z0 ^ z2;
98,095✔
187
   const uint32_t tc5 = z1 ^ z0;
98,095✔
188
   const uint32_t tc6 = z3 ^ z4;
98,095✔
189
   const uint32_t tc7 = z12 ^ tc4;
98,095✔
190
   const uint32_t tc8 = z7 ^ tc6;
98,095✔
191
   const uint32_t tc9 = z8 ^ tc7;
98,095✔
192
   const uint32_t tc10 = tc8 ^ tc9;
98,095✔
193
   const uint32_t tc11 = tc6 ^ tc5;
98,095✔
194
   const uint32_t tc12 = z3 ^ z5;
98,095✔
195
   const uint32_t tc13 = z13 ^ tc1;
98,095✔
196
   const uint32_t tc14 = tc4 ^ tc12;
98,095✔
197
   const uint32_t S3 = tc3 ^ tc11;
98,095✔
198
   const uint32_t tc16 = z6 ^ tc8;
98,095✔
199
   const uint32_t tc17 = z14 ^ tc10;
98,095✔
200
   const uint32_t tc18 = ~tc13 ^ tc14;
98,095✔
201
   const uint32_t S7 = z12 ^ tc18;
98,095✔
202
   const uint32_t tc20 = z15 ^ tc16;
98,095✔
203
   const uint32_t tc21 = tc2 ^ z11;
98,095✔
204
   const uint32_t S0 = tc3 ^ tc16;
98,095✔
205
   const uint32_t S6 = tc10 ^ tc18;
98,095✔
206
   const uint32_t S4 = tc14 ^ S3;
98,095✔
207
   const uint32_t S1 = ~(S3 ^ tc16);
98,095✔
208
   const uint32_t tc26 = tc17 ^ tc20;
98,095✔
209
   const uint32_t S2 = ~(tc26 ^ z17);
98,095✔
210
   const uint32_t S5 = tc21 ^ tc17;
98,095✔
211

212
   // Store the results over the inputs
   V[0] = S0;
98,095✔
213
   V[1] = S1;
98,095✔
214
   V[2] = S2;
98,095✔
215
   V[3] = S3;
98,095✔
216
   V[4] = S4;
98,095✔
217
   V[5] = S5;
98,095✔
218
   V[6] = S6;
98,095✔
219
   V[7] = S7;
98,095✔
220
}
98,095✔
221

222
/*
223
A circuit for inverse AES Sbox of size 121 and depth 21 from
224
http://www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html
225
http://www.cs.yale.edu/homes/peralta/CircuitStuff/Sinv.txt
226
*/
227
/*
* Apply the inverse AES S-box circuit in place to eight 32-bit words.
*
* V[0..7] are read once into U0..U7, pushed through a fixed network of
* XOR, AND and NOT operations, and the eight results S0..S7 are written
* back to V[0..7]. There are no branches and no table lookups; every bit
* position of the words is processed independently by the same gates.
*/
void AES_INV_SBOX(uint32_t V[8]) {
33,420✔
228
   // Snapshot the inputs so the outputs can be stored back into V at the end
   const uint32_t U0 = V[0];
33,420✔
229
   const uint32_t U1 = V[1];
33,420✔
230
   const uint32_t U2 = V[2];
33,420✔
231
   const uint32_t U3 = V[3];
33,420✔
232
   const uint32_t U4 = V[4];
33,420✔
233
   const uint32_t U5 = V[5];
33,420✔
234
   const uint32_t U6 = V[6];
33,420✔
235
   const uint32_t U7 = V[7];
33,420✔
236

237
   // Y0..Y7: XOR / complement combinations of the inputs
   const uint32_t Y0 = U0 ^ U3;
33,420✔
238
   const uint32_t Y2 = ~(U1 ^ U3);
33,420✔
239
   const uint32_t Y4 = U0 ^ Y2;
33,420✔
240
   const uint32_t RTL0 = U6 ^ U7;
33,420✔
241
   const uint32_t Y1 = Y2 ^ RTL0;
33,420✔
242
   const uint32_t Y7 = ~(U2 ^ Y1);
33,420✔
243
   const uint32_t RTL1 = U3 ^ U4;
33,420✔
244
   const uint32_t Y6 = ~(U7 ^ RTL1);
33,420✔
245
   const uint32_t Y3 = Y1 ^ RTL1;
33,420✔
246
   const uint32_t RTL2 = ~(U0 ^ U2);
33,420✔
247
   const uint32_t Y5 = U5 ^ RTL2;
33,420✔
248
   // Pairwise sums of the Y values, reused by the AND stages below
   const uint32_t sa1 = Y0 ^ Y2;
33,420✔
249
   const uint32_t sa0 = Y1 ^ Y3;
33,420✔
250
   const uint32_t sb1 = Y4 ^ Y6;
33,420✔
251
   const uint32_t sb0 = Y5 ^ Y7;
33,420✔
252
   const uint32_t ah = Y0 ^ Y1;
33,420✔
253
   const uint32_t al = Y2 ^ Y3;
33,420✔
254
   const uint32_t aa = sa0 ^ sa1;
33,420✔
255
   const uint32_t bh = Y4 ^ Y5;
33,420✔
256
   const uint32_t bl = Y6 ^ Y7;
33,420✔
257
   const uint32_t bb = sb0 ^ sb1;
33,420✔
258
   const uint32_t ab20 = sa0 ^ sb0;
33,420✔
259
   const uint32_t ab22 = al ^ bl;
33,420✔
260
   const uint32_t ab23 = Y3 ^ Y7;
33,420✔
261
   const uint32_t ab21 = sa1 ^ sb1;
33,420✔
262
   const uint32_t abcd1 = ah & bh;
33,420✔
263
   const uint32_t rr1 = Y0 & Y4;
33,420✔
264
   const uint32_t ph11 = ab20 ^ abcd1;
33,420✔
265
   const uint32_t t01 = Y1 & Y5;
33,420✔
266
   const uint32_t ph01 = t01 ^ abcd1;
33,420✔
267
   const uint32_t abcd2 = al & bl;
33,420✔
268
   const uint32_t r1 = Y2 & Y6;
33,420✔
269
   const uint32_t pl11 = ab22 ^ abcd2;
33,420✔
270
   const uint32_t r2 = Y3 & Y7;
33,420✔
271
   const uint32_t pl01 = r2 ^ abcd2;
33,420✔
272
   const uint32_t r3 = sa0 & sb0;
33,420✔
273
   const uint32_t vr1 = aa & bb;
33,420✔
274
   const uint32_t pr1 = vr1 ^ r3;
33,420✔
275
   const uint32_t wr1 = sa1 & sb1;
33,420✔
276
   const uint32_t qr1 = wr1 ^ r3;
33,420✔
277
   const uint32_t ab0 = ph11 ^ rr1;
33,420✔
278
   const uint32_t ab1 = ph01 ^ ab21;
33,420✔
279
   const uint32_t ab2 = pl11 ^ r1;
33,420✔
280
   const uint32_t ab3 = pl01 ^ qr1;
33,420✔
281
   const uint32_t cp1 = ab0 ^ pr1;
33,420✔
282
   const uint32_t cp2 = ab1 ^ qr1;
33,420✔
283
   const uint32_t cp3 = ab2 ^ pr1;
33,420✔
284
   const uint32_t cp4 = ab3 ^ ab23;
33,420✔
285
   // d0..d3 are derived from cp1..cp4 through the tinv* AND/XOR gates
   const uint32_t tinv1 = cp3 ^ cp4;
33,420✔
286
   const uint32_t tinv2 = cp3 & cp1;
33,420✔
287
   const uint32_t tinv3 = cp2 ^ tinv2;
33,420✔
288
   const uint32_t tinv4 = cp1 ^ cp2;
33,420✔
289
   const uint32_t tinv5 = cp4 ^ tinv2;
33,420✔
290
   const uint32_t tinv6 = tinv5 & tinv4;
33,420✔
291
   const uint32_t tinv7 = tinv3 & tinv1;
33,420✔
292
   const uint32_t d2 = cp4 ^ tinv7;
33,420✔
293
   const uint32_t d0 = cp2 ^ tinv6;
33,420✔
294
   const uint32_t tinv8 = cp1 & cp4;
33,420✔
295
   const uint32_t tinv9 = tinv4 & tinv8;
33,420✔
296
   const uint32_t tinv10 = tinv4 ^ tinv2;
33,420✔
297
   const uint32_t d1 = tinv9 ^ tinv10;
33,420✔
298
   const uint32_t tinv11 = cp2 & cp3;
33,420✔
299
   const uint32_t tinv12 = tinv1 & tinv11;
33,420✔
300
   const uint32_t tinv13 = tinv1 ^ tinv2;
33,420✔
301
   const uint32_t d3 = tinv12 ^ tinv13;
33,420✔
302
   const uint32_t sd1 = d1 ^ d3;
33,420✔
303
   const uint32_t sd0 = d0 ^ d2;
33,420✔
304
   const uint32_t dl = d0 ^ d1;
33,420✔
305
   const uint32_t dh = d2 ^ d3;
33,420✔
306
   const uint32_t dd = sd0 ^ sd1;
33,420✔
307
   // AND products of the d values with the Y-derived values
   const uint32_t abcd3 = dh & bh;
33,420✔
308
   const uint32_t rr2 = d3 & Y4;
33,420✔
309
   const uint32_t t02 = d2 & Y5;
33,420✔
310
   const uint32_t abcd4 = dl & bl;
33,420✔
311
   const uint32_t r4 = d1 & Y6;
33,420✔
312
   const uint32_t r5 = d0 & Y7;
33,420✔
313
   const uint32_t r6 = sd0 & sb0;
33,420✔
314
   const uint32_t vr2 = dd & bb;
33,420✔
315
   const uint32_t wr2 = sd1 & sb1;
33,420✔
316
   const uint32_t abcd5 = dh & ah;
33,420✔
317
   const uint32_t r7 = d3 & Y0;
33,420✔
318
   const uint32_t r8 = d2 & Y1;
33,420✔
319
   const uint32_t abcd6 = dl & al;
33,420✔
320
   const uint32_t r9 = d1 & Y2;
33,420✔
321
   const uint32_t r10 = d0 & Y3;
33,420✔
322
   const uint32_t r11 = sd0 & sa0;
33,420✔
323
   const uint32_t vr3 = dd & aa;
33,420✔
324
   const uint32_t wr3 = sd1 & sa1;
33,420✔
325
   // XOR-only recombination of the products into the outputs S0..S7
   const uint32_t ph12 = rr2 ^ abcd3;
33,420✔
326
   const uint32_t ph02 = t02 ^ abcd3;
33,420✔
327
   const uint32_t pl12 = r4 ^ abcd4;
33,420✔
328
   const uint32_t pl02 = r5 ^ abcd4;
33,420✔
329
   const uint32_t pr2 = vr2 ^ r6;
33,420✔
330
   const uint32_t qr2 = wr2 ^ r6;
33,420✔
331
   const uint32_t p0 = ph12 ^ pr2;
33,420✔
332
   const uint32_t p1 = ph02 ^ qr2;
33,420✔
333
   const uint32_t p2 = pl12 ^ pr2;
33,420✔
334
   const uint32_t p3 = pl02 ^ qr2;
33,420✔
335
   const uint32_t ph13 = r7 ^ abcd5;
33,420✔
336
   const uint32_t ph03 = r8 ^ abcd5;
33,420✔
337
   const uint32_t pl13 = r9 ^ abcd6;
33,420✔
338
   const uint32_t pl03 = r10 ^ abcd6;
33,420✔
339
   const uint32_t pr3 = vr3 ^ r11;
33,420✔
340
   const uint32_t qr3 = wr3 ^ r11;
33,420✔
341
   const uint32_t p4 = ph13 ^ pr3;
33,420✔
342
   const uint32_t S7 = ph03 ^ qr3;
33,420✔
343
   const uint32_t p6 = pl13 ^ pr3;
33,420✔
344
   const uint32_t p7 = pl03 ^ qr3;
33,420✔
345
   const uint32_t S3 = p1 ^ p6;
33,420✔
346
   const uint32_t S6 = p2 ^ p6;
33,420✔
347
   const uint32_t S0 = p3 ^ p6;
33,420✔
348
   const uint32_t X11 = p0 ^ p2;
33,420✔
349
   const uint32_t S5 = S0 ^ X11;
33,420✔
350
   const uint32_t X13 = p4 ^ p7;
33,420✔
351
   const uint32_t X14 = X11 ^ X13;
33,420✔
352
   const uint32_t S1 = S3 ^ X14;
33,420✔
353
   const uint32_t X16 = p1 ^ S7;
33,420✔
354
   const uint32_t S2 = X14 ^ X16;
33,420✔
355
   const uint32_t X18 = p0 ^ p4;
33,420✔
356
   const uint32_t X19 = S5 ^ X16;
33,420✔
357
   const uint32_t S4 = X18 ^ X19;
33,420✔
358

359
   // Store the results over the inputs
   V[0] = S0;
33,420✔
360
   V[1] = S1;
33,420✔
361
   V[2] = S2;
33,420✔
362
   V[3] = S3;
33,420✔
363
   V[4] = S4;
33,420✔
364
   V[5] = S5;
33,420✔
365
   V[6] = S6;
33,420✔
366
   V[7] = S7;
33,420✔
367
}
33,420✔
368

369
inline void bit_transpose(uint32_t B[8]) {
13,668✔
370
   swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1);
13,668✔
371
   swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1);
13,668✔
372
   swap_bits<uint32_t>(B[5], B[4], 0x55555555, 1);
13,668✔
373
   swap_bits<uint32_t>(B[7], B[6], 0x55555555, 1);
13,668✔
374

375
   swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2);
13,668✔
376
   swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2);
13,668✔
377
   swap_bits<uint32_t>(B[6], B[4], 0x33333333, 2);
13,668✔
378
   swap_bits<uint32_t>(B[7], B[5], 0x33333333, 2);
13,668✔
379

380
   swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4);
13,668✔
381
   swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4);
13,668✔
382
   swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4);
13,668✔
383
   swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4);
13,668✔
384
}
13,668✔
385

386
inline void ks_expand(uint32_t B[8], const uint32_t K[], size_t r) {
75,560✔
387
   /*
388
   This is bit_transpose of K[r..r+4] || K[r..r+4], we can save some computation
389
   due to knowing the first and second halves are the same data.
390
   */
391
   for(size_t i = 0; i != 4; ++i) {
377,800✔
392
      B[i] = K[r + i];
302,240✔
393
   }
394

395
   swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1);
75,560✔
396
   swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1);
75,560✔
397

398
   swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2);
75,560✔
399
   swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2);
75,560✔
400

401
   B[4] = B[0];
75,560✔
402
   B[5] = B[1];
75,560✔
403
   B[6] = B[2];
75,560✔
404
   B[7] = B[3];
75,560✔
405

406
   swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4);
75,560✔
407
   swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4);
75,560✔
408
   swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4);
75,560✔
409
   swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4);
75,560✔
410
}
75,560✔
411

412
inline void shift_rows(uint32_t B[8]) {
49,882✔
413
   // 3 0 1 2 7 4 5 6 10 11 8 9 14 15 12 13 17 18 19 16 21 22 23 20 24 25 26 27 28 29 30 31
414
   if constexpr(HasNative64BitRegisters) {
49,882✔
415
      for(size_t i = 0; i != 8; i += 2) {
249,410✔
416
         uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i + 1];
199,528✔
417
         x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2);
199,528✔
418
         x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1);
199,528✔
419
         B[i] = static_cast<uint32_t>(x >> 32);
199,528✔
420
         B[i + 1] = static_cast<uint32_t>(x);
199,528✔
421
      }
422
   } else {
423
      for(size_t i = 0; i != 8; ++i) {
424
         uint32_t x = B[i];
425
         x = bit_permute_step<uint32_t>(x, 0x00223311, 2);
426
         x = bit_permute_step<uint32_t>(x, 0x00550055, 1);
427
         B[i] = x;
428
      }
429
   }
430
}
49,882✔
431

432
inline void inv_shift_rows(uint32_t B[8]) {
33,420✔
433
   // Inverse of shift_rows, just inverting the steps
434

435
   if constexpr(HasNative64BitRegisters) {
33,420✔
436
      for(size_t i = 0; i != 8; i += 2) {
167,100✔
437
         uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i + 1];
133,680✔
438
         x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1);
133,680✔
439
         x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2);
133,680✔
440
         B[i] = static_cast<uint32_t>(x >> 32);
133,680✔
441
         B[i + 1] = static_cast<uint32_t>(x);
133,680✔
442
      }
443
   } else {
444
      for(size_t i = 0; i != 8; ++i) {
445
         uint32_t x = B[i];
446
         x = bit_permute_step<uint32_t>(x, 0x00550055, 1);
447
         x = bit_permute_step<uint32_t>(x, 0x00223311, 2);
448
         B[i] = x;
449
      }
450
   }
451
}
33,420✔
452

453
inline void mix_columns(uint32_t B[8]) {
76,468✔
454
   // carry high bits in B[0] to positions in 0x1b == 0b11011
455
   const uint32_t X2[8] = {
76,468✔
456
      B[1],
457
      B[2],
458
      B[3],
459
      B[4] ^ B[0],
76,468✔
460
      B[5] ^ B[0],
76,468✔
461
      B[6],
462
      B[7] ^ B[0],
76,468✔
463
      B[0],
464
   };
76,468✔
465

466
   for(size_t i = 0; i != 8; i++) {
688,212✔
467
      const uint32_t X3 = B[i] ^ X2[i];
611,744✔
468
      B[i] = X2[i] ^ rotr<8>(B[i]) ^ rotr<16>(B[i]) ^ rotr<24>(X3);
611,744✔
469
   }
470
}
76,468✔
471

472
void inv_mix_columns(uint32_t B[8]) {
30,678✔
473
   /*
474
   OpenSSL's bsaes implementation credits Jussi Kivilinna with the lovely
475
   matrix decomposition
476

477
   | 0e 0b 0d 09 |   | 02 03 01 01 |   | 05 00 04 00 |
478
   | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
479
   | 0d 09 0e 0b |   | 01 01 02 03 |   | 04 00 05 00 |
480
   | 0b 0d 09 0e |   | 03 01 01 02 |   | 00 04 00 05 |
481

482
   Notice the first component is simply the MixColumns matrix. So we can
483
   multiply first by (05,00,04,00) then perform MixColumns to get the equivalent
484
   of InvMixColumn.
485
   */
486
   const uint32_t X4[8] = {
30,678✔
487
      B[2],
488
      B[3],
489
      B[4] ^ B[0],
30,678✔
490
      B[5] ^ B[0] ^ B[1],
30,678✔
491
      B[6] ^ B[1],
30,678✔
492
      B[7] ^ B[0],
30,678✔
493
      B[0] ^ B[1],
30,678✔
494
      B[1],
495
   };
30,678✔
496

497
   for(size_t i = 0; i != 8; i++) {
276,102✔
498
      const uint32_t X5 = X4[i] ^ B[i];
245,424✔
499
      B[i] = X5 ^ rotr<16>(X4[i]);
245,424✔
500
   }
501

502
   mix_columns(B);
30,678✔
503
}
30,678✔
504

505
/*
506
* AES Encryption
507
*/
508
void aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, const secure_vector<uint32_t>& EK) {
4,050✔
509
   BOTAN_ASSERT(EK.size() == 44 || EK.size() == 52 || EK.size() == 60, "Key was set");
4,050✔
510

511
   const size_t rounds = (EK.size() - 4) / 4;
4,050✔
512

513
   uint32_t KS[13 * 8] = {0};  // actual maximum is (rounds - 1) * 8
4,050✔
514
   for(size_t i = 0; i < rounds - 1; i += 1) {
49,386✔
515
      ks_expand(&KS[8 * i], EK.data(), 4 * i + 4);
45,336✔
516
   }
517

518
   const size_t BLOCK_SIZE = 16;
4,050✔
519
   const size_t BITSLICED_BLOCKS = 8 * sizeof(uint32_t) / BLOCK_SIZE;
4,050✔
520

521
   while(blocks > 0) {
8,142✔
522
      const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS);
4,092✔
523

524
      uint32_t B[8] = {0};
4,092✔
525

526
      load_be(B, in, this_loop * 4);
4,092✔
527

528
      CT::poison(B, 8);
529

530
      for(size_t i = 0; i != 8; ++i) {
36,828✔
531
         B[i] ^= EK[i % 4];
32,736✔
532
      }
533

534
      bit_transpose(B);
4,092✔
535

536
      for(size_t r = 0; r != rounds - 1; ++r) {
49,882✔
537
         AES_SBOX(B);
45,790✔
538
         shift_rows(B);
45,790✔
539
         mix_columns(B);
45,790✔
540

541
         for(size_t i = 0; i != 8; ++i) {
412,110✔
542
            B[i] ^= KS[8 * r + i];
366,320✔
543
         }
544
      }
545

546
      // Final round:
547
      AES_SBOX(B);
4,092✔
548
      shift_rows(B);
4,092✔
549
      bit_transpose(B);
4,092✔
550

551
      for(size_t i = 0; i != 8; ++i) {
36,828✔
552
         B[i] ^= EK[4 * rounds + i % 4];
32,736✔
553
      }
554

555
      CT::unpoison(B, 8);
4,092✔
556

557
      copy_out_be(std::span(out, this_loop * 4 * sizeof(uint32_t)), B);
4,092✔
558

559
      in += this_loop * BLOCK_SIZE;
4,092✔
560
      out += this_loop * BLOCK_SIZE;
4,092✔
561
      blocks -= this_loop;
4,092✔
562
   }
563
}
4,050✔
564

565
/*
566
* AES Decryption
567
*/
568
void aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, const secure_vector<uint32_t>& DK) {
2,700✔
569
   BOTAN_ASSERT(DK.size() == 44 || DK.size() == 52 || DK.size() == 60, "Key was set");
2,700✔
570

571
   const size_t rounds = (DK.size() - 4) / 4;
2,700✔
572

573
   uint32_t KS[13 * 8] = {0};  // actual maximum is (rounds - 1) * 8
2,700✔
574
   for(size_t i = 0; i < rounds - 1; i += 1) {
32,924✔
575
      ks_expand(&KS[8 * i], DK.data(), 4 * i + 4);
30,224✔
576
   }
577

578
   const size_t BLOCK_SIZE = 16;
2,700✔
579
   const size_t BITSLICED_BLOCKS = 8 * sizeof(uint32_t) / BLOCK_SIZE;
2,700✔
580

581
   while(blocks > 0) {
5,442✔
582
      const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS);
2,742✔
583

584
      uint32_t B[8] = {0};
2,742✔
585

586
      CT::poison(B, 8);
2,742✔
587

588
      load_be(B, in, this_loop * 4);
2,742✔
589

590
      for(size_t i = 0; i != 8; ++i) {
24,678✔
591
         B[i] ^= DK[i % 4];
21,936✔
592
      }
593

594
      bit_transpose(B);
2,742✔
595

596
      for(size_t r = 0; r != rounds - 1; ++r) {
33,420✔
597
         AES_INV_SBOX(B);
30,678✔
598
         inv_shift_rows(B);
30,678✔
599
         inv_mix_columns(B);
30,678✔
600

601
         for(size_t i = 0; i != 8; ++i) {
276,102✔
602
            B[i] ^= KS[8 * r + i];
245,424✔
603
         }
604
      }
605

606
      // Final round:
607
      AES_INV_SBOX(B);
2,742✔
608
      inv_shift_rows(B);
2,742✔
609
      bit_transpose(B);
2,742✔
610

611
      for(size_t i = 0; i != 8; ++i) {
24,678✔
612
         B[i] ^= DK[4 * rounds + i % 4];
21,936✔
613
      }
614

615
      CT::unpoison(B, 8);
2,742✔
616

617
      copy_out_be(std::span(out, this_loop * 4 * sizeof(uint32_t)), B);
2,742✔
618

619
      in += this_loop * BLOCK_SIZE;
2,742✔
620
      out += this_loop * BLOCK_SIZE;
2,742✔
621
      blocks -= this_loop;
2,742✔
622
   }
623
}
2,700✔
624

625
/*
* Multiply each of the four bytes packed in s by two in the AES field.
*
* Every byte is shifted left by one with its top bit dropped, and 0x1B is
* XORed into each byte whose top bit was set. The four bytes do not
* interact.
*/
inline uint32_t xtime32(uint32_t s) {
   // Low seven bits of every byte, moved up one position
   const uint32_t shifted = (s & 0x7F7F7F7F) << 1;

   // One flag bit (value 1) per byte whose top bit was set
   const uint32_t overflow = (s >> 7) & 0x01010101;

   // Multiplying the flags by 0x1B places the reduction constant in each flagged byte
   return shifted ^ (overflow * 0x1B);
}
632

633
inline uint32_t InvMixColumn(uint32_t s1) {
204,924✔
634
   const uint32_t s2 = xtime32(s1);
204,924✔
635
   const uint32_t s4 = xtime32(s2);
204,924✔
636
   const uint32_t s8 = xtime32(s4);
204,924✔
637
   const uint32_t s9 = s8 ^ s1;
204,924✔
638
   const uint32_t s11 = s9 ^ s2;
204,924✔
639
   const uint32_t s13 = s9 ^ s4;
204,924✔
640
   const uint32_t s14 = s8 ^ s4 ^ s2;
204,924✔
641

642
   return s14 ^ rotr<8>(s9) ^ rotr<16>(s13) ^ rotr<24>(s11);
204,924✔
643
}
644

645
void InvMixColumn_x4(uint32_t x[4]) {
51,231✔
646
   x[0] = InvMixColumn(x[0]);
51,231✔
647
   x[1] = InvMixColumn(x[1]);
51,231✔
648
   x[2] = InvMixColumn(x[2]);
51,231✔
649
   x[3] = InvMixColumn(x[3]);
51,231✔
650
}
51,231✔
651

652
uint32_t SE_word(uint32_t x) {
48,213✔
653
   uint32_t I[8] = {0};
48,213✔
654

655
   for(size_t i = 0; i != 8; ++i) {
433,917✔
656
      I[i] = (x >> (7 - i)) & 0x01010101;
385,704✔
657
   }
658

659
   AES_SBOX(I);
48,213✔
660

661
   x = 0;
48,213✔
662

663
   for(size_t i = 0; i != 8; ++i) {
433,917✔
664
      x |= ((I[i] & 0x01010101) << (7 - i));
385,704✔
665
   }
666

667
   return x;
48,213✔
668
}
669

670
void aes_key_schedule(const uint8_t key[],
4,605✔
671
                      size_t length,
672
                      secure_vector<uint32_t>& EK,
673
                      secure_vector<uint32_t>& DK,
674
                      bool bswap_keys = false) {
675
   static const uint32_t RC[10] = {0x01000000,
4,605✔
676
                                   0x02000000,
677
                                   0x04000000,
678
                                   0x08000000,
679
                                   0x10000000,
680
                                   0x20000000,
681
                                   0x40000000,
682
                                   0x80000000,
683
                                   0x1B000000,
684
                                   0x36000000};
685

686
   const size_t X = length / 4;
4,605✔
687

688
   // Can't happen, but make static analyzers happy
689
   BOTAN_ASSERT_NOMSG(X == 4 || X == 6 || X == 8);
4,605✔
690

691
   const size_t rounds = (length / 4) + 6;
4,605✔
692

693
   // Help the optimizer
694
   BOTAN_ASSERT_NOMSG(rounds == 10 || rounds == 12 || rounds == 14);
4,605✔
695

696
   CT::poison(key, length);
4,605✔
697

698
   EK.resize(length + 28);
4,605✔
699
   DK.resize(length + 28);
4,605✔
700

701
   for(size_t i = 0; i != X; ++i) {
32,811✔
702
      EK[i] = load_be<uint32_t>(key, i);
28,206✔
703
   }
704

705
   for(size_t i = X; i < 4 * (rounds + 1); i += X) {
42,576✔
706
      EK[i] = EK[i - X] ^ RC[(i - X) / X] ^ rotl<8>(SE_word(EK[i - 1]));
37,971✔
707

708
      for(size_t j = 1; j != X && (i + j) < EK.size(); ++j) {
213,558✔
709
         EK[i + j] = EK[i + j - X];
175,587✔
710

711
         if(X == 8 && j == 4) {
175,587✔
712
            EK[i + j] ^= SE_word(EK[i + j - 1]);
10,242✔
713
         } else {
714
            EK[i + j] ^= EK[i + j - 1];
165,345✔
715
         }
716
      }
717
   }
718

719
   for(size_t i = 0; i != 4 * (rounds + 1); i += 4) {
65,046✔
720
      DK[i] = EK[4 * rounds - i];
60,441✔
721
      DK[i + 1] = EK[4 * rounds - i + 1];
60,441✔
722
      DK[i + 2] = EK[4 * rounds - i + 2];
60,441✔
723
      DK[i + 3] = EK[4 * rounds - i + 3];
60,441✔
724
   }
725

726
   for(size_t i = 4; i != 4 * rounds; i += 4) {
55,836✔
727
      InvMixColumn_x4(&DK[i]);
51,231✔
728
   }
729

730
   if(bswap_keys) {
4,605✔
731
      // HW AES on little endian needs the subkeys to be byte reversed
732
      for(size_t i = 0; i != EK.size(); ++i) {
28,575✔
733
         EK[i] = reverse_bytes(EK[i]);
28,020✔
734
      }
735
      for(size_t i = 0; i != DK.size(); ++i) {
28,575✔
736
         DK[i] = reverse_bytes(DK[i]);
28,020✔
737
      }
738
   }
739

740
   CT::unpoison(EK.data(), EK.size());
4,605✔
741
   CT::unpoison(DK.data(), DK.size());
4,605✔
742
   CT::unpoison(key, length);
4,605✔
743
}
4,605✔
744

745
/*
* Report how many blocks the selected AES backend processes at a time.
*
* Backends are probed in the order VAES, hardware AES, vperm; the first one
* that is both compiled in and reported by CPUID decides the answer. With
* none of them available the bitsliced code is used.
*/
size_t aes_parallelism() {
#if defined(BOTAN_HAS_AES_VAES)
   const bool vaes_available = CPUID::has_avx2_vaes();
   if(vaes_available) {
      return 8;  // pipelined
   }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   const bool hw_available = CPUID::has_hw_aes();
   if(hw_available) {
      return 4;  // pipelined
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   const bool vperm_available = CPUID::has_vperm();
   if(vperm_available) {
      return 2;  // pipelined
   }
#endif

   // bitsliced: two blocks per pass
   return 2;
}
767

768
/*
* Name the AES backend that will be used on this CPU.
*
* Backends are probed in the order VAES, hardware AES, vperm; the first one
* that is both compiled in and reported by CPUID is named. "base" is
* returned when none of them applies.
*/
const char* aes_provider() {
#if defined(BOTAN_HAS_AES_VAES)
   const bool vaes_available = CPUID::has_avx2_vaes();
   if(vaes_available) {
      return "vaes";
   }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   const bool hw_available = CPUID::has_hw_aes();
   if(hw_available) {
      return "cpu";
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   const bool vperm_available = CPUID::has_vperm();
   if(vperm_available) {
      return "vperm";
   }
#endif

   return "base";
}
789

790
}  // namespace
791

792
std::string AES_128::provider() const {
1,540✔
793
   return aes_provider();
1,540✔
794
}
795

796
std::string AES_192::provider() const {
1,804✔
797
   return aes_provider();
1,804✔
798
}
799

800
std::string AES_256::provider() const {
2,060✔
801
   return aes_provider();
2,060✔
802
}
803

804
size_t AES_128::parallelism() const {
14,409✔
805
   return aes_parallelism();
14,409✔
806
}
807

808
size_t AES_192::parallelism() const {
6,202✔
809
   return aes_parallelism();
6,202✔
810
}
811

812
size_t AES_256::parallelism() const {
16,743✔
813
   return aes_parallelism();
16,743✔
814
}
815

816
bool AES_128::has_keying_material() const {
845,171✔
817
   return !m_EK.empty();
845,171✔
818
}
819

820
bool AES_192::has_keying_material() const {
62,128✔
821
   return !m_EK.empty();
62,128✔
822
}
823

824
bool AES_256::has_keying_material() const {
21,314,503✔
825
   return !m_EK.empty();
21,314,503✔
826
}
827

828
void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
739,297✔
829
   assert_key_material_set();
739,297✔
830

831
#if defined(BOTAN_HAS_AES_VAES)
832
   if(CPUID::has_avx2_vaes()) {
733,594✔
833
      return x86_vaes_encrypt_n(in, out, blocks);
730,138✔
834
   }
835
#endif
836

837
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
838
   if(CPUID::has_hw_aes()) {
3,456✔
839
      return hw_aes_encrypt_n(in, out, blocks);
1,152✔
840
   }
841
#endif
842

843
#if defined(BOTAN_HAS_AES_VPERM)
844
   if(CPUID::has_vperm()) {
2,304✔
845
      return vperm_encrypt_n(in, out, blocks);
1,152✔
846
   }
847
#endif
848

849
   aes_encrypt_n(in, out, blocks, m_EK);
1,152✔
850
}
851

852
void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
39,714✔
853
   assert_key_material_set();
39,714✔
854

855
#if defined(BOTAN_HAS_AES_VAES)
856
   if(CPUID::has_avx2_vaes()) {
36,641✔
857
      return x86_vaes_decrypt_n(in, out, blocks);
34,337✔
858
   }
859
#endif
860

861
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
862
   if(CPUID::has_hw_aes()) {
2,304✔
863
      return hw_aes_decrypt_n(in, out, blocks);
768✔
864
   }
865
#endif
866

867
#if defined(BOTAN_HAS_AES_VPERM)
868
   if(CPUID::has_vperm()) {
1,536✔
869
      return vperm_decrypt_n(in, out, blocks);
768✔
870
   }
871
#endif
872

873
   aes_decrypt_n(in, out, blocks, m_DK);
768✔
874
}
875

876
/*
* Expand `key` into the AES-128 key schedule.
*
* The schedule routine is chosen to match the implementation that
* encrypt_n/decrypt_n will later dispatch to:
*  - AES-NI has its own key schedule routine
*  - VAES and other hardware AES use the generic aes_key_schedule with
*    CPUID::is_little_endian() as the final argument
*    (NOTE(review): presumably controls the word byte order expected by the
*    hardware code paths - confirm against aes_key_schedule)
*  - vperm has its own key schedule routine
*  - otherwise the generic aes_key_schedule with its default final argument
*/
void AES_128::key_schedule(std::span<const uint8_t> key) {
#if defined(BOTAN_HAS_AES_NI)
   if(CPUID::has_aes_ni()) {
      return aesni_key_schedule(key.data(), key.size());
   }
#endif

#if defined(BOTAN_HAS_AES_VAES)
   if(CPUID::has_avx2_vaes()) {
      return aes_key_schedule(key.data(), key.size(), m_EK, m_DK, CPUID::is_little_endian());
   }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes()) {
      return aes_key_schedule(key.data(), key.size(), m_EK, m_DK, CPUID::is_little_endian());
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm()) {
      return vperm_key_schedule(key.data(), key.size());
   }
#endif

   aes_key_schedule(key.data(), key.size(), m_EK, m_DK);
}
903

904
void AES_128::clear() {
7,882✔
905
   zap(m_EK);
7,882✔
906
   zap(m_DK);
7,882✔
907
}
7,882✔
908

909
void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
31,710✔
910
   assert_key_material_set();
31,710✔
911

912
#if defined(BOTAN_HAS_AES_VAES)
913
   if(CPUID::has_avx2_vaes()) {
28,050✔
914
      return x86_vaes_encrypt_n(in, out, blocks);
23,991✔
915
   }
916
#endif
917

918
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
919
   if(CPUID::has_hw_aes()) {
4,059✔
920
      return hw_aes_encrypt_n(in, out, blocks);
1,353✔
921
   }
922
#endif
923

924
#if defined(BOTAN_HAS_AES_VPERM)
925
   if(CPUID::has_vperm()) {
2,706✔
926
      return vperm_encrypt_n(in, out, blocks);
1,353✔
927
   }
928
#endif
929

930
   aes_encrypt_n(in, out, blocks, m_EK);
1,353✔
931
}
932

933
void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
7,978✔
934
   assert_key_material_set();
7,978✔
935

936
#if defined(BOTAN_HAS_AES_VAES)
937
   if(CPUID::has_avx2_vaes()) {
4,370✔
938
      return x86_vaes_decrypt_n(in, out, blocks);
1,664✔
939
   }
940
#endif
941

942
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
943
   if(CPUID::has_hw_aes()) {
2,706✔
944
      return hw_aes_decrypt_n(in, out, blocks);
902✔
945
   }
946
#endif
947

948
#if defined(BOTAN_HAS_AES_VPERM)
949
   if(CPUID::has_vperm()) {
1,804✔
950
      return vperm_decrypt_n(in, out, blocks);
902✔
951
   }
952
#endif
953

954
   aes_decrypt_n(in, out, blocks, m_DK);
902✔
955
}
956

957
/*
* Expand `key` into the AES-192 key schedule.
*
* The schedule routine is chosen to match the implementation that
* encrypt_n/decrypt_n will later dispatch to:
*  - AES-NI has its own key schedule routine
*  - VAES and other hardware AES use the generic aes_key_schedule with
*    CPUID::is_little_endian() as the final argument
*    (NOTE(review): presumably controls the word byte order expected by the
*    hardware code paths - confirm against aes_key_schedule)
*  - vperm has its own key schedule routine
*  - otherwise the generic aes_key_schedule with its default final argument
*/
void AES_192::key_schedule(std::span<const uint8_t> key) {
#if defined(BOTAN_HAS_AES_NI)
   if(CPUID::has_aes_ni()) {
      return aesni_key_schedule(key.data(), key.size());
   }
#endif

#if defined(BOTAN_HAS_AES_VAES)
   if(CPUID::has_avx2_vaes()) {
      return aes_key_schedule(key.data(), key.size(), m_EK, m_DK, CPUID::is_little_endian());
   }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes()) {
      return aes_key_schedule(key.data(), key.size(), m_EK, m_DK, CPUID::is_little_endian());
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm()) {
      return vperm_key_schedule(key.data(), key.size());
   }
#endif

   aes_key_schedule(key.data(), key.size(), m_EK, m_DK);
}
984

985
void AES_192::clear() {
4,078✔
986
   zap(m_EK);
4,078✔
987
   zap(m_DK);
4,078✔
988
}
4,078✔
989

990
void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
425,273✔
991
   assert_key_material_set();
425,273✔
992

993
#if defined(BOTAN_HAS_AES_VAES)
994
   if(CPUID::has_avx2_vaes()) {
421,092✔
995
      return x86_vaes_encrypt_n(in, out, blocks);
416,457✔
996
   }
997
#endif
998

999
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
1000
   if(CPUID::has_hw_aes()) {
4,635✔
1001
      return hw_aes_encrypt_n(in, out, blocks);
1,545✔
1002
   }
1003
#endif
1004

1005
#if defined(BOTAN_HAS_AES_VPERM)
1006
   if(CPUID::has_vperm()) {
3,090✔
1007
      return vperm_encrypt_n(in, out, blocks);
1,545✔
1008
   }
1009
#endif
1010

1011
   aes_encrypt_n(in, out, blocks, m_EK);
1,545✔
1012
}
1013

1014
void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
16,129✔
1015
   assert_key_material_set();
16,129✔
1016

1017
#if defined(BOTAN_HAS_AES_VAES)
1018
   if(CPUID::has_avx2_vaes()) {
12,005✔
1019
      return x86_vaes_decrypt_n(in, out, blocks);
8,915✔
1020
   }
1021
#endif
1022

1023
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
1024
   if(CPUID::has_hw_aes()) {
3,090✔
1025
      return hw_aes_decrypt_n(in, out, blocks);
1,030✔
1026
   }
1027
#endif
1028

1029
#if defined(BOTAN_HAS_AES_VPERM)
1030
   if(CPUID::has_vperm()) {
2,060✔
1031
      return vperm_decrypt_n(in, out, blocks);
1,030✔
1032
   }
1033
#endif
1034

1035
   aes_decrypt_n(in, out, blocks, m_DK);
1,030✔
1036
}
1037

1038
/*
* Expand `key` into the AES-256 key schedule.
*
* The schedule routine is chosen to match the implementation that
* encrypt_n/decrypt_n will later dispatch to:
*  - AES-NI has its own key schedule routine
*  - VAES and other hardware AES use the generic aes_key_schedule with
*    CPUID::is_little_endian() as the final argument
*    (NOTE(review): presumably controls the word byte order expected by the
*    hardware code paths - confirm against aes_key_schedule)
*  - vperm has its own key schedule routine
*  - otherwise the generic aes_key_schedule with its default final argument
*/
void AES_256::key_schedule(std::span<const uint8_t> key) {
#if defined(BOTAN_HAS_AES_NI)
   if(CPUID::has_aes_ni()) {
      return aesni_key_schedule(key.data(), key.size());
   }
#endif

#if defined(BOTAN_HAS_AES_VAES)
   if(CPUID::has_avx2_vaes()) {
      return aes_key_schedule(key.data(), key.size(), m_EK, m_DK, CPUID::is_little_endian());
   }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes()) {
      return aes_key_schedule(key.data(), key.size(), m_EK, m_DK, CPUID::is_little_endian());
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm()) {
      return vperm_key_schedule(key.data(), key.size());
   }
#endif

   aes_key_schedule(key.data(), key.size(), m_EK, m_DK);
}
1065

1066
void AES_256::clear() {
99,902✔
1067
   zap(m_EK);
99,902✔
1068
   zap(m_DK);
99,902✔
1069
}
99,902✔
1070

1071
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc