• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 13597425450

28 Feb 2025 10:16PM UTC coverage: 91.687% (-0.006%) from 91.693%
13597425450

push

github

web-flow
Merge pull request #4728 from randombit/jack/use-std-endian

Use std::endian more, remove endian checks in `CPUID`

95836 of 104525 relevant lines covered (91.69%)

11717992.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.99
/src/lib/block/aes/aes.cpp
1
/*
2
* (C) 1999-2010,2015,2017,2018,2020 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6

7
#include <botan/internal/aes.h>
8

9
#include <botan/internal/bit_ops.h>
10
#include <botan/internal/bswap.h>
11
#include <botan/internal/ct_utils.h>
12
#include <botan/internal/loadstor.h>
13
#include <botan/internal/rotate.h>
14

15
#if defined(BOTAN_HAS_CPUID)
16
   #include <botan/internal/cpuid.h>
17
#endif
18

19
#if defined(BOTAN_HAS_AES_POWER8) || defined(BOTAN_HAS_AES_ARMV8) || defined(BOTAN_HAS_AES_NI)
20
   #define BOTAN_HAS_HW_AES_SUPPORT
21
#endif
22

23
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
24
   #include <bit>
25
#endif
26

27
namespace Botan {
28

29
/*
30
* One of three AES implementation strategies is used to get a constant time
31
* implementation which is immune to common cache/timing based side channels:
32
*
33
* - If AES hardware support is available (AES-NI, POWER8, Aarch64) use that
34
*
35
* - If 128-bit SIMD with byte shuffles are available (SSSE3, NEON, or Altivec),
36
*   use the vperm technique published by Mike Hamburg at CHES 2009.
37
*
38
* - If no hardware or SIMD support, fall back to a constant time bitsliced
39
*   implementation. This uses 32-bit words resulting in 2 blocks being processed
40
*   in parallel. Moving to 4 blocks (with 64-bit words) would approximately
41
*   double performance on 64-bit CPUs. Likewise moving to 128 bit SIMD would
42
*   again approximately double performance vs 64-bit. However the assumption is
43
*   that most 64-bit CPUs either have hardware AES or SIMD shuffle support and
44
*   that the majority of users falling back to this code will be 32-bit cores.
45
*   If this assumption proves to be unsound, the bitsliced code can easily be
46
*   extended to operate on either 32 or 64 bit words depending on the native
47
*   wordsize of the target processor.
48
*
49
* Useful references
50
*
51
* - "Accelerating AES with Vector Permute Instructions" Mike Hamburg
52
*   https://www.shiftleft.org/papers/vector_aes/vector_aes.pdf
53
*
54
* - "Faster and Timing-Attack Resistant AES-GCM" Käsper and Schwabe
55
*   https://eprint.iacr.org/2009/129.pdf
56
*
57
* - "A new combinational logic minimization technique with applications to cryptology."
58
*   Boyar and Peralta https://eprint.iacr.org/2009/191.pdf
59
*
60
* - "A depth-16 circuit for the AES S-box" Boyar and Peralta
61
*    https://eprint.iacr.org/2011/332.pdf
62
*
63
* - "A Very Compact S-box for AES" Canright
64
*   https://www.iacr.org/archive/ches2005/032.pdf
65
*   https://core.ac.uk/download/pdf/36694529.pdf (extended)
66
*/
67

68
namespace {
69

70
/*
71
This is an AES sbox circuit which can execute in bitsliced mode up to 32x in
72
parallel.
73

74
The circuit is from the "Circuit Minimization Team" group
75
http://www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html
76
http://www.cs.yale.edu/homes/peralta/CircuitStuff/SLP_AES_113.txt
77

78
This circuit has size 113 and depth 27. In software it is much faster than
79
circuits which are considered faster for hardware purposes (where circuit depth
80
is the critical constraint), because unlike in hardware, on common CPUs we can
81
only execute - at best - 3 or 4 logic operations per cycle. So a smaller circuit
82
is superior. On an x86-64 machine this circuit is about 15% faster than the
83
circuit of size 128 and depth 16 given in "A depth-16 circuit for the AES S-box".
84

85
Another circuit for AES Sbox of size 102 and depth 24 is described in "New
86
Circuit Minimization Techniques for Smaller and Faster AES SBoxes"
87
[https://eprint.iacr.org/2019/802] however it relies on "non-standard" gates
88
like MUX, NOR, NAND, etc and so in practice in bitsliced software, its size is
89
actually a bit larger than this circuit, as few CPUs have such instructions and
90
otherwise they must be emulated using a sequence of available bit operations.
91
*/
92
/*
* Apply the AES S-box to bitsliced data, in place.
*
* V[0..7] are read once on entry and overwritten with the result on exit.
* SE_word() in this file stores bit (7 - i) of every byte in V[i], so V[0]
* carries the most significant bit of each byte.
*
* The body is straight-line code made only of XOR, AND and NOT on 32-bit
* words; there are no branches or table lookups.
*/
void AES_SBOX(uint32_t V[8]) {
   const uint32_t U0 = V[0];
   const uint32_t U1 = V[1];
   const uint32_t U2 = V[2];
   const uint32_t U3 = V[3];
   const uint32_t U4 = V[4];
   const uint32_t U5 = V[5];
   const uint32_t U6 = V[6];
   const uint32_t U7 = V[7];

   // XOR-only input layer
   const uint32_t y14 = U3 ^ U5;
   const uint32_t y13 = U0 ^ U6;
   const uint32_t y9 = U0 ^ U3;
   const uint32_t y8 = U0 ^ U5;
   const uint32_t t0 = U1 ^ U2;
   const uint32_t y1 = t0 ^ U7;
   const uint32_t y4 = y1 ^ U3;
   const uint32_t y12 = y13 ^ y14;
   const uint32_t y2 = y1 ^ U0;
   const uint32_t y5 = y1 ^ U6;
   const uint32_t y3 = y5 ^ y8;
   const uint32_t t1 = U4 ^ y12;
   const uint32_t y15 = t1 ^ U5;
   const uint32_t y20 = t1 ^ U1;
   const uint32_t y6 = y15 ^ U7;
   const uint32_t y10 = y15 ^ t0;
   const uint32_t y11 = y20 ^ y9;
   const uint32_t y7 = U7 ^ y11;
   const uint32_t y17 = y10 ^ y11;
   const uint32_t y19 = y10 ^ y8;
   const uint32_t y16 = t0 ^ y11;
   const uint32_t y21 = y13 ^ y16;
   const uint32_t y18 = U0 ^ y16;

   // Mixed AND/XOR middle section
   const uint32_t t2 = y12 & y15;
   const uint32_t t3 = y3 & y6;
   const uint32_t t4 = t3 ^ t2;
   const uint32_t t5 = y4 & U7;
   const uint32_t t6 = t5 ^ t2;
   const uint32_t t7 = y13 & y16;
   const uint32_t t8 = y5 & y1;
   const uint32_t t9 = t8 ^ t7;
   const uint32_t t10 = y2 & y7;
   const uint32_t t11 = t10 ^ t7;
   const uint32_t t12 = y9 & y11;
   const uint32_t t13 = y14 & y17;
   const uint32_t t14 = t13 ^ t12;
   const uint32_t t15 = y8 & y10;
   const uint32_t t16 = t15 ^ t12;
   const uint32_t t17 = t4 ^ y20;
   const uint32_t t18 = t6 ^ t16;
   const uint32_t t19 = t9 ^ t14;
   const uint32_t t20 = t11 ^ t16;
   const uint32_t t21 = t17 ^ t14;
   const uint32_t t22 = t18 ^ y19;
   const uint32_t t23 = t19 ^ y21;
   const uint32_t t24 = t20 ^ y18;
   const uint32_t t25 = t21 ^ t22;
   const uint32_t t26 = t21 & t23;
   const uint32_t t27 = t24 ^ t26;
   const uint32_t t28 = t25 & t27;
   const uint32_t t29 = t28 ^ t22;
   const uint32_t t30 = t23 ^ t24;
   const uint32_t t31 = t22 ^ t26;
   const uint32_t t32 = t31 & t30;
   const uint32_t t33 = t32 ^ t24;
   const uint32_t t34 = t23 ^ t33;
   const uint32_t t35 = t27 ^ t33;
   const uint32_t t36 = t24 & t35;
   const uint32_t t37 = t36 ^ t34;
   const uint32_t t38 = t27 ^ t36;
   const uint32_t t39 = t29 & t38;
   const uint32_t t40 = t25 ^ t39;
   const uint32_t t41 = t40 ^ t37;
   const uint32_t t42 = t29 ^ t33;
   const uint32_t t43 = t29 ^ t40;
   const uint32_t t44 = t33 ^ t37;
   const uint32_t t45 = t42 ^ t41;

   // AND of the middle-section values with the input-layer values
   const uint32_t z0 = t44 & y15;
   const uint32_t z1 = t37 & y6;
   const uint32_t z2 = t33 & U7;
   const uint32_t z3 = t43 & y16;
   const uint32_t z4 = t40 & y1;
   const uint32_t z5 = t29 & y7;
   const uint32_t z6 = t42 & y11;
   const uint32_t z7 = t45 & y17;
   const uint32_t z8 = t41 & y10;
   const uint32_t z9 = t44 & y12;
   const uint32_t z10 = t37 & y3;
   const uint32_t z11 = t33 & y4;
   const uint32_t z12 = t43 & y13;
   const uint32_t z13 = t40 & y5;
   const uint32_t z14 = t29 & y2;
   const uint32_t z15 = t42 & y9;
   const uint32_t z16 = t45 & y14;
   const uint32_t z17 = t41 & y8;

   // XOR/NOT output layer producing S0..S7
   const uint32_t tc1 = z15 ^ z16;
   const uint32_t tc2 = z10 ^ tc1;
   const uint32_t tc3 = z9 ^ tc2;
   const uint32_t tc4 = z0 ^ z2;
   const uint32_t tc5 = z1 ^ z0;
   const uint32_t tc6 = z3 ^ z4;
   const uint32_t tc7 = z12 ^ tc4;
   const uint32_t tc8 = z7 ^ tc6;
   const uint32_t tc9 = z8 ^ tc7;
   const uint32_t tc10 = tc8 ^ tc9;
   const uint32_t tc11 = tc6 ^ tc5;
   const uint32_t tc12 = z3 ^ z5;
   const uint32_t tc13 = z13 ^ tc1;
   const uint32_t tc14 = tc4 ^ tc12;
   const uint32_t S3 = tc3 ^ tc11;
   const uint32_t tc16 = z6 ^ tc8;
   const uint32_t tc17 = z14 ^ tc10;
   const uint32_t tc18 = ~tc13 ^ tc14;
   const uint32_t S7 = z12 ^ tc18;
   const uint32_t tc20 = z15 ^ tc16;
   const uint32_t tc21 = tc2 ^ z11;
   const uint32_t S0 = tc3 ^ tc16;
   const uint32_t S6 = tc10 ^ tc18;
   const uint32_t S4 = tc14 ^ S3;
   const uint32_t S1 = ~(S3 ^ tc16);
   const uint32_t tc26 = tc17 ^ tc20;
   const uint32_t S2 = ~(tc26 ^ z17);
   const uint32_t S5 = tc21 ^ tc17;

   V[0] = S0;
   V[1] = S1;
   V[2] = S2;
   V[3] = S3;
   V[4] = S4;
   V[5] = S5;
   V[6] = S6;
   V[7] = S7;
}
225

226
/*
227
A circuit for inverse AES Sbox of size 121 and depth 21 from
228
http://www.cs.yale.edu/homes/peralta/CircuitStuff/CMT.html
229
http://www.cs.yale.edu/homes/peralta/CircuitStuff/Sinv.txt
230
*/
231
/*
* Apply the inverse AES S-box to bitsliced data, in place.
*
* Uses the same layout as AES_SBOX: V[0..7] are read on entry and overwritten
* with the result, with V[0] carrying the most significant bit of each byte.
* The body is straight-line XOR / AND / NOT code with no branches or lookups.
*/
void AES_INV_SBOX(uint32_t V[8]) {
   const uint32_t U0 = V[0];
   const uint32_t U1 = V[1];
   const uint32_t U2 = V[2];
   const uint32_t U3 = V[3];
   const uint32_t U4 = V[4];
   const uint32_t U5 = V[5];
   const uint32_t U6 = V[6];
   const uint32_t U7 = V[7];

   // XOR/NOT input layer
   const uint32_t Y0 = U0 ^ U3;
   const uint32_t Y2 = ~(U1 ^ U3);
   const uint32_t Y4 = U0 ^ Y2;
   const uint32_t RTL0 = U6 ^ U7;
   const uint32_t Y1 = Y2 ^ RTL0;
   const uint32_t Y7 = ~(U2 ^ Y1);
   const uint32_t RTL1 = U3 ^ U4;
   const uint32_t Y6 = ~(U7 ^ RTL1);
   const uint32_t Y3 = Y1 ^ RTL1;
   const uint32_t RTL2 = ~(U0 ^ U2);
   const uint32_t Y5 = U5 ^ RTL2;
   const uint32_t sa1 = Y0 ^ Y2;
   const uint32_t sa0 = Y1 ^ Y3;
   const uint32_t sb1 = Y4 ^ Y6;
   const uint32_t sb0 = Y5 ^ Y7;
   const uint32_t ah = Y0 ^ Y1;
   const uint32_t al = Y2 ^ Y3;
   const uint32_t aa = sa0 ^ sa1;
   const uint32_t bh = Y4 ^ Y5;
   const uint32_t bl = Y6 ^ Y7;
   const uint32_t bb = sb0 ^ sb1;
   const uint32_t ab20 = sa0 ^ sb0;
   const uint32_t ab22 = al ^ bl;
   const uint32_t ab23 = Y3 ^ Y7;
   const uint32_t ab21 = sa1 ^ sb1;

   // Mixed AND/XOR section producing cp1..cp4
   const uint32_t abcd1 = ah & bh;
   const uint32_t rr1 = Y0 & Y4;
   const uint32_t ph11 = ab20 ^ abcd1;
   const uint32_t t01 = Y1 & Y5;
   const uint32_t ph01 = t01 ^ abcd1;
   const uint32_t abcd2 = al & bl;
   const uint32_t r1 = Y2 & Y6;
   const uint32_t pl11 = ab22 ^ abcd2;
   const uint32_t r2 = Y3 & Y7;
   const uint32_t pl01 = r2 ^ abcd2;
   const uint32_t r3 = sa0 & sb0;
   const uint32_t vr1 = aa & bb;
   const uint32_t pr1 = vr1 ^ r3;
   const uint32_t wr1 = sa1 & sb1;
   const uint32_t qr1 = wr1 ^ r3;
   const uint32_t ab0 = ph11 ^ rr1;
   const uint32_t ab1 = ph01 ^ ab21;
   const uint32_t ab2 = pl11 ^ r1;
   const uint32_t ab3 = pl01 ^ qr1;
   const uint32_t cp1 = ab0 ^ pr1;
   const uint32_t cp2 = ab1 ^ qr1;
   const uint32_t cp3 = ab2 ^ pr1;
   const uint32_t cp4 = ab3 ^ ab23;

   // cp1..cp4 -> d0..d3
   const uint32_t tinv1 = cp3 ^ cp4;
   const uint32_t tinv2 = cp3 & cp1;
   const uint32_t tinv3 = cp2 ^ tinv2;
   const uint32_t tinv4 = cp1 ^ cp2;
   const uint32_t tinv5 = cp4 ^ tinv2;
   const uint32_t tinv6 = tinv5 & tinv4;
   const uint32_t tinv7 = tinv3 & tinv1;
   const uint32_t d2 = cp4 ^ tinv7;
   const uint32_t d0 = cp2 ^ tinv6;
   const uint32_t tinv8 = cp1 & cp4;
   const uint32_t tinv9 = tinv4 & tinv8;
   const uint32_t tinv10 = tinv4 ^ tinv2;
   const uint32_t d1 = tinv9 ^ tinv10;
   const uint32_t tinv11 = cp2 & cp3;
   const uint32_t tinv12 = tinv1 & tinv11;
   const uint32_t tinv13 = tinv1 ^ tinv2;
   const uint32_t d3 = tinv12 ^ tinv13;
   const uint32_t sd1 = d1 ^ d3;
   const uint32_t sd0 = d0 ^ d2;
   const uint32_t dl = d0 ^ d1;
   const uint32_t dh = d2 ^ d3;
   const uint32_t dd = sd0 ^ sd1;

   // AND of the d values with the input-layer values
   const uint32_t abcd3 = dh & bh;
   const uint32_t rr2 = d3 & Y4;
   const uint32_t t02 = d2 & Y5;
   const uint32_t abcd4 = dl & bl;
   const uint32_t r4 = d1 & Y6;
   const uint32_t r5 = d0 & Y7;
   const uint32_t r6 = sd0 & sb0;
   const uint32_t vr2 = dd & bb;
   const uint32_t wr2 = sd1 & sb1;
   const uint32_t abcd5 = dh & ah;
   const uint32_t r7 = d3 & Y0;
   const uint32_t r8 = d2 & Y1;
   const uint32_t abcd6 = dl & al;
   const uint32_t r9 = d1 & Y2;
   const uint32_t r10 = d0 & Y3;
   const uint32_t r11 = sd0 & sa0;
   const uint32_t vr3 = dd & aa;
   const uint32_t wr3 = sd1 & sa1;

   // XOR-only output layer producing S0..S7
   const uint32_t ph12 = rr2 ^ abcd3;
   const uint32_t ph02 = t02 ^ abcd3;
   const uint32_t pl12 = r4 ^ abcd4;
   const uint32_t pl02 = r5 ^ abcd4;
   const uint32_t pr2 = vr2 ^ r6;
   const uint32_t qr2 = wr2 ^ r6;
   const uint32_t p0 = ph12 ^ pr2;
   const uint32_t p1 = ph02 ^ qr2;
   const uint32_t p2 = pl12 ^ pr2;
   const uint32_t p3 = pl02 ^ qr2;
   const uint32_t ph13 = r7 ^ abcd5;
   const uint32_t ph03 = r8 ^ abcd5;
   const uint32_t pl13 = r9 ^ abcd6;
   const uint32_t pl03 = r10 ^ abcd6;
   const uint32_t pr3 = vr3 ^ r11;
   const uint32_t qr3 = wr3 ^ r11;
   const uint32_t p4 = ph13 ^ pr3;
   const uint32_t S7 = ph03 ^ qr3;
   const uint32_t p6 = pl13 ^ pr3;
   const uint32_t p7 = pl03 ^ qr3;
   const uint32_t S3 = p1 ^ p6;
   const uint32_t S6 = p2 ^ p6;
   const uint32_t S0 = p3 ^ p6;
   const uint32_t X11 = p0 ^ p2;
   const uint32_t S5 = S0 ^ X11;
   const uint32_t X13 = p4 ^ p7;
   const uint32_t X14 = X11 ^ X13;
   const uint32_t S1 = S3 ^ X14;
   const uint32_t X16 = p1 ^ S7;
   const uint32_t S2 = X14 ^ X16;
   const uint32_t X18 = p0 ^ p4;
   const uint32_t X19 = S5 ^ X16;
   const uint32_t S4 = X18 ^ X19;

   V[0] = S0;
   V[1] = S1;
   V[2] = S2;
   V[3] = S3;
   V[4] = S4;
   V[5] = S5;
   V[6] = S6;
   V[7] = S7;
}
372

373
inline void bit_transpose(uint32_t B[8]) {
13,668✔
374
   swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1);
13,668✔
375
   swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1);
13,668✔
376
   swap_bits<uint32_t>(B[5], B[4], 0x55555555, 1);
13,668✔
377
   swap_bits<uint32_t>(B[7], B[6], 0x55555555, 1);
13,668✔
378

379
   swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2);
13,668✔
380
   swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2);
13,668✔
381
   swap_bits<uint32_t>(B[6], B[4], 0x33333333, 2);
13,668✔
382
   swap_bits<uint32_t>(B[7], B[5], 0x33333333, 2);
13,668✔
383

384
   swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4);
13,668✔
385
   swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4);
13,668✔
386
   swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4);
13,668✔
387
   swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4);
13,668✔
388
}
13,668✔
389

390
inline void ks_expand(uint32_t B[8], const uint32_t K[], size_t r) {
75,560✔
391
   /*
392
   This is bit_transpose of K[r..r+4] || K[r..r+4], we can save some computation
393
   due to knowing the first and second halves are the same data.
394
   */
395
   for(size_t i = 0; i != 4; ++i) {
377,800✔
396
      B[i] = K[r + i];
302,240✔
397
   }
398

399
   swap_bits<uint32_t>(B[1], B[0], 0x55555555, 1);
75,560✔
400
   swap_bits<uint32_t>(B[3], B[2], 0x55555555, 1);
75,560✔
401

402
   swap_bits<uint32_t>(B[2], B[0], 0x33333333, 2);
75,560✔
403
   swap_bits<uint32_t>(B[3], B[1], 0x33333333, 2);
75,560✔
404

405
   B[4] = B[0];
75,560✔
406
   B[5] = B[1];
75,560✔
407
   B[6] = B[2];
75,560✔
408
   B[7] = B[3];
75,560✔
409

410
   swap_bits<uint32_t>(B[4], B[0], 0x0F0F0F0F, 4);
75,560✔
411
   swap_bits<uint32_t>(B[5], B[1], 0x0F0F0F0F, 4);
75,560✔
412
   swap_bits<uint32_t>(B[6], B[2], 0x0F0F0F0F, 4);
75,560✔
413
   swap_bits<uint32_t>(B[7], B[3], 0x0F0F0F0F, 4);
75,560✔
414
}
75,560✔
415

416
inline void shift_rows(uint32_t B[8]) {
49,882✔
417
   // 3 0 1 2 7 4 5 6 10 11 8 9 14 15 12 13 17 18 19 16 21 22 23 20 24 25 26 27 28 29 30 31
418
   if constexpr(HasNative64BitRegisters) {
49,882✔
419
      for(size_t i = 0; i != 8; i += 2) {
249,410✔
420
         uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i + 1];
199,528✔
421
         x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2);
199,528✔
422
         x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1);
199,528✔
423
         B[i] = static_cast<uint32_t>(x >> 32);
199,528✔
424
         B[i + 1] = static_cast<uint32_t>(x);
199,528✔
425
      }
426
   } else {
427
      for(size_t i = 0; i != 8; ++i) {
428
         uint32_t x = B[i];
429
         x = bit_permute_step<uint32_t>(x, 0x00223311, 2);
430
         x = bit_permute_step<uint32_t>(x, 0x00550055, 1);
431
         B[i] = x;
432
      }
433
   }
434
}
49,882✔
435

436
inline void inv_shift_rows(uint32_t B[8]) {
33,420✔
437
   // Inverse of shift_rows, just inverting the steps
438

439
   if constexpr(HasNative64BitRegisters) {
33,420✔
440
      for(size_t i = 0; i != 8; i += 2) {
167,100✔
441
         uint64_t x = (static_cast<uint64_t>(B[i]) << 32) | B[i + 1];
133,680✔
442
         x = bit_permute_step<uint64_t>(x, 0x0055005500550055, 1);
133,680✔
443
         x = bit_permute_step<uint64_t>(x, 0x0022331100223311, 2);
133,680✔
444
         B[i] = static_cast<uint32_t>(x >> 32);
133,680✔
445
         B[i + 1] = static_cast<uint32_t>(x);
133,680✔
446
      }
447
   } else {
448
      for(size_t i = 0; i != 8; ++i) {
449
         uint32_t x = B[i];
450
         x = bit_permute_step<uint32_t>(x, 0x00550055, 1);
451
         x = bit_permute_step<uint32_t>(x, 0x00223311, 2);
452
         B[i] = x;
453
      }
454
   }
455
}
33,420✔
456

457
inline void mix_columns(uint32_t B[8]) {
76,468✔
458
   // carry high bits in B[0] to positions in 0x1b == 0b11011
459
   const uint32_t X2[8] = {
76,468✔
460
      B[1],
461
      B[2],
462
      B[3],
463
      B[4] ^ B[0],
76,468✔
464
      B[5] ^ B[0],
76,468✔
465
      B[6],
466
      B[7] ^ B[0],
76,468✔
467
      B[0],
468
   };
76,468✔
469

470
   for(size_t i = 0; i != 8; i++) {
688,212✔
471
      const uint32_t X3 = B[i] ^ X2[i];
611,744✔
472
      B[i] = X2[i] ^ rotr<8>(B[i]) ^ rotr<16>(B[i]) ^ rotr<24>(X3);
611,744✔
473
   }
474
}
76,468✔
475

476
void inv_mix_columns(uint32_t B[8]) {
30,678✔
477
   /*
478
   OpenSSL's bsaes implementation credits Jussi Kivilinna with the lovely
479
   matrix decomposition
480

481
   | 0e 0b 0d 09 |   | 02 03 01 01 |   | 05 00 04 00 |
482
   | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
483
   | 0d 09 0e 0b |   | 01 01 02 03 |   | 04 00 05 00 |
484
   | 0b 0d 09 0e |   | 03 01 01 02 |   | 00 04 00 05 |
485

486
   Notice the first component is simply the MixColumns matrix. So we can
487
   multiply first by (05,00,04,00) then perform MixColumns to get the equivalent
488
   of InvMixColumn.
489
   */
490
   const uint32_t X4[8] = {
30,678✔
491
      B[2],
492
      B[3],
493
      B[4] ^ B[0],
30,678✔
494
      B[5] ^ B[0] ^ B[1],
30,678✔
495
      B[6] ^ B[1],
30,678✔
496
      B[7] ^ B[0],
30,678✔
497
      B[0] ^ B[1],
30,678✔
498
      B[1],
499
   };
30,678✔
500

501
   for(size_t i = 0; i != 8; i++) {
276,102✔
502
      const uint32_t X5 = X4[i] ^ B[i];
245,424✔
503
      B[i] = X5 ^ rotr<16>(X4[i]);
245,424✔
504
   }
505

506
   mix_columns(B);
30,678✔
507
}
30,678✔
508

509
/*
510
* AES Encryption
511
*/
512
void aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, const secure_vector<uint32_t>& EK) {
4,050✔
513
   BOTAN_ASSERT(EK.size() == 44 || EK.size() == 52 || EK.size() == 60, "Key was set");
4,050✔
514

515
   const size_t rounds = (EK.size() - 4) / 4;
4,050✔
516

517
   uint32_t KS[13 * 8] = {0};  // actual maximum is (rounds - 1) * 8
4,050✔
518
   for(size_t i = 0; i < rounds - 1; i += 1) {
49,386✔
519
      ks_expand(&KS[8 * i], EK.data(), 4 * i + 4);
45,336✔
520
   }
521

522
   const size_t BLOCK_SIZE = 16;
4,050✔
523
   const size_t BITSLICED_BLOCKS = 8 * sizeof(uint32_t) / BLOCK_SIZE;
4,050✔
524

525
   while(blocks > 0) {
8,142✔
526
      const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS);
4,092✔
527

528
      uint32_t B[8] = {0};
4,092✔
529

530
      load_be(B, in, this_loop * 4);
4,092✔
531

532
      CT::poison(B, 8);
533

534
      for(size_t i = 0; i != 8; ++i) {
36,828✔
535
         B[i] ^= EK[i % 4];
32,736✔
536
      }
537

538
      bit_transpose(B);
4,092✔
539

540
      for(size_t r = 0; r != rounds - 1; ++r) {
49,882✔
541
         AES_SBOX(B);
45,790✔
542
         shift_rows(B);
45,790✔
543
         mix_columns(B);
45,790✔
544

545
         for(size_t i = 0; i != 8; ++i) {
412,110✔
546
            B[i] ^= KS[8 * r + i];
366,320✔
547
         }
548
      }
549

550
      // Final round:
551
      AES_SBOX(B);
4,092✔
552
      shift_rows(B);
4,092✔
553
      bit_transpose(B);
4,092✔
554

555
      for(size_t i = 0; i != 8; ++i) {
36,828✔
556
         B[i] ^= EK[4 * rounds + i % 4];
32,736✔
557
      }
558

559
      CT::unpoison(B, 8);
4,092✔
560

561
      copy_out_be(std::span(out, this_loop * 4 * sizeof(uint32_t)), B);
4,092✔
562

563
      in += this_loop * BLOCK_SIZE;
4,092✔
564
      out += this_loop * BLOCK_SIZE;
4,092✔
565
      blocks -= this_loop;
4,092✔
566
   }
567
}
4,050✔
568

569
/*
570
* AES Decryption
571
*/
572
void aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, const secure_vector<uint32_t>& DK) {
2,700✔
573
   BOTAN_ASSERT(DK.size() == 44 || DK.size() == 52 || DK.size() == 60, "Key was set");
2,700✔
574

575
   const size_t rounds = (DK.size() - 4) / 4;
2,700✔
576

577
   uint32_t KS[13 * 8] = {0};  // actual maximum is (rounds - 1) * 8
2,700✔
578
   for(size_t i = 0; i < rounds - 1; i += 1) {
32,924✔
579
      ks_expand(&KS[8 * i], DK.data(), 4 * i + 4);
30,224✔
580
   }
581

582
   const size_t BLOCK_SIZE = 16;
2,700✔
583
   const size_t BITSLICED_BLOCKS = 8 * sizeof(uint32_t) / BLOCK_SIZE;
2,700✔
584

585
   while(blocks > 0) {
5,442✔
586
      const size_t this_loop = std::min(blocks, BITSLICED_BLOCKS);
2,742✔
587

588
      uint32_t B[8] = {0};
2,742✔
589

590
      CT::poison(B, 8);
2,742✔
591

592
      load_be(B, in, this_loop * 4);
2,742✔
593

594
      for(size_t i = 0; i != 8; ++i) {
24,678✔
595
         B[i] ^= DK[i % 4];
21,936✔
596
      }
597

598
      bit_transpose(B);
2,742✔
599

600
      for(size_t r = 0; r != rounds - 1; ++r) {
33,420✔
601
         AES_INV_SBOX(B);
30,678✔
602
         inv_shift_rows(B);
30,678✔
603
         inv_mix_columns(B);
30,678✔
604

605
         for(size_t i = 0; i != 8; ++i) {
276,102✔
606
            B[i] ^= KS[8 * r + i];
245,424✔
607
         }
608
      }
609

610
      // Final round:
611
      AES_INV_SBOX(B);
2,742✔
612
      inv_shift_rows(B);
2,742✔
613
      bit_transpose(B);
2,742✔
614

615
      for(size_t i = 0; i != 8; ++i) {
24,678✔
616
         B[i] ^= DK[4 * rounds + i % 4];
21,936✔
617
      }
618

619
      CT::unpoison(B, 8);
2,742✔
620

621
      copy_out_be(std::span(out, this_loop * 4 * sizeof(uint32_t)), B);
2,742✔
622

623
      in += this_loop * BLOCK_SIZE;
2,742✔
624
      out += this_loop * BLOCK_SIZE;
2,742✔
625
      blocks -= this_loop;
2,742✔
626
   }
627
}
2,700✔
628

629
/*
* Multiply each of the four bytes packed in s by 2 in GF(2^8) with the AES
* reduction constant 0x1B, all four bytes at once.
*
* The low seven bits of every byte are shifted left by one; the top bit of
* every byte is moved to bit 0 of its lane and multiplied by 0x1B, which
* yields 0x1B in exactly those lanes whose top bit was set.
*/
inline uint32_t xtime32(uint32_t s) {
   const uint32_t lo_bit = 0x01010101;
   const uint32_t mask = 0x7F7F7F7F;
   const uint32_t poly = 0x1B;

   return ((s & mask) << 1) ^ (((s >> 7) & lo_bit) * poly);
}
636

637
inline uint32_t InvMixColumn(uint32_t s1) {
204,924✔
638
   const uint32_t s2 = xtime32(s1);
204,924✔
639
   const uint32_t s4 = xtime32(s2);
204,924✔
640
   const uint32_t s8 = xtime32(s4);
204,924✔
641
   const uint32_t s9 = s8 ^ s1;
204,924✔
642
   const uint32_t s11 = s9 ^ s2;
204,924✔
643
   const uint32_t s13 = s9 ^ s4;
204,924✔
644
   const uint32_t s14 = s8 ^ s4 ^ s2;
204,924✔
645

646
   return s14 ^ rotr<8>(s9) ^ rotr<16>(s13) ^ rotr<24>(s11);
204,924✔
647
}
648

649
void InvMixColumn_x4(uint32_t x[4]) {
51,231✔
650
   x[0] = InvMixColumn(x[0]);
51,231✔
651
   x[1] = InvMixColumn(x[1]);
51,231✔
652
   x[2] = InvMixColumn(x[2]);
51,231✔
653
   x[3] = InvMixColumn(x[3]);
51,231✔
654
}
51,231✔
655

656
uint32_t SE_word(uint32_t x) {
48,213✔
657
   uint32_t I[8] = {0};
48,213✔
658

659
   for(size_t i = 0; i != 8; ++i) {
433,917✔
660
      I[i] = (x >> (7 - i)) & 0x01010101;
385,704✔
661
   }
662

663
   AES_SBOX(I);
48,213✔
664

665
   x = 0;
48,213✔
666

667
   for(size_t i = 0; i != 8; ++i) {
433,917✔
668
      x |= ((I[i] & 0x01010101) << (7 - i));
385,704✔
669
   }
670

671
   return x;
48,213✔
672
}
673

674
void aes_key_schedule(const uint8_t key[],
4,605✔
675
                      size_t length,
676
                      secure_vector<uint32_t>& EK,
677
                      secure_vector<uint32_t>& DK,
678
                      bool bswap_keys = false) {
679
   static const uint32_t RC[10] = {0x01000000,
4,605✔
680
                                   0x02000000,
681
                                   0x04000000,
682
                                   0x08000000,
683
                                   0x10000000,
684
                                   0x20000000,
685
                                   0x40000000,
686
                                   0x80000000,
687
                                   0x1B000000,
688
                                   0x36000000};
689

690
   const size_t X = length / 4;
4,605✔
691

692
   // Can't happen, but make static analyzers happy
693
   BOTAN_ASSERT_NOMSG(X == 4 || X == 6 || X == 8);
4,605✔
694

695
   const size_t rounds = (length / 4) + 6;
4,605✔
696

697
   // Help the optimizer
698
   BOTAN_ASSERT_NOMSG(rounds == 10 || rounds == 12 || rounds == 14);
4,605✔
699

700
   CT::poison(key, length);
4,605✔
701

702
   EK.resize(length + 28);
4,605✔
703
   DK.resize(length + 28);
4,605✔
704

705
   for(size_t i = 0; i != X; ++i) {
32,811✔
706
      EK[i] = load_be<uint32_t>(key, i);
28,206✔
707
   }
708

709
   for(size_t i = X; i < 4 * (rounds + 1); i += X) {
42,576✔
710
      EK[i] = EK[i - X] ^ RC[(i - X) / X] ^ rotl<8>(SE_word(EK[i - 1]));
37,971✔
711

712
      for(size_t j = 1; j != X && (i + j) < EK.size(); ++j) {
213,558✔
713
         EK[i + j] = EK[i + j - X];
175,587✔
714

715
         if(X == 8 && j == 4) {
175,587✔
716
            EK[i + j] ^= SE_word(EK[i + j - 1]);
10,242✔
717
         } else {
718
            EK[i + j] ^= EK[i + j - 1];
165,345✔
719
         }
720
      }
721
   }
722

723
   for(size_t i = 0; i != 4 * (rounds + 1); i += 4) {
65,046✔
724
      DK[i] = EK[4 * rounds - i];
60,441✔
725
      DK[i + 1] = EK[4 * rounds - i + 1];
60,441✔
726
      DK[i + 2] = EK[4 * rounds - i + 2];
60,441✔
727
      DK[i + 3] = EK[4 * rounds - i + 3];
60,441✔
728
   }
729

730
   for(size_t i = 4; i != 4 * rounds; i += 4) {
55,836✔
731
      InvMixColumn_x4(&DK[i]);
51,231✔
732
   }
733

734
   if(bswap_keys) {
4,605✔
735
      // HW AES on little endian needs the subkeys to be byte reversed
736
      for(size_t i = 0; i != EK.size(); ++i) {
28,575✔
737
         EK[i] = reverse_bytes(EK[i]);
28,020✔
738
      }
739
      for(size_t i = 0; i != DK.size(); ++i) {
28,575✔
740
         DK[i] = reverse_bytes(DK[i]);
28,020✔
741
      }
742
   }
743

744
   CT::unpoison(EK.data(), EK.size());
4,605✔
745
   CT::unpoison(DK.data(), DK.size());
4,605✔
746
   CT::unpoison(key, length);
4,605✔
747
}
4,605✔
748

749
/*
* Return the parallelism value reported by the AES classes for the
* implementation selected at runtime: 8 when AVX2 VAES is available, 4 with
* other hardware AES support, and 2 for both the vperm and bitsliced code.
* Each check is compiled in only if the matching BOTAN_HAS_* macro is defined.
*/
size_t aes_parallelism() {
#if defined(BOTAN_HAS_AES_VAES)
   if(CPUID::has_avx2_vaes()) {
      return 8;  // pipelined
   }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes()) {
      return 4;  // pipelined
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm()) {
      return 2;  // pipelined
   }
#endif

   // bitsliced:
   return 2;
}
771

772
/*
* Return the name of the AES implementation selected at runtime: "vaes",
* "cpu" (hardware AES), "vperm", or "base" for the bitsliced fallback.
* The checks run in the same order as in aes_parallelism.
*/
const char* aes_provider() {
#if defined(BOTAN_HAS_AES_VAES)
   if(CPUID::has_avx2_vaes()) {
      return "vaes";
   }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes()) {
      return "cpu";
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm()) {
      return "vperm";
   }
#endif

   return "base";
}
793

794
}  // namespace
795

796
std::string AES_128::provider() const {
1,540✔
797
   return aes_provider();
1,540✔
798
}
799

800
std::string AES_192::provider() const {
1,804✔
801
   return aes_provider();
1,804✔
802
}
803

804
std::string AES_256::provider() const {
2,060✔
805
   return aes_provider();
2,060✔
806
}
807

808
size_t AES_128::parallelism() const {
14,409✔
809
   return aes_parallelism();
14,409✔
810
}
811

812
size_t AES_192::parallelism() const {
6,202✔
813
   return aes_parallelism();
6,202✔
814
}
815

816
size_t AES_256::parallelism() const {
16,759✔
817
   return aes_parallelism();
16,759✔
818
}
819

820
bool AES_128::has_keying_material() const {
843,393✔
821
   return !m_EK.empty();
843,393✔
822
}
823

824
bool AES_192::has_keying_material() const {
62,124✔
825
   return !m_EK.empty();
62,124✔
826
}
827

828
bool AES_256::has_keying_material() const {
21,282,949✔
829
   return !m_EK.empty();
21,282,949✔
830
}
831

832
void AES_128::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
737,977✔
833
   assert_key_material_set();
737,977✔
834

835
#if defined(BOTAN_HAS_AES_VAES)
836
   if(CPUID::has_avx2_vaes()) {
732,274✔
837
      return x86_vaes_encrypt_n(in, out, blocks);
728,818✔
838
   }
839
#endif
840

841
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
842
   if(CPUID::has_hw_aes()) {
3,456✔
843
      return hw_aes_encrypt_n(in, out, blocks);
1,152✔
844
   }
845
#endif
846

847
#if defined(BOTAN_HAS_AES_VPERM)
848
   if(CPUID::has_vperm()) {
2,304✔
849
      return vperm_encrypt_n(in, out, blocks);
1,152✔
850
   }
851
#endif
852

853
   aes_encrypt_n(in, out, blocks, m_EK);
1,152✔
854
}
855

856
void AES_128::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
39,344✔
857
   assert_key_material_set();
39,344✔
858

859
#if defined(BOTAN_HAS_AES_VAES)
860
   if(CPUID::has_avx2_vaes()) {
36,271✔
861
      return x86_vaes_decrypt_n(in, out, blocks);
33,967✔
862
   }
863
#endif
864

865
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
866
   if(CPUID::has_hw_aes()) {
2,304✔
867
      return hw_aes_decrypt_n(in, out, blocks);
768✔
868
   }
869
#endif
870

871
#if defined(BOTAN_HAS_AES_VPERM)
872
   if(CPUID::has_vperm()) {
1,536✔
873
      return vperm_decrypt_n(in, out, blocks);
768✔
874
   }
875
#endif
876

877
   aes_decrypt_n(in, out, blocks, m_DK);
768✔
878
}
879

880
/**
* Set up the AES-128 round keys from `key` using the schedule routine that
* matches the backend available at runtime.
*
* Probe order: AES-NI, VAES, generic hardware AES, vperm. When none applies,
* aes_key_schedule is called with its default trailing argument.
*/
void AES_128::key_schedule(std::span<const uint8_t> key) {
   const uint8_t* const key_bytes = key.data();
   const size_t key_len = key.size();

#if defined(BOTAN_HAS_AES_NI)
   if(CPUID::has_aes_ni()) {
      aesni_key_schedule(key_bytes, key_len);
      return;
   }
#endif

#if defined(BOTAN_HAS_AES_VAES)
   if(CPUID::has_avx2_vaes()) {
      // The VAES path always passes `true` as the trailing flag
      aes_key_schedule(key_bytes, key_len, m_EK, m_DK, true);
      return;
   }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes()) {
      // The trailing flag is true only when the native byte order is little-endian
      aes_key_schedule(key_bytes, key_len, m_EK, m_DK, std::endian::native == std::endian::little);
      return;
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm()) {
      vperm_key_schedule(key_bytes, key_len);
      return;
   }
#endif

   // Fallback when no accelerated backend was taken
   aes_key_schedule(key_bytes, key_len, m_EK, m_DK);
}
908

909
void AES_128::clear() {
7,882✔
910
   zap(m_EK);
7,882✔
911
   zap(m_DK);
7,882✔
912
}
7,882✔
913

914
void AES_192::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
31,709✔
915
   assert_key_material_set();
31,709✔
916

917
#if defined(BOTAN_HAS_AES_VAES)
918
   if(CPUID::has_avx2_vaes()) {
28,049✔
919
      return x86_vaes_encrypt_n(in, out, blocks);
23,990✔
920
   }
921
#endif
922

923
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
924
   if(CPUID::has_hw_aes()) {
4,059✔
925
      return hw_aes_encrypt_n(in, out, blocks);
1,353✔
926
   }
927
#endif
928

929
#if defined(BOTAN_HAS_AES_VPERM)
930
   if(CPUID::has_vperm()) {
2,706✔
931
      return vperm_encrypt_n(in, out, blocks);
1,353✔
932
   }
933
#endif
934

935
   aes_encrypt_n(in, out, blocks, m_EK);
1,353✔
936
}
937

938
void AES_192::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
7,978✔
939
   assert_key_material_set();
7,978✔
940

941
#if defined(BOTAN_HAS_AES_VAES)
942
   if(CPUID::has_avx2_vaes()) {
4,370✔
943
      return x86_vaes_decrypt_n(in, out, blocks);
1,664✔
944
   }
945
#endif
946

947
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
948
   if(CPUID::has_hw_aes()) {
2,706✔
949
      return hw_aes_decrypt_n(in, out, blocks);
902✔
950
   }
951
#endif
952

953
#if defined(BOTAN_HAS_AES_VPERM)
954
   if(CPUID::has_vperm()) {
1,804✔
955
      return vperm_decrypt_n(in, out, blocks);
902✔
956
   }
957
#endif
958

959
   aes_decrypt_n(in, out, blocks, m_DK);
902✔
960
}
961

962
/**
* Set up the AES-192 round keys from `key` using the schedule routine that
* matches the backend available at runtime.
*
* Probe order: AES-NI, VAES, generic hardware AES, vperm. When none applies,
* aes_key_schedule is called with its default trailing argument.
*/
void AES_192::key_schedule(std::span<const uint8_t> key) {
   const uint8_t* const key_bytes = key.data();
   const size_t key_len = key.size();

#if defined(BOTAN_HAS_AES_NI)
   if(CPUID::has_aes_ni()) {
      aesni_key_schedule(key_bytes, key_len);
      return;
   }
#endif

#if defined(BOTAN_HAS_AES_VAES)
   if(CPUID::has_avx2_vaes()) {
      // The VAES path always passes `true` as the trailing flag
      aes_key_schedule(key_bytes, key_len, m_EK, m_DK, true);
      return;
   }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes()) {
      // The trailing flag is true only when the native byte order is little-endian
      aes_key_schedule(key_bytes, key_len, m_EK, m_DK, std::endian::native == std::endian::little);
      return;
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm()) {
      vperm_key_schedule(key_bytes, key_len);
      return;
   }
#endif

   // Fallback when no accelerated backend was taken
   aes_key_schedule(key_bytes, key_len, m_EK, m_DK);
}
990

991
void AES_192::clear() {
4,078✔
992
   zap(m_EK);
4,078✔
993
   zap(m_DK);
4,078✔
994
}
4,078✔
995

996
void AES_256::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
423,886✔
997
   assert_key_material_set();
423,886✔
998

999
#if defined(BOTAN_HAS_AES_VAES)
1000
   if(CPUID::has_avx2_vaes()) {
419,705✔
1001
      return x86_vaes_encrypt_n(in, out, blocks);
415,070✔
1002
   }
1003
#endif
1004

1005
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
1006
   if(CPUID::has_hw_aes()) {
4,635✔
1007
      return hw_aes_encrypt_n(in, out, blocks);
1,545✔
1008
   }
1009
#endif
1010

1011
#if defined(BOTAN_HAS_AES_VPERM)
1012
   if(CPUID::has_vperm()) {
3,090✔
1013
      return vperm_encrypt_n(in, out, blocks);
1,545✔
1014
   }
1015
#endif
1016

1017
   aes_encrypt_n(in, out, blocks, m_EK);
1,545✔
1018
}
1019

1020
void AES_256::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
15,589✔
1021
   assert_key_material_set();
15,589✔
1022

1023
#if defined(BOTAN_HAS_AES_VAES)
1024
   if(CPUID::has_avx2_vaes()) {
11,465✔
1025
      return x86_vaes_decrypt_n(in, out, blocks);
8,375✔
1026
   }
1027
#endif
1028

1029
#if defined(BOTAN_HAS_HW_AES_SUPPORT)
1030
   if(CPUID::has_hw_aes()) {
3,090✔
1031
      return hw_aes_decrypt_n(in, out, blocks);
1,030✔
1032
   }
1033
#endif
1034

1035
#if defined(BOTAN_HAS_AES_VPERM)
1036
   if(CPUID::has_vperm()) {
2,060✔
1037
      return vperm_decrypt_n(in, out, blocks);
1,030✔
1038
   }
1039
#endif
1040

1041
   aes_decrypt_n(in, out, blocks, m_DK);
1,030✔
1042
}
1043

1044
/**
* Set up the AES-256 round keys from `key` using the schedule routine that
* matches the backend available at runtime.
*
* Probe order: AES-NI, VAES, generic hardware AES, vperm. When none applies,
* aes_key_schedule is called with its default trailing argument.
*/
void AES_256::key_schedule(std::span<const uint8_t> key) {
   const uint8_t* const key_bytes = key.data();
   const size_t key_len = key.size();

#if defined(BOTAN_HAS_AES_NI)
   if(CPUID::has_aes_ni()) {
      aesni_key_schedule(key_bytes, key_len);
      return;
   }
#endif

#if defined(BOTAN_HAS_AES_VAES)
   if(CPUID::has_avx2_vaes()) {
      // The VAES path always passes `true` as the trailing flag
      aes_key_schedule(key_bytes, key_len, m_EK, m_DK, true);
      return;
   }
#endif

#if defined(BOTAN_HAS_HW_AES_SUPPORT)
   if(CPUID::has_hw_aes()) {
      // The trailing flag is true only when the native byte order is little-endian
      aes_key_schedule(key_bytes, key_len, m_EK, m_DK, std::endian::native == std::endian::little);
      return;
   }
#endif

#if defined(BOTAN_HAS_AES_VPERM)
   if(CPUID::has_vperm()) {
      vperm_key_schedule(key_bytes, key_len);
      return;
   }
#endif

   // Fallback when no accelerated backend was taken
   aes_key_schedule(key_bytes, key_len, m_EK, m_DK);
}
1072

1073
void AES_256::clear() {
99,655✔
1074
   zap(m_EK);
99,655✔
1075
   zap(m_DK);
99,655✔
1076
}
99,655✔
1077

1078
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc