• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 11494611882

24 Oct 2024 07:27AM UTC coverage: 91.139% (-0.001%) from 91.14%
11494611882

Pull #4402

github

web-flow
Merge 16ac6de21 into b09c72b63
Pull Request #4402: Fix CPUID detection when looking at multiple bits

91057 of 99910 relevant lines covered (91.14%)

9359946.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.77
/src/lib/utils/cpuid/cpuid_x86.cpp
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017,2023,2024 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7

8
#include <botan/internal/cpuid.h>
9

10
#include <botan/mem_ops.h>
11
#include <botan/internal/loadstor.h>
12

13
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
14
   #include <immintrin.h>
15
#endif
16

17
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
18
   #include <intrin.h>
19
#endif
20

21
namespace Botan {
22

23
#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
24

25
namespace {
26

27
/*
* Execute the x86 CPUID instruction for leaf `type` and store the four result
* registers in `out`, in the order EAX, EBX, ECX, EDX.
*
* `out` is first passed to clear_mem(out, 4), so on the fallback path (no
* known way of issuing CPUID with this compiler) the caller sees whatever
* clear_mem leaves behind (presumably zeros; clear_mem is declared in
* botan/mem_ops.h, not in this file).
*
* ECX is not supplied as an input here, so this helper is only suitable for
* leaves that take no subleaf; this file calls it for leaves 0 and 1.
*/
void invoke_cpuid(uint32_t type, uint32_t out[4]) {
26,046✔
28
   clear_mem(out, 4);
52,092✔
29

30
   #if defined(BOTAN_USE_GCC_INLINE_ASM)
31
   // The "0"(type) input is tied to output operand 0, so the leaf number
   // enters via EAX; the outputs are read back from EAX/EBX/ECX/EDX.
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));
26,046✔
32

33
   #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
34
   // MSVC intrinsic takes an int[4]; the cast only reinterprets the buffer.
   __cpuid((int*)out, type);
35

36
   #else
37
   // No CPUID available: `out` keeps the cleared contents and the build
   // emits a warning.
   BOTAN_UNUSED(type);
38
      #warning "No way of calling x86 cpuid instruction for this compiler"
39
   #endif
40
}
41

42
/*
* Execute the x86 CPUID instruction for leaf `type`, subleaf `level`, and
* store the four result registers in `out`, in the order EAX, EBX, ECX, EDX.
*
* As in invoke_cpuid, `out` is passed to clear_mem(out, 4) before anything
* else, so callers do not need to clear the buffer themselves and the
* fallback path (no CPUID available) returns the cleared buffer.
*/
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
26,046✔
43
   clear_mem(out, 4);
52,092✔
44

45
   #if defined(BOTAN_USE_GCC_INLINE_ASM)
46
   // "0"(type) ties the leaf to operand 0 (EAX) and "2"(level) ties the
   // subleaf to operand 2 (ECX).
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));
26,046✔
47

48
   #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
49
   // __cpuidex is the MSVC intrinsic variant that also takes the subleaf.
   __cpuidex((int*)out, type, level);
50

51
   #else
52
   // No CPUID available: `out` keeps the cleared contents and the build
   // emits a warning.
   BOTAN_UNUSED(type, level);
53
      #warning "No way of calling x86 cpuid instruction for this compiler"
54
   #endif
55
}
56

57
/*
* Return the value of _xgetbv(0), i.e. extended control register 0.
*
* The only caller in this file invokes it after confirming that both bits of
* x86_CPUID_1_bits::OSXSAVE are set in CPUID leaf 1, and then inspects the
* 0x6 and 0xE0 bit groups of the result.
*
* BOTAN_FUNC_ISA("xsave") is defined outside this file; presumably it enables
* the xsave ISA extension for this one function so that the intrinsic
* compiles without a global compiler flag (confirm against its definition).
*/
BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
13,022✔
58
   return _xgetbv(0);
13,022✔
59
}
60

61
}  // namespace
62

63
/*
* Probe CPUID leaf 0, leaf 1 and leaf 7 (subleaves 0 and 1) and translate the
* hardware feature bits into a bitmask of CPUID::CPUID_*_BIT values.
*
* @param allowed forwarded to every if_set() call and ANDed into the x86-64
*        fallback bits; presumably a mask limiting which feature bits may be
*        reported (if_set is defined outside this file; confirm there)
* @return the OR of all feature bits that were detected
*
* Features are reported hierarchically rather than independently:
*  - SSSE3 is only examined if SSE2 was reported
*  - CLMUL, AESNI, SHA and SM3 are only examined if SSSE3 was reported
*  - AVX2 requires OS support for YMM state (checked via xgetbv)
*  - GFNI, AVX2_AES, AVX2_CLMUL, SHA512 and SM4 require AVX2
*  - AVX512 additionally requires OS support for ZMM state and every bit in
*    AVX512_PROFILE; AVX512_AES and AVX512_CLMUL require AVX512
*/
uint32_t CPUID::CPUID_Data::detect_cpu_features(uint32_t allowed) {
13,023✔
64
   // Bit positions within flags0 below: EDX of leaf 1 occupies bits 0-31 and
   // ECX of leaf 1 occupies bits 32-63. SSE41 is declared but not consulted
   // anywhere in this function.
   enum class x86_CPUID_1_bits : uint64_t {
13,023✔
65
      RDTSC = (1ULL << 4),
66
      SSE2 = (1ULL << 26),
67
      CLMUL = (1ULL << 33),
68
      SSSE3 = (1ULL << 41),
69
      SSE41 = (1ULL << 51),
70
      AESNI = (1ULL << 57),
71
      // AVX + OSXSAVE
72
      OSXSAVE = (1ULL << 59) | (1ULL << 60),
73
      RDRAND = (1ULL << 62)
74
   };
75

76
   // Bit positions within flags7 below: EBX of leaf 7 subleaf 0 occupies
   // bits 0-31 and ECX occupies bits 32-63.
   enum class x86_CPUID_7_bits : uint64_t {
13,023✔
77
      BMI1 = (1ULL << 3),
78
      AVX2 = (1ULL << 5),
79
      BMI2 = (1ULL << 8),
80
      BMI_1_AND_2 = BMI1 | BMI2,
81
      AVX512_F = (1ULL << 16),
82
      AVX512_DQ = (1ULL << 17),
83
      RDSEED = (1ULL << 18),
84
      ADX = (1ULL << 19),
85
      AVX512_IFMA = (1ULL << 21),
86
      SHA = (1ULL << 29),
87
      AVX512_BW = (1ULL << 30),
88
      AVX512_VL = (1ULL << 31),
89
      AVX512_VBMI = (1ULL << 33),
90
      AVX512_VBMI2 = (1ULL << 38),
91
      GFNI = (1ULL << 40),
92
      AVX512_VAES = (1ULL << 41),
93
      AVX512_VCLMUL = (1ULL << 42),
94
      AVX512_VBITALG = (1ULL << 44),
95

96
      /*
97
      We only enable AVX512 support if all of the below flags are available
98

99
      This is more than we strictly need for most uses, however it also has
100
      the effect of preventing execution of AVX512 codepaths on cores that
101
      have serious downclocking problems when AVX512 code executes,
102
      especially Intel Skylake.
103

104
      VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
105
      Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
106
      executing AVX512.
107

108
      There is nothing stopping some future processor from supporting the
109
      above flags and having AVX512 penalties, but maybe you should not have
110
      bought such a processor.
111
      */
112
      AVX512_PROFILE =
113
         AVX512_F | AVX512_DQ | AVX512_IFMA | AVX512_BW | AVX512_VL | AVX512_VBMI | AVX512_VBMI2 | AVX512_VBITALG,
114
   };
115

116
   // Bit positions within flags7_1 below (EAX of leaf 7 subleaf 1).
   // NOLINTNEXTLINE(performance-enum-size)
117
   enum class x86_CPUID_7_1_bits : uint64_t {
13,023✔
118
      SHA512 = (1 << 0),
119
      SM3 = (1 << 1),
120
      SM4 = (1 << 2),
121
   };
122

123
   uint32_t feat = 0;
13,023✔
124
   uint32_t cpuid[4] = {0};
13,023✔
125
   bool has_os_ymm_support = false;
13,023✔
126
   bool has_os_zmm_support = false;
13,023✔
127

128
   // CPUID 0: vendor identification, max sublevel
129
   invoke_cpuid(0, cpuid);
13,023✔
130

131
   // EAX of leaf 0; compared against leaf numbers 1 and 7 below, so despite
   // the name it is used as the highest supported leaf.
   const uint32_t max_supported_sublevel = cpuid[0];
13,023✔
132

133
   if(max_supported_sublevel >= 1) {
13,023✔
134
      // CPUID 1: feature bits
135
      invoke_cpuid(1, cpuid);
13,023✔
136
      // Pack ECX (cpuid[2]) into the high half and EDX (cpuid[3]) into the
      // low half, matching the layout assumed by x86_CPUID_1_bits.
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
13,023✔
137

138
      feat |= if_set(flags0, x86_CPUID_1_bits::RDTSC, CPUID::CPUID_RDTSC_BIT, allowed);
13,023✔
139

140
      feat |= if_set(flags0, x86_CPUID_1_bits::RDRAND, CPUID::CPUID_RDRAND_BIT, allowed);
13,023✔
141

142
      feat |= if_set(flags0, x86_CPUID_1_bits::SSE2, CPUID::CPUID_SSE2_BIT, allowed);
13,023✔
143

144
      // Everything below is gated on the SSE2 bit having made it into feat
      // (i.e. after whatever filtering if_set applies with `allowed`).
      if(feat & CPUID::CPUID_SSE2_BIT) {
13,023✔
145
         feat |= if_set(flags0, x86_CPUID_1_bits::SSSE3, CPUID::CPUID_SSSE3_BIT, allowed);
13,022✔
146

147
         if(feat & CPUID::CPUID_SSSE3_BIT) {
13,022✔
148
            feat |= if_set(flags0, x86_CPUID_1_bits::CLMUL, CPUID::CPUID_CLMUL_BIT, allowed);
13,021✔
149
            feat |= if_set(flags0, x86_CPUID_1_bits::AESNI, CPUID::CPUID_AESNI_BIT, allowed);
26,042✔
150
         }
151

152
         // OSXSAVE is a two-bit mask, so the test must require both bits
         // (== mask) rather than just a non-zero intersection. xgetbv() is
         // only called once this check has passed.
         const uint64_t osxsave64 = static_cast<uint64_t>(x86_CPUID_1_bits::OSXSAVE);
13,022✔
153
         if((flags0 & osxsave64) == osxsave64) {
13,022✔
154
            const uint64_t xcr_flags = xgetbv();
13,022✔
155
            // 0x6: bits 1 and 2 must both be set for YMM state support.
            // 0xE0: bits 5, 6 and 7 must additionally all be set for ZMM
            // state support; ZMM is never reported without YMM.
            if((xcr_flags & 0x6) == 0x6) {
13,022✔
156
               has_os_ymm_support = true;
13,022✔
157
               has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
13,022✔
158
            }
159
         }
160
      }
161
   }
162

163
   if(max_supported_sublevel >= 7) {
13,023✔
164
      // Redundant with the clear_mem at the top of invoke_cpuid_sublevel.
      clear_mem(cpuid, 4);
13,023✔
165
      invoke_cpuid_sublevel(7, 0, cpuid);
13,023✔
166

167
      // Pack ECX (cpuid[2]) into the high half and EBX (cpuid[1]) into the
      // low half, matching the layout assumed by x86_CPUID_7_bits.
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
13,023✔
168

169
      // NOTE(review): subleaf 1 is queried unconditionally; EAX of subleaf 0
      // (cpuid[0] above) is not consulted to see whether subleaf 1 exists.
      // This relies on the CPU/hypervisor returning zeros for an unsupported
      // subleaf - confirm that holds for all supported targets.
      clear_mem(cpuid, 4);
13,023✔
170
      invoke_cpuid_sublevel(7, 1, cpuid);
13,023✔
171
      const uint32_t flags7_1 = cpuid[0];
13,023✔
172

173
      feat |= if_set(flags7, x86_CPUID_7_bits::RDSEED, CPUID::CPUID_RDSEED_BIT, allowed);
13,023✔
174
      feat |= if_set(flags7, x86_CPUID_7_bits::ADX, CPUID::CPUID_ADX_BIT, allowed);
13,023✔
175

176
      /*
177
      We only set the BMI bit if both BMI1 and BMI2 are supported, since
178
      typically we want to use both extensions in the same code.
179
      */
180
      feat |= if_set(flags7, x86_CPUID_7_bits::BMI_1_AND_2, CPUID::CPUID_BMI_BIT, allowed);
13,023✔
181

182
      if(feat & CPUID::CPUID_SSSE3_BIT) {
13,023✔
183
         feat |= if_set(flags7, x86_CPUID_7_bits::SHA, CPUID::CPUID_SHA_BIT, allowed);
13,021✔
184
         feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM3, CPUID::CPUID_SM3_BIT, allowed);
13,021✔
185
      }
186

187
      if(has_os_ymm_support) {
13,023✔
188
         feat |= if_set(flags7, x86_CPUID_7_bits::AVX2, CPUID::CPUID_AVX2_BIT, allowed);
13,022✔
189

190
         if(feat & CPUID::CPUID_AVX2_BIT) {
13,022✔
191
            feat |= if_set(flags7, x86_CPUID_7_bits::GFNI, CPUID::CPUID_GFNI_BIT, allowed);
13,021✔
192
            // The same VAES/VCLMUL hardware bits are reported twice: here as
            // the AVX2_* feature bits, and again below as the AVX512_* bits
            // once the AVX512 profile has been accepted.
            feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUID::CPUID_AVX2_AES_BIT, allowed);
13,021✔
193
            feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUID::CPUID_AVX2_CLMUL_BIT, allowed);
13,021✔
194
            feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SHA512, CPUID::CPUID_SHA512_BIT, allowed);
13,021✔
195
            feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM4, CPUID::CPUID_SM4_BIT, allowed);
13,021✔
196

197
            if(has_os_zmm_support) {
13,021✔
198
               // AVX512_PROFILE is a multi-bit mask; see the comment on its
               // definition for why all of those bits are required.
               feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_PROFILE, CPUID::CPUID_AVX512_BIT, allowed);
×
199

200
               if(feat & CPUID::CPUID_AVX512_BIT) {
×
201
                  feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUID::CPUID_AVX512_AES_BIT, allowed);
×
202
                  feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUID::CPUID_AVX512_CLMUL_BIT, allowed);
×
203
               }
204
            }
205
         }
206
      }
207
   }
208

209
   /*
210
   * If we don't have access to CPUID, we can still safely assume that
211
   * any x86-64 processor has SSE2 and RDTSC
212
   */
213
   #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
214
   // Only taken when no feature bit at all was set above; both fallback
   // bits are still masked with `allowed`.
   if(feat == 0) {
13,023✔
215
      feat |= CPUID::CPUID_SSE2_BIT & allowed;
×
216
      feat |= CPUID::CPUID_RDTSC_BIT & allowed;
×
217
   }
218
   #endif
219

220
   return feat;
13,023✔
221
}
222

223
#endif
224

225
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc