• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 14120723934

28 Mar 2025 02:31AM UTC coverage: 91.539% (+0.004%) from 91.535%
14120723934

Pull #4798

github

web-flow
Merge db2c0eef1 into 70cd16046
Pull Request #4798: Move most architecture-specific logic out of CPUID and into a submodule

95384 of 104200 relevant lines covered (91.54%)

11667903.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.77
/src/lib/utils/cpuid/cpuid_x86/cpuid_x86.cpp
1
/*
2
* Runtime CPU detection for x86
3
* (C) 2009,2010,2013,2017,2023,2024 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7

8
#include <botan/internal/cpuid.h>
9

10
#include <botan/assert.h>
11
#include <botan/mem_ops.h>
12
#include <botan/internal/loadstor.h>
13
#include <botan/internal/target_info.h>
14

15
#include <immintrin.h>
16

17
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
18
   #include <intrin.h>
19
#endif
20

21
namespace Botan {
22

23
namespace {
24

25
void invoke_cpuid(uint32_t type, uint32_t out[4]) {
26,034✔
26
   clear_mem(out, 4);
52,068✔
27

28
#if defined(BOTAN_USE_GCC_INLINE_ASM)
29
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));
26,034✔
30

31
#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
32
   __cpuid((int*)out, type);
33

34
#else
35
   BOTAN_UNUSED(type);
36
   #warning "No way of calling x86 cpuid instruction for this compiler"
37
#endif
38
}
39

40
void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
26,034✔
41
   clear_mem(out, 4);
52,068✔
42

43
#if defined(BOTAN_USE_GCC_INLINE_ASM)
44
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));
26,034✔
45

46
#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
47
   __cpuidex((int*)out, type, level);
48

49
#else
50
   BOTAN_UNUSED(type, level);
51
   #warning "No way of calling x86 cpuid instruction for this compiler"
52
#endif
53
}
54

55
/*
* Read extended control register 0 (XCR0) using the XGETBV instruction.
*
* The caller in this file only invokes this after CPUID has reported
* OSXSAVE, since the function is compiled with the "xsave" ISA enabled.
*/
BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
   const uint32_t xcr_index = 0;
   return _xgetbv(xcr_index);
}
58

59
}  // namespace
60

61
uint32_t CPUID::CPUID_Data::detect_cpu_features(uint32_t allowed) {
13,017✔
62
   enum class x86_CPUID_1_bits : uint64_t {
13,017✔
63
      RDTSC = (1ULL << 4),
64
      SSE2 = (1ULL << 26),
65
      CLMUL = (1ULL << 33),
66
      SSSE3 = (1ULL << 41),
67
      SSE41 = (1ULL << 51),
68
      AESNI = (1ULL << 57),
69
      // AVX + OSXSAVE
70
      OSXSAVE = (1ULL << 59) | (1ULL << 60),
71
      RDRAND = (1ULL << 62)
72
   };
73

74
   enum class x86_CPUID_7_bits : uint64_t {
13,017✔
75
      BMI1 = (1ULL << 3),
76
      AVX2 = (1ULL << 5),
77
      BMI2 = (1ULL << 8),
78
      BMI_1_AND_2 = BMI1 | BMI2,
79
      AVX512_F = (1ULL << 16),
80
      AVX512_DQ = (1ULL << 17),
81
      RDSEED = (1ULL << 18),
82
      ADX = (1ULL << 19),
83
      AVX512_IFMA = (1ULL << 21),
84
      SHA = (1ULL << 29),
85
      AVX512_BW = (1ULL << 30),
86
      AVX512_VL = (1ULL << 31),
87
      AVX512_VBMI = (1ULL << 33),
88
      AVX512_VBMI2 = (1ULL << 38),
89
      GFNI = (1ULL << 40),
90
      AVX512_VAES = (1ULL << 41),
91
      AVX512_VCLMUL = (1ULL << 42),
92
      AVX512_VBITALG = (1ULL << 44),
93

94
      /*
95
      We only enable AVX512 support if all of the below flags are available
96

97
      This is more than we strictly need for most uses, however it also has
98
      the effect of preventing execution of AVX512 codepaths on cores that
99
      have serious downclocking problems when AVX512 code executes,
100
      especially Intel Skylake.
101

102
      VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
103
      Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
104
      executing AVX512.
105

106
      There is nothing stopping some future processor from supporting the
107
      above flags and having AVX512 penalties, but maybe you should not have
108
      bought such a processor.
109
      */
110
      AVX512_PROFILE =
111
         AVX512_F | AVX512_DQ | AVX512_IFMA | AVX512_BW | AVX512_VL | AVX512_VBMI | AVX512_VBMI2 | AVX512_VBITALG,
112
   };
113

114
   // NOLINTNEXTLINE(performance-enum-size)
115
   enum class x86_CPUID_7_1_bits : uint64_t {
13,017✔
116
      SHA512 = (1 << 0),
117
      SM3 = (1 << 1),
118
      SM4 = (1 << 2),
119
   };
120

121
   uint32_t feat = 0;
13,017✔
122
   uint32_t cpuid[4] = {0};
13,017✔
123
   bool has_os_ymm_support = false;
13,017✔
124
   bool has_os_zmm_support = false;
13,017✔
125

126
   // CPUID 0: vendor identification, max sublevel
127
   invoke_cpuid(0, cpuid);
13,017✔
128

129
   const uint32_t max_supported_sublevel = cpuid[0];
13,017✔
130

131
   if(max_supported_sublevel >= 1) {
13,017✔
132
      // CPUID 1: feature bits
133
      invoke_cpuid(1, cpuid);
13,017✔
134
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
13,017✔
135

136
      feat |= if_set(flags0, x86_CPUID_1_bits::RDTSC, CPUFeature::Bit::RDTSC, allowed);
13,017✔
137

138
      feat |= if_set(flags0, x86_CPUID_1_bits::RDRAND, CPUFeature::Bit::RDRAND, allowed);
13,017✔
139

140
      feat |= if_set(flags0, x86_CPUID_1_bits::SSE2, CPUFeature::Bit::SSE2, allowed);
13,017✔
141

142
      if(feat & CPUFeature::Bit::SSE2) {
13,017✔
143
         feat |= if_set(flags0, x86_CPUID_1_bits::SSSE3, CPUFeature::Bit::SSSE3, allowed);
13,016✔
144

145
         if(feat & CPUFeature::Bit::SSSE3) {
13,016✔
146
            feat |= if_set(flags0, x86_CPUID_1_bits::CLMUL, CPUFeature::Bit::CLMUL, allowed);
13,015✔
147
            feat |= if_set(flags0, x86_CPUID_1_bits::AESNI, CPUFeature::Bit::AESNI, allowed);
26,030✔
148
         }
149

150
         const uint64_t osxsave64 = static_cast<uint64_t>(x86_CPUID_1_bits::OSXSAVE);
13,016✔
151
         if((flags0 & osxsave64) == osxsave64) {
13,016✔
152
            const uint64_t xcr_flags = xgetbv();
13,016✔
153
            if((xcr_flags & 0x6) == 0x6) {
13,016✔
154
               has_os_ymm_support = true;
13,016✔
155
               has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
13,016✔
156
            }
157
         }
158
      }
159
   }
160

161
   if(max_supported_sublevel >= 7) {
13,017✔
162
      clear_mem(cpuid, 4);
13,017✔
163
      invoke_cpuid_sublevel(7, 0, cpuid);
13,017✔
164

165
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
13,017✔
166

167
      clear_mem(cpuid, 4);
13,017✔
168
      invoke_cpuid_sublevel(7, 1, cpuid);
13,017✔
169
      const uint32_t flags7_1 = cpuid[0];
13,017✔
170

171
      feat |= if_set(flags7, x86_CPUID_7_bits::RDSEED, CPUFeature::Bit::RDSEED, allowed);
13,017✔
172
      feat |= if_set(flags7, x86_CPUID_7_bits::ADX, CPUFeature::Bit::ADX, allowed);
13,017✔
173

174
      /*
175
      We only set the BMI bit if both BMI1 and BMI2 are supported, since
176
      typically we want to use both extensions in the same code.
177
      */
178
      feat |= if_set(flags7, x86_CPUID_7_bits::BMI_1_AND_2, CPUFeature::Bit::BMI, allowed);
13,017✔
179

180
      if(feat & CPUFeature::Bit::SSSE3) {
13,017✔
181
         feat |= if_set(flags7, x86_CPUID_7_bits::SHA, CPUFeature::Bit::SHA, allowed);
13,015✔
182
         feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM3, CPUFeature::Bit::SM3, allowed);
13,015✔
183
      }
184

185
      if(has_os_ymm_support) {
13,017✔
186
         feat |= if_set(flags7, x86_CPUID_7_bits::AVX2, CPUFeature::Bit::AVX2, allowed);
13,016✔
187

188
         if(feat & CPUFeature::Bit::AVX2) {
13,016✔
189
            feat |= if_set(flags7, x86_CPUID_7_bits::GFNI, CPUFeature::Bit::GFNI, allowed);
13,015✔
190
            feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUFeature::Bit::AVX2_AES, allowed);
13,015✔
191
            feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUFeature::Bit::AVX2_CLMUL, allowed);
13,015✔
192
            feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SHA512, CPUFeature::Bit::SHA512, allowed);
13,015✔
193
            feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM4, CPUFeature::Bit::SM4, allowed);
13,015✔
194

195
            if(has_os_zmm_support) {
13,015✔
196
               feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_PROFILE, CPUFeature::Bit::AVX512, allowed);
×
197

198
               if(feat & CPUFeature::Bit::AVX512) {
×
199
                  feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUFeature::Bit::AVX512_AES, allowed);
×
200
                  feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUFeature::Bit::AVX512_CLMUL, allowed);
×
201
               }
202
            }
203
         }
204
      }
205
   }
206

207
/*
208
   * If we don't have access to CPUID, we can still safely assume that
209
   * any x86-64 processor has SSE2 and RDTSC
210
   */
211
#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
212
   if(feat == 0) {
13,017✔
213
      feat |= CPUFeature::Bit::SSE2 & allowed;
×
214
      feat |= CPUFeature::Bit::RDTSC & allowed;
×
215
   }
216
#endif
217

218
   return feat;
13,017✔
219
}
220

221
}  // namespace Botan
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc