• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

randombit / botan / 13369064240

17 Feb 2025 11:20AM UTC coverage: 91.643% (+0.006%) from 91.637%
13369064240

push

github

web-flow
Merge pull request #4693 from randombit/jack/speed-compare

Add script to help compare performance changes

94979 of 103640 relevant lines covered (91.64%)

11047273.77 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.4
/src/cli/speed.cpp
1
/*
2
* (C) 2009,2010,2014,2015,2017,2018,2024 Jack Lloyd
3
* (C) 2015 Simon Warta (Kullo GmbH)
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7

8
#include "cli.h"
9
#include "perf.h"
10

11
#include <algorithm>
12
#include <chrono>
13
#include <iomanip>
14
#include <map>
15
#include <set>
16
#include <sstream>
17

18
// Always available:
19
#include <botan/version.h>
20
#include <botan/internal/cpuid.h>
21
#include <botan/internal/fmt.h>
22
#include <botan/internal/stl_util.h>
23

24
#if defined(BOTAN_HAS_OS_UTILS)
25
   #include <botan/internal/os_utils.h>
26
#endif
27

28
#if defined(BOTAN_HAS_ECC_GROUP)
29
   #include <botan/ec_group.h>
30
#endif
31

32
namespace Botan_CLI {
33

34
namespace {
35

36
class JSON_Output final {
2✔
37
   public:
38
      void add(const Timer& timer) { m_results.push_back(timer); }
2✔
39

40
      std::string print() const {
1✔
41
         std::ostringstream out;
1✔
42

43
         out << "[\n";
1✔
44

45
         out << "{"
1✔
46
             << "\"arch\": \"" << BOTAN_TARGET_ARCH << "\", "
47
             << "\"version\": \"" << Botan::short_version_cstr() << "\", "
48
             << "\"git\": \"" << BOTAN_VERSION_VC_REVISION << "\", "
49
             << "\"compiler\": \"" << BOTAN_COMPILER_INVOCATION_STRING << "\""
50
             << "},\n";
1✔
51

52
         for(size_t i = 0; i != m_results.size(); ++i) {
3✔
53
            const Timer& t = m_results[i];
2✔
54

55
            out << "{"
2✔
56
                << "\"algo\": \"" << t.get_name() << "\", "
2✔
57
                << "\"op\": \"" << t.doing() << "\", "
2✔
58
                << "\"events\": " << t.events() << ", ";
2✔
59

60
            if(t.cycles_consumed() > 0) {
4✔
61
               out << "\"cycles\": " << t.cycles_consumed() << ", ";
4✔
62
            }
63

64
            if(t.buf_size() > 0) {
2✔
65
               out << "\"bps\": " << static_cast<uint64_t>(t.events() / (t.value() / 1000000000.0)) << ", ";
2✔
66
               out << "\"buf_size\": " << t.buf_size() << ", ";
2✔
67
            }
68

69
            out << "\"nanos\": " << t.value() << "}";
2✔
70

71
            if(i != m_results.size() - 1) {
2✔
72
               out << ",";
1✔
73
            }
74

75
            out << "\n";
2✔
76
         }
77
         out << "]\n";
1✔
78

79
         return out.str();
2✔
80
      }
1✔
81

82
   private:
83
      std::vector<Timer> m_results;
84
};
85

86
class Summary final {
1✔
87
   public:
88
      Summary() = default;
1✔
89

90
      void add(const Timer& t) {
2✔
91
         if(t.buf_size() == 0) {
4✔
92
            m_ops_entries.push_back(t);
×
93
         } else {
94
            m_bps_entries[std::make_pair(t.doing(), t.get_name())].push_back(t);
4✔
95
         }
96
      }
2✔
97

98
      std::string print() {
1✔
99
         const size_t name_padding = 35;
1✔
100
         const size_t op_name_padding = 16;
1✔
101
         const size_t op_padding = 16;
1✔
102

103
         std::ostringstream result_ss;
1✔
104
         result_ss << std::fixed;
1✔
105

106
         if(!m_bps_entries.empty()) {
1✔
107
            result_ss << "\n";
1✔
108

109
            // add table header
110
            result_ss << std::setw(name_padding) << std::left << "algo" << std::setw(op_name_padding) << std::left
1✔
111
                      << "operation";
1✔
112

113
            for(const Timer& t : m_bps_entries.begin()->second) {
2✔
114
               result_ss << std::setw(op_padding) << std::right << (std::to_string(t.buf_size()) + " bytes");
2✔
115
            }
116
            result_ss << "\n";
1✔
117

118
            // add table entries
119
            for(const auto& entry : m_bps_entries) {
3✔
120
               if(entry.second.empty()) {
2✔
121
                  continue;
×
122
               }
123

124
               result_ss << std::setw(name_padding) << std::left << (entry.first.second) << std::setw(op_name_padding)
2✔
125
                         << std::left << (entry.first.first);
2✔
126

127
               for(const Timer& t : entry.second) {
4✔
128
                  if(t.events() == 0) {
2✔
129
                     result_ss << std::setw(op_padding) << std::right << "N/A";
×
130
                  } else {
131
                     result_ss << std::setw(op_padding) << std::right << std::setprecision(2)
2✔
132
                               << (t.bytes_per_second() / 1000.0);
2✔
133
                  }
134
               }
135

136
               result_ss << "\n";
2✔
137
            }
138

139
            result_ss << "\n[results are the number of 1000s bytes processed per second]\n";
1✔
140
         }
141

142
         if(!m_ops_entries.empty()) {
1✔
143
            result_ss << std::setprecision(6) << "\n";
×
144

145
            // sort entries
146
            std::sort(m_ops_entries.begin(), m_ops_entries.end());
×
147

148
            // add table header
149
            result_ss << std::setw(name_padding) << std::left << "algo" << std::setw(op_name_padding) << std::left
×
150
                      << "operation" << std::setw(op_padding) << std::right << "sec/op" << std::setw(op_padding)
×
151
                      << std::right << "op/sec"
×
152
                      << "\n";
×
153

154
            // add table entries
155
            for(const Timer& entry : m_ops_entries) {
×
156
               result_ss << std::setw(name_padding) << std::left << entry.get_name() << std::setw(op_name_padding)
×
157
                         << std::left << entry.doing() << std::setw(op_padding) << std::right
×
158
                         << entry.seconds_per_event() << std::setw(op_padding) << std::right
×
159
                         << entry.events_per_second() << "\n";
×
160
            }
161
         }
162

163
         return result_ss.str();
2✔
164
      }
1✔
165

166
   private:
167
      std::map<std::pair<std::string, std::string>, std::vector<Timer>> m_bps_entries;
168
      std::vector<Timer> m_ops_entries;
169
};
170

171
std::vector<size_t> unique_buffer_sizes(const std::string& cmdline_arg) {
28✔
172
   const size_t MAX_BUF_SIZE = 64 * 1024 * 1024;
28✔
173

174
   std::set<size_t> buf;
28✔
175
   for(const std::string& size_str : Command::split_on(cmdline_arg, ',')) {
54✔
176
      size_t x = 0;
29✔
177
      try {
29✔
178
         size_t converted = 0;
29✔
179
         x = static_cast<size_t>(std::stoul(size_str, &converted, 0));
29✔
180

181
         if(converted != size_str.size()) {
28✔
182
            throw CLI_Usage_Error("Invalid integer");
×
183
         }
184
      } catch(std::exception&) {
1✔
185
         throw CLI_Usage_Error("Invalid integer value '" + size_str + "' for option buf-size");
2✔
186
      }
1✔
187

188
      if(x == 0) {
28✔
189
         throw CLI_Usage_Error("Cannot have a zero-sized buffer");
2✔
190
      }
191

192
      if(x > MAX_BUF_SIZE) {
27✔
193
         throw CLI_Usage_Error("Specified buffer size is too large");
2✔
194
      }
195

196
      buf.insert(x);
26✔
197
   }
28✔
198

199
   return std::vector<size_t>(buf.begin(), buf.end());
28✔
200
}
25✔
201

202
std::string format_timer(const Timer& t, size_t time_unit) {
483✔
203
   constexpr size_t MiB = 1024 * 1024;
483✔
204

205
   std::ostringstream oss;
483✔
206

207
   oss << t.get_name() << " ";
483✔
208

209
   const uint64_t events = t.events();
483✔
210

211
   if(t.buf_size() == 0) {
483✔
212
      // Report operations/time unit
213

214
      if(events == 0) {
460✔
215
         oss << "no events ";
×
216
      } else {
217
         oss << static_cast<uint64_t>(t.events_per_second()) << ' ' << t.doing() << "/sec; ";
920✔
218

219
         if(time_unit == 1000) {
460✔
220
            oss << std::setprecision(2) << std::fixed << (t.milliseconds() / events) << " ms/op ";
460✔
221
         } else if(time_unit == 1000 * 1000) {
×
222
            oss << std::setprecision(2) << std::fixed << (t.microseconds() / events) << " us/op ";
×
223
         } else if(time_unit == 1000 * 1000 * 1000) {
×
224
            oss << std::setprecision(0) << std::fixed << (t.nanoseconds() / events) << " ns/op ";
×
225
         }
226

227
         if(t.cycles_consumed() != 0 && events > 0) {
920✔
228
            const double cycles_per_op = static_cast<double>(t.cycles_consumed()) / events;
460✔
229
            const int precision = (cycles_per_op < 10000) ? 2 : 0;
460✔
230
            oss << std::fixed << std::setprecision(precision) << cycles_per_op << " cycles/op ";
460✔
231
         }
232

233
         oss << "(" << events << " " << (events == 1 ? "op" : "ops") << " in " << t.milliseconds() << " ms)";
711✔
234
      }
235
   } else {
236
      // Bulk op - report bytes/time unit
237

238
      const double MiB_total = static_cast<double>(events) / MiB;
23✔
239
      const double MiB_per_sec = MiB_total / t.seconds();
23✔
240

241
      if(!t.doing().empty()) {
23✔
242
         oss << t.doing() << " ";
23✔
243
      }
244

245
      if(t.buf_size() > 0) {
23✔
246
         oss << "buffer size " << t.buf_size() << " bytes: ";
23✔
247
      }
248

249
      if(events == 0) {
23✔
250
         oss << "N/A ";
×
251
      } else {
252
         oss << std::fixed << std::setprecision(3) << MiB_per_sec << " MiB/sec ";
23✔
253
      }
254

255
      if(t.cycles_consumed() != 0 && events > 0) {
46✔
256
         const double cycles_per_byte = static_cast<double>(t.cycles_consumed()) / events;
23✔
257
         oss << std::fixed << std::setprecision(2) << cycles_per_byte << " cycles/byte ";
23✔
258
      }
259

260
      oss << "(" << MiB_total << " MiB in " << t.milliseconds() << " ms)";
23✔
261
   }
262

263
   return oss.str();
966✔
264
}
483✔
265

266
}  // namespace
267

268
class Speed final : public Command {
×
269
   public:
270
      Speed() :
29✔
271
            Command(
272
               "speed --msec=500 --format=default --time-unit=ms --ecc-groups= --buf-size=1024 --clear-cpuid= --cpu-clock-speed=0 --cpu-clock-ratio=1.0 *algos") {
58✔
273
      }
29✔
274

275
      static std::vector<std::string> default_benchmark_list() {
×
276
         /*
277
         This is not intended to be exhaustive: it just hits the high
278
         points of the most interesting or widely used algorithms.
279
         */
280
         // clang-format off
281
         return {
×
282
            /* Block ciphers */
283
            "AES-128",
284
            "AES-192",
285
            "AES-256",
286
            "ARIA-128",
287
            "ARIA-192",
288
            "ARIA-256",
289
            "Blowfish",
290
            "CAST-128",
291
            "Camellia-128",
292
            "Camellia-192",
293
            "Camellia-256",
294
            "DES",
295
            "TripleDES",
296
            "GOST-28147-89",
297
            "IDEA",
298
            "Noekeon",
299
            "SHACAL2",
300
            "SM4",
301
            "Serpent",
302
            "Threefish-512",
303
            "Twofish",
304

305
            /* Cipher modes */
306
            "AES-128/CBC",
307
            "AES-128/CTR-BE",
308
            "AES-128/EAX",
309
            "AES-128/OCB",
310
            "AES-128/GCM",
311
            "AES-128/XTS",
312
            "AES-128/SIV",
313

314
            "Serpent/CBC",
315
            "Serpent/CTR-BE",
316
            "Serpent/EAX",
317
            "Serpent/OCB",
318
            "Serpent/GCM",
319
            "Serpent/XTS",
320
            "Serpent/SIV",
321

322
            "ChaCha20Poly1305",
323

324
            /* Stream ciphers */
325
            "RC4",
326
            "Salsa20",
327
            "ChaCha20",
328

329
            /* Hashes */
330
            "SHA-1",
331
            "SHA-256",
332
            "SHA-512",
333
            "SHA-3(256)",
334
            "SHA-3(512)",
335
            "RIPEMD-160",
336
            "Skein-512",
337
            "Blake2b",
338
            "Whirlpool",
339

340
            /* XOFs */
341
            "SHAKE-128",
342
            "SHAKE-256",
343

344
            /* MACs */
345
            "CMAC(AES-128)",
346
            "HMAC(SHA-256)",
347

348
            /* pubkey */
349
            "RSA",
350
            "DH",
351
            "ECDH",
352
            "ECDSA",
353
            "Ed25519",
354
            "Ed448",
355
            "X25519",
356
            "X448",
357
            "ML-KEM",
358
            "ML-DSA",
359
            "SLH-DSA",
360
            "FrodoKEM",
361
            "HSS-LMS",
362
         };
×
363
         // clang-format on
364
      }
365

366
      std::string group() const override { return "misc"; }
1✔
367

368
      std::string description() const override { return "Measures the speed of algorithms"; }
1✔
369

370
      void go() override {
28✔
371
         std::chrono::milliseconds msec(get_arg_sz("msec"));
28✔
372
         std::vector<std::string> ecc_groups = Command::split_on(get_arg("ecc-groups"), ',');
56✔
373
         const std::string format = get_arg("format");
28✔
374
         const std::string clock_ratio = get_arg("cpu-clock-ratio");
31✔
375

376
         const size_t clock_speed = get_arg_sz("cpu-clock-speed");
28✔
377

378
         double clock_cycle_ratio = std::strtod(clock_ratio.c_str(), nullptr);
28✔
379

380
         m_time_unit = [](std::string_view tu) {
115✔
381
            if(tu == "ms") {
28✔
382
               return 1000;
28✔
383
            } else if(tu == "us") {
×
384
               return 1000 * 1000;
×
385
            } else if(tu == "ns") {
×
386
               return 1000 * 1000 * 1000;
×
387
            } else {
388
               throw CLI_Usage_Error("Unknown time unit (supported: ms, us, ns)");
×
389
            }
390
         }(get_arg("time-unit"));
28✔
391

392
         /*
393
         * This argument is intended to be the ratio between the cycle counter
394
         * and the actual machine cycles. It is extremely unlikely that there is
395
         * any machine where the cycle counter increments faster than the actual
396
         * clock.
397
         */
398
         if(clock_cycle_ratio < 0.0 || clock_cycle_ratio > 1.0) {
28✔
399
            throw CLI_Usage_Error("Unlikely CPU clock ratio of " + clock_ratio);
×
400
         }
401

402
         clock_cycle_ratio = 1.0 / clock_cycle_ratio;
28✔
403

404
#if defined(BOTAN_HAS_OS_UTILS)
405
         if(clock_speed != 0 && Botan::OS::get_cpu_cycle_counter() != 0) {
28✔
406
            error_output() << "The --cpu-clock-speed option is only intended to be used on "
×
407
                              "platforms without access to a cycle counter.\n"
408
                              "Expect incorrect results\n\n";
×
409
         }
410
#endif
411

412
         if(format == "table") {
28✔
413
            m_summary = std::make_unique<Summary>();
1✔
414
         } else if(format == "json") {
27✔
415
            m_json = std::make_unique<JSON_Output>();
1✔
416
         } else if(format != "default") {
26✔
417
            throw CLI_Usage_Error("Unknown --format type '" + format + "'");
×
418
         }
419

420
#if defined(BOTAN_HAS_ECC_GROUP)
421
         if(ecc_groups.empty()) {
28✔
422
            ecc_groups = {"secp256r1", "secp384r1", "secp521r1", "brainpool256r1", "brainpool384r1", "brainpool512r1"};
224✔
423
         } else if(ecc_groups.size() == 1 && ecc_groups[0] == "all") {
×
424
            auto all = Botan::EC_Group::known_named_groups();
×
425
            ecc_groups.assign(all.begin(), all.end());
×
426
         }
×
427
#endif
428

429
         std::vector<std::string> algos = get_arg_list("algos");
31✔
430

431
         const std::vector<size_t> buf_sizes = unique_buffer_sizes(get_arg("buf-size"));
59✔
432

433
         for(const std::string& cpuid_to_clear : Command::split_on(get_arg("clear-cpuid"), ',')) {
26✔
434
            auto bits = Botan::CPUID::bit_from_string(cpuid_to_clear);
1✔
435
            if(bits.empty()) {
1✔
436
               error_output() << "Warning don't know CPUID flag '" << cpuid_to_clear << "'\n";
1✔
437
            }
438

439
            for(auto bit : bits) {
1✔
440
               Botan::CPUID::clear_cpuid_bit(bit);
×
441
            }
442
         }
26✔
443

444
         if(verbose() || m_summary) {
25✔
445
            output() << Botan::version_string() << "\n"
2✔
446
                     << "CPUID: " << Botan::CPUID::to_string() << "\n\n";
3✔
447
         }
448

449
         const bool using_defaults = (algos.empty());
25✔
450
         if(using_defaults) {
25✔
451
            algos = default_benchmark_list();
×
452
         }
453

454
         PerfConfig perf_config([&](const Timer& t) { this->record_result(t); },
510✔
455
                                clock_speed,
456
                                clock_cycle_ratio,
457
                                msec,
458
                                ecc_groups,
459
                                buf_sizes,
460
                                this->error_output(),
461
                                this->rng());
25✔
462

463
         for(const auto& algo : algos) {
70✔
464
            if(auto perf = PerfTest::get(algo)) {
45✔
465
               perf->go(perf_config);
45✔
466
            } else if(verbose() || !using_defaults) {
×
467
               error_output() << "Unknown algorithm '" << algo << "'\n";
×
468
            }
45✔
469
         }
470

471
         if(m_json) {
25✔
472
            output() << m_json->print();
2✔
473
         }
474
         if(m_summary) {
25✔
475
            output() << m_summary->print() << "\n";
3✔
476
         }
477

478
         if(verbose() && clock_speed == 0 && m_cycles_consumed > 0 && m_ns_taken > 0) {
25✔
479
            const double seconds = static_cast<double>(m_ns_taken) / 1000000000;
×
480
            const double Hz = static_cast<double>(m_cycles_consumed) / seconds;
×
481
            const double MHz = Hz / 1000000;
×
482
            output() << "\nEstimated clock speed " << MHz << " MHz\n";
×
483
         }
484
      }
115✔
485

486
   private:
487
      size_t m_time_unit = 0;
488
      uint64_t m_cycles_consumed = 0;
489
      uint64_t m_ns_taken = 0;
490
      std::unique_ptr<Summary> m_summary;
491
      std::unique_ptr<JSON_Output> m_json;
492

493
      void record_result(const Timer& t) {
485✔
494
         m_ns_taken += t.value();
485✔
495
         m_cycles_consumed += t.cycles_consumed();
485✔
496
         if(m_json) {
485✔
497
            m_json->add(t);
2✔
498
         } else {
499
            output() << format_timer(t, m_time_unit) << std::endl;
966✔
500

501
            if(m_summary) {
483✔
502
               m_summary->add(t);
2✔
503
            }
504
         }
505
      }
485✔
506
};
507

508
// NOTE(review): presumably makes the Speed command available to the CLI under
// the name "speed"; the macro is defined outside this file - confirm there.
BOTAN_REGISTER_COMMAND("speed", Speed);
29✔
509

510
}  // namespace Botan_CLI
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc