• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PeterCDMcLean / BitLib / 15231925347

24 May 2025 11:21PM UTC coverage: 52.363% (+2.5%) from 49.894%
15231925347

push

github

web-flow
Add support for signed word types. (#14)

* Add support for signed word types.
Use logical shift right and avoid signed multiplication.
Someone builtin_popcount is also sensitive to sign

* Use common mask function to avoid signed word underflows

* Allow _mask to optimize by default

8602 of 16014 branches covered (53.72%)

Branch coverage included in aggregate %.

216 of 264 new or added lines in 12 files covered. (81.82%)

4 existing lines in 1 file now uncovered.

5323 of 10579 relevant lines covered (50.32%)

5579260.68 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

60.62
/include/bitlib/bit-iterator/bit_details.hpp
1
// ============================== BIT DETAILS =============================== //
2
// Project:         The C++ Bit Library
3
// Name:            bit_details.hpp
4
// Description:     Provides common implementation details and helper classes
5
// Creator:         Vincent Reverdy
6
// Contributor(s):  Vincent Reverdy [2015-2017]
7
//                  Bryce Kille [2019]
8
// License:         BSD 3-Clause License
9
// ========================================================================== //
10
#ifndef _BIT_DETAILS_HPP_INCLUDED
11
#define _BIT_DETAILS_HPP_INCLUDED
12
// ========================================================================== //
13

14

15

16
// ================================ PREAMBLE ================================ //
17
// C++ standard library
18
#include <algorithm>
19
#include <cassert>
20
#include <concepts>
21
#include <cstddef>
22
#include <cstdint>
23
#include <iterator>
24
#include <limits>
25
#include <ranges>
26
#include <stdexcept>
27
#include <tuple>
28
#include <type_traits>
29
#include <utility>
30

31
#include "bitlib/bit-containers/bit_bitsof.hpp"
32
#include "bitlib/bit_concepts.hpp"
33

34
// Project sources
35
// Third-party libraries
36
// Miscellaneous
37
namespace bit {
38
class bit_value;
39
template <class WordType>
40
class bit_reference;
41
template <class Iterator> class bit_iterator;
42
template <class WordType>
43
using bit_pointer = bit_iterator<WordType*>;
44

45
// ========================================================================== //
46

47
/* ***************************** BINARY DIGITS ****************************** */
48
// Binary digits structure definition
49
// Implementation template: only instantiates static_asserts for non-byte types.
50
template <typename T, bool = std::is_same<T, std::byte>::value>
51
struct binary_digits_impl : std::integral_constant<std::size_t, std::numeric_limits<std::make_unsigned_t<T>>::digits> {
52
  static_assert(std::is_integral<T>::value, "Type must be integral");
53
  //static_assert(std::is_unsigned<T>::value, "Type must be unsigned");
54
  static_assert(!std::is_same<T, bool>::value, "Type must not be bool");
55
  static_assert(!std::is_same<T, char>::value, "Type must not be char");
56
};
57

58
// Specialization for std::byte.
59
template <>
60
struct binary_digits_impl<std::byte, true> : std::integral_constant<std::size_t, std::numeric_limits<unsigned char>::digits> {};
61

62
// Public interface that removes cv-qualifiers.
63
template <typename UIntType>
64
struct binary_digits : binary_digits_impl<std::remove_cv_t<UIntType>> {};
65

66
// Binary digits value
67
template <class T>
68
constexpr std::size_t binary_digits_v = binary_digits<T>::value;
69
/* ************************************************************************** */
70

71
#if 0
72
template <typename T>
73
using smallest_integral = std::conditional_t<
74
    (sizeof(T) <= sizeof(std::uint8_t)),
75
    std::uint8_t,
76
    std::conditional_t<
77
        (sizeof(T) <= sizeof(std::uint16_t)),
78
        std::uint16_t,
79
        std::conditional_t<
80
            (sizeof(T) <= sizeof(std::uint32_t)),
81
            std::uint32_t,
82
            std::conditional_t<
83
                (sizeof(T) <= sizeof(std::uint64_t)),
84
                std::uint64_t,
85
                T>>>>;
86
#endif
87

88
/* *************** IMPLEMENTATION DETAILS: CV ITERATOR TRAITS *************** */
89
// Cv iterator traits structure definition
90
template <class Iterator>
91
struct _cv_iterator_traits
92
{
93
    // Assertions
94
    private:
95
    using _traits_t = std::iterator_traits<Iterator>;
96
    using _difference_t = typename _traits_t::difference_type;
97
    using _value_t = typename _traits_t::value_type;
98
    using _pointer_t = typename _traits_t::pointer;
99
    using _reference_t = typename _traits_t::reference;
100
    using _category_t =  typename _traits_t::iterator_category;
101
    using _no_pointer_t = typename std::remove_pointer<_pointer_t>::type;
102
    using _no_reference_t = typename std::remove_reference<_reference_t>::type;
103
    using _raw_value_t = typename std::remove_cv<_value_t>::type;
104
    using _raw_pointer_t = typename std::remove_cv<_no_pointer_t>::type;
105
    using _raw_reference_t = typename std::remove_cv<_no_reference_t>::type;
106
    using _cv_value_t = _no_reference_t;
107
    static_assert(std::is_same<_raw_pointer_t, _raw_value_t>::value, "");
108
    static_assert(std::is_same<_raw_reference_t, _raw_value_t>::value, "");
109

110
    // Types
111
    public:
112
    using difference_type = _difference_t;
113
    using value_type = _cv_value_t;
114
    using pointer = _pointer_t;
115
    using reference = _reference_t;
116
    using iterator_category = _category_t;
117
};
118
/* ************************************************************************** */
119

120

121

122
/* *********** IMPLEMENTATION DETAILS: NARROWEST AND WIDEST TYPES *********** */
123
// Narrowest type structure declaration
124
template <class... T>
125
struct _narrowest_type;
126

127
// Narrowest type structure specialization: selects the only passed type
128
template <class T>
129
struct _narrowest_type<T>
130
: std::common_type<T>
131
{
132
    static_assert(binary_digits<T>::value, "");
133
};
134

135
// Narrowest type structure specialization: selects the type with less bits
136
template <class T, class U>
137
struct _narrowest_type<T, U>
138
: _narrowest_type<
139
    typename std::conditional<
140
        (binary_digits<T>::value < binary_digits<U>::value),
141
        T,
142
        typename std::conditional<
143
            (binary_digits<T>::value > binary_digits<U>::value),
144
            U,
145
            typename std::common_type<T, U>::type
146
        >::type
147
    >::type
148
>
149
{
150
};
151

152
// Narrowest type structure specialization: recursively selects the right type
153
template <class T, class... U>
154
struct _narrowest_type<T, U...>
155
: _narrowest_type<T, typename _narrowest_type<U...>::type>
156
{
157
};
158

159
// Narrowest type alias
160
template <class... T>
161
using _narrowest_type_t = typename _narrowest_type<T...>::type;
162

163
// Widest type structure declaration
164
template <class... X>
165
struct _widest_type;
166

167
// Widest type structure specialization: selects the only passed type
168
template <class T>
169
struct _widest_type<T>
170
: std::common_type<T>
171
{
172
    static_assert(binary_digits<T>::value, "");
173
};
174

175
// Widest type structure specialization: selects the type with more bits
176
template <class T, class U>
177
struct _widest_type<T, U>
178
: _widest_type<
179
    typename std::conditional<
180
        (binary_digits<T>::value > binary_digits<U>::value),
181
        T,
182
        typename std::conditional<
183
            (binary_digits<T>::value < binary_digits<U>::value),
184
            U,
185
            typename std::common_type<T, U>::type
186
        >::type
187
    >::type
188
>
189
{
190
};
191

192
// Widest type structure specialization: recursively selects the right type
193
template <class T, class... X>
194
struct _widest_type<T, X...>
195
: _widest_type<T, typename _widest_type<X...>::type>
196
{
197
};
198

199
// Widest type alias
200
template <class... T>
201
using _widest_type_t = typename _widest_type<T...>::type;
202
/* ************************************************************************** */
203

204

205

206
/* ************ IMPLEMENTATION DETAILS: NARROWER AND WIDER TYPES ************ */
207
// Narrower type structure definition
208
template <class T, int I = 0>
209
struct _narrower_type
210
{
211
    using tuple = std::tuple<
212
        unsigned long long int,
213
        unsigned long int,
214
        unsigned int,
215
        unsigned short int,
216
        unsigned char
217
    >;
218
    using lhs_bits = binary_digits<T>;
219
    using rhs_bits = binary_digits<typename std::tuple_element<I, tuple>::type>;
220
    using type = typename std::conditional<
221
        (lhs_bits::value > rhs_bits::value),
222
        typename std::tuple_element<I, tuple>::type,
223
        typename std::conditional<
224
            (I + 1 < std::tuple_size<tuple>::value),
225
            typename _narrower_type<
226
                T,
227
                (I + 1 < std::tuple_size<tuple>::value ? I + 1 : -1)
228
            >::type,
229
            typename std::tuple_element<I, tuple>::type
230
        >::type
231
    >::type;
232
};
233

234
// Narrower type structure specialization: not found
235
template <class T>
236
struct _narrower_type<T, -1>
237
{
238
    using type = T;
239
};
240

241
// Narrower type alias
242
template <class T>
243
using _narrower_type_t = typename _narrower_type<T>::type;
244

245
// Wider type structure definition
246
template <class T, int I = 0>
247
struct _wider_type
248
{
249
    using tuple = std::tuple<
250
        unsigned char,
251
        unsigned short int,
252
        unsigned int,
253
        unsigned long int,
254
        unsigned long long int
255
    >;
256
    using lhs_bits = binary_digits<T>;
257
    using rhs_bits = binary_digits<typename std::tuple_element<I, tuple>::type>;
258
    using type = typename std::conditional<
259
        (lhs_bits::value < rhs_bits::value),
260
        typename std::tuple_element<I, tuple>::type,
261
        typename std::conditional<
262
            (I + 1 < std::tuple_size<tuple>::value),
263
            typename _narrower_type<
264
                T,
265
                (I + 1 < std::tuple_size<tuple>::value ? I + 1 : -1)
266
            >::type,
267
            typename std::tuple_element<I, tuple>::type
268
        >::type
269
    >::type;
270
};
271

272
// Wider type structure specialization: not found
273
template <class T>
274
struct _wider_type<T, -1>
275
{
276
    using type = T;
277
};
278

279
// Wider type alias
280
template <class T>
281
using _wider_type_t = typename _wider_type<T>::type;
282
/* ************************************************************************** */
283

284

285

286
/* ******************* IMPLEMENTATION DETAILS: UTILITIES ******************** */
287
// Assertions
288
template <class Iterator>
289
constexpr bool _assert_range_viability(Iterator first, Iterator last);
290
/* ************************************************************************** */
291

292

293

294
/* ****************** IMPLEMENTATION DETAILS: INSTRUCTIONS ****************** */
295
// Population count
296
template <class T, class = decltype(__builtin_popcountll(T()))>
297
constexpr T _popcnt(T src) noexcept;
298
template <class T, class... X>
299
constexpr T _popcnt(T src, X...) noexcept;
300

301
// Leading zeros count
302
template <class T, class = decltype(__builtin_clzll(T()))>
303
constexpr T _lzcnt(T src) noexcept;
304
template <class T, class... X>
305
constexpr T _lzcnt(T src, X...) noexcept;
306

307
// Trailing zeros count
308
template <class T, class = decltype(__builtin_ctzll(T()))>
309
constexpr T _tzcnt(T src) noexcept;
310
template <class T, class... X>
311
constexpr T _tzcnt(T src, X...) noexcept;
312

313
// Bit field extraction
314
template <class T, class = decltype(__builtin_ia32_bextr_u64(T(), T(), T()))>
315
constexpr T _bextr(T src, T start, T len) noexcept;
316
template <class T, class... X>
317
constexpr T _bextr(T src, T start, T len, X...) noexcept;
318

319
// Parallel bits deposit
320
template <class T, class = decltype(_pdep_u64(T()))>
321
constexpr T _pdep(T src, T msk) noexcept;
322
template <class T, class... X>
323
constexpr T _pdep(T src, T msk, X...) noexcept;
324

325
// Parallel bits extract
326
template <class T, class = decltype(_pext_u64(T()))>
327
constexpr T _pext(T src, T msk) noexcept;
328
template <class T, class... X>
329
constexpr T _pext(T src, T msk, X...) noexcept;
330

331
// Byte swap
332
template <class T, class T128 = decltype(__uint128_t(__builtin_bswap64(T())))>
333
constexpr T _byteswap(T src) noexcept;
334
template <class T, class... X>
335
constexpr T _byteswap(T src, X...) noexcept;
336

337
// Bit swap
338
template <class T>
339
constexpr T _bitswap(T src) noexcept;
340
template <class T, std::size_t N>
341
constexpr T _bitswap(T src) noexcept;
342
template <class T, std::size_t N>
343
constexpr T _bitswap() noexcept;
344

345
// Bit blend
346
template <class T>
347
constexpr T _bitblend(T src0, T src1, T msk) noexcept;
348
template <class T>
349
constexpr T _bitblend(T src0, T src1, T start, T len) noexcept;
350

351
// Bit exchange
352
template <class T>
353
constexpr void _bitexch(T& src0, T& src1, T msk) noexcept;
354
template <class T, class S>
355
constexpr void _bitexch(T& src0, T& src1, S start, S len) noexcept;
356
template <class T, class S>
357
constexpr void _bitexch(T& src0, T& src1, S start0, S start1, S len) noexcept;
358

359
// Bit compare
360
template <class T>
361
constexpr T _bitcmp(T src0, T src1, T start0, T start1, T len) noexcept;
362

363
// Double precision shift left
364
template <class T>
365
constexpr T _shld(T dst, T src, T cnt) noexcept;
366

367
// Double precision shift right
368
template <class T>
369
constexpr T _shrd(T dst, T src, T cnt) noexcept;
370

371
// Add carry
372
template <class... T>
373
using _supports_adc = decltype(__builtin_ia32_addcarryx_u64(T()...));
374
template <class C, class T, class = _supports_adc<C, T, T, std::nullptr_t>>
375
constexpr C _addcarry(C carry, T src0, T src1, T* dst) noexcept;
376
template <class C, class T, class... X>
377
constexpr C _addcarry(C carry, T src0, T src1, T* dst, X...) noexcept;
378

379
// Sub borrow
380
template <class... T>
381
using _supports_sbb = decltype(__builtin_ia32_sbb_u64(T()...));
382
template <class... T>
383
using _supports_sbb_alt = decltype(__builtin_ia32_subborrow_u64(T()...));
384
template <class B, class T, class = _supports_sbb<B, T, T, std::nullptr_t>>
385
constexpr B _subborrow(B borrow, T src0, T src1, T* dst) noexcept;
386
template <class B, class T, class = _supports_sbb_alt<B, T, T, std::nullptr_t>>
387
constexpr B _subborrow(const B& borrow, T src0, T src1, T* dst) noexcept;
388
template <class B, class T, class... X>
389
constexpr B _subborrow(B borrow, T src0, T src1, T* dst, X...) noexcept;
390

391
// Multiword multiply
392
template <class T, class T128 = decltype(__uint128_t(T()))>
393
constexpr T _mulx(T src0, T src1, T* hi) noexcept;
394
template <class T, class... X>
395
constexpr T _mulx(T src0, T src1, T* hi, X...) noexcept;
396
/* ************************************************************************** */
397

398
/*
399
Logical shift right
400
*/
401
template <std::integral T, typename size_type = size_t>
402
constexpr T lsr(const T& val, const size_type shift) {
85,662,609✔
403
  return static_cast<T>(static_cast<std::make_unsigned_t<T>>(val) >> shift);
85,662,609✔
404
}
85,662,609✔
405

406
enum class _mask_len {
407
  unknown,
408
  in_range
409
};
410

411
template <std::integral T, _mask_len len_in_range = _mask_len::in_range, typename size_type = size_t>
412
constexpr T _mask(const size_type len) {
21,404,909✔
413
  constexpr std::make_unsigned_t<T> one = std::make_unsigned_t<T>(1);
21,404,909✔
414
  if constexpr (len_in_range != _mask_len::unknown) {
21,404,909✔
415
    return static_cast<T>((one << len) - one);
11,067✔
416
  } else {
21,393,842✔
417
    // The digits_mask is solely here to prevent Undefined Sanitizer
418
    // complaining about shift of len >= digits
419
    // Note: on -O1 the (len & digits_mask) is optimized to simply (len)
420
    constexpr std::make_unsigned_t<T> digits_mask = bitsof<T>() - one;
21,393,842✔
421
    return static_cast<T>((one << (len & digits_mask)) * (len < bitsof<T>()) - one);
21,393,842✔
422
  }
21,393,842✔
423
}
21,404,909✔
424

425
// ------------- IMPLEMENTATION DETAILS: UTILITIES: ASSERTIONS -------------- //
426
// If the range allows multipass iteration, checks if last - first >= 0
427
template <class Iterator>
428
constexpr bool _assert_range_viability(Iterator first, Iterator last) {
473,587✔
429
  using traits_t = std::iterator_traits<Iterator>;
473,587✔
430
  using category_t = typename traits_t::iterator_category;
473,587✔
431
  using multi_t = std::forward_iterator_tag;
473,587✔
432
  constexpr bool is_multipass = std::is_base_of<multi_t, category_t>::value;
473,587✔
433
  const bool is_viable = !is_multipass || std::distance(first, last) >= 0;
473,587!
434
  assert(is_viable);
473,587!
435
  return is_viable;
473,587✔
436
}
473,587✔
437
// -------------------------------------------------------------------------- //
438

439
// --------- IMPLEMENTATION DETAILS: INSTRUCTIONS: POPULATION COUNT --------- //
440
// Counts the number of bits set to 1 with compiler intrinsics
441
template <class T, class>
442
constexpr T _popcnt(T src) noexcept {
52,544✔
443
  static_assert(binary_digits<T>::value, "");
52,544✔
444
  constexpr T digits = binary_digits<T>::value;
52,544✔
445
  if (digits <= std::numeric_limits<unsigned int>::digits) {
52,544✔
446
    src = __builtin_popcount(static_cast<std::make_unsigned_t<T>>(src));
24,128✔
447
  } else if (digits <= std::numeric_limits<unsigned long int>::digits) {
28,416✔
448
    src = __builtin_popcountl(static_cast<std::make_unsigned_t<T>>(src));
28,416✔
449
  } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
28,416✔
NEW
450
    src = __builtin_popcountll(static_cast<std::make_unsigned_t<T>>(src));
×
NEW
451
  } else {
×
NEW
452
    src = _popcnt(src, std::ignore);
×
NEW
453
  }
×
454
  return src;
52,544✔
455
}
52,544✔
456

457
// Counts the number of bits set to 1 without compiler intrinsics
458
template <class T, class... X>
NEW
459
constexpr T _popcnt(T src, X...) noexcept {
×
NEW
460
  static_assert(binary_digits<T>::value, "");
×
NEW
461
  T dst = T();
×
NEW
462
  for (dst = T(); src; src = lsr(src, 1)) {
×
NEW
463
    dst += src & 1;
×
NEW
464
  }
×
NEW
465
  return dst;
×
UNCOV
466
}
×
467
// -------------------------------------------------------------------------- //
468

469
// ------- IMPLEMENTATION DETAILS: INSTRUCTIONS: LEADING ZEROS COUNT -------- //
470
// Counts the number of leading zeros with compiler intrinsics
471
template <class T, class>
472
constexpr T _lzcnt(T src) noexcept {
473
  static_assert(binary_digits<T>::value, "");
474
  constexpr T digits = binary_digits<T>::value;
475
  T dst = T();
476
  if (digits < std::numeric_limits<unsigned int>::digits) {
477
    dst = src ? __builtin_clz(src) - (std::numeric_limits<unsigned int>::digits - digits)
478
              : digits;
479
  } else if (digits == std::numeric_limits<unsigned int>::digits) {
480
    dst = src ? __builtin_clz(src) : digits;
481
  } else if (digits < std::numeric_limits<unsigned long int>::digits) {
482
    dst = src ? __builtin_clzl(src) - (std::numeric_limits<unsigned long int>::digits - digits)
483
              : digits;
484
  } else if (digits == std::numeric_limits<unsigned long int>::digits) {
485
    dst = src ? __builtin_clzl(src) : digits;
486
  } else if (digits < std::numeric_limits<unsigned long long int>::digits) {
487
    dst = src ? __builtin_clzll(src) - (std::numeric_limits<unsigned long long int>::digits - digits)
488
              : digits;
489
  } else if (digits == std::numeric_limits<unsigned long long int>::digits) {
490
    dst = src ? __builtin_clzll(src) : digits;
491
  } else {
492
    dst = _lzcnt(src, std::ignore);
493
  }
494
  return dst;
495
}
496

497
// Counts the number of leading zeros without compiler intrinsics
498
template <class T, class... X>
499
constexpr T _lzcnt(T src, X...) noexcept {
500
  static_assert(binary_digits<T>::value, "");
501
  constexpr T digits = binary_digits<T>::value;
502
  T dst = src != T();
503
  while ((src = lsr(src, 1))) {
504
    ++dst;
505
  }
506
  return digits - dst;
507
}
508
// -------------------------------------------------------------------------- //
509

510
// ------- IMPLEMENTATION DETAILS: INSTRUCTIONS: TRAILING ZEROS COUNT ------- //
511
// Counts the number of trailing zeros with compiler intrinsics
512
template <class T, class>
513
constexpr T _tzcnt(T src) noexcept {
33,230✔
514
  static_assert(binary_digits<T>::value, "");
33,230✔
515
  constexpr T digits = binary_digits<T>::value;
33,230✔
516
  T dst = T();
33,230✔
517
  if (digits <= std::numeric_limits<unsigned int>::digits) {
33,230✔
518
    dst = src ? __builtin_ctz(src) : digits;
16,206!
519
  } else if (digits <= std::numeric_limits<unsigned long int>::digits) {
17,024✔
520
    dst = src ? __builtin_ctzl(src) : digits;
17,024!
521
  } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
17,024✔
NEW
522
    dst = src ? __builtin_ctzll(src) : digits;
×
NEW
523
  } else {
×
NEW
524
    dst = _tzcnt(src, std::ignore);
×
NEW
525
  }
×
526
  return dst;
33,230✔
527
}
33,230✔
528

529
// Counts the number of trailing zeros without compiler intrinsics
530
template <class T, class... X>
NEW
531
constexpr T _tzcnt(T src, X...) noexcept {
×
NEW
532
  static_assert(binary_digits<T>::value, "");
×
NEW
533
  constexpr T digits = binary_digits<T>::value;
×
NEW
534
  T dst = digits;
×
NEW
535
  if (src) {
×
NEW
536
    src = lsr((src ^ (src - 1)), 1);
×
NEW
537
    for (dst = T(); src; dst++) {
×
NEW
538
      src = lsr(src, 1);
×
539
    }
×
NEW
540
  }
×
NEW
541
  return dst;
×
UNCOV
542
}
×
543
// -------------------------------------------------------------------------- //
544

545
// ------- IMPLEMENTATION DETAILS: INSTRUCTIONS: BIT FIELD EXTRACTION ------- //
546
// Extacts to lsbs a field of contiguous bits with compiler intrinsics
547
template <class T, class>
548
constexpr T _bextr(T src, T start, T len) noexcept {
549
  static_assert(binary_digits<T>::value, "");
550
  constexpr T digits = binary_digits<T>::value;
551
  T dst = T();
552
  if (digits <= std::numeric_limits<unsigned int>::digits) {
553
    dst = __builtin_ia32_bextr_u32(src, start, len);
554
  } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
555
    dst = __builtin_ia32_bextr_u64(src, start, len);
556
  } else {
557
    dst = _bextr(src, start, len, std::ignore);
558
  }
559
  return dst;
560
}
561

562
// Extacts to lsbs a field of contiguous bits without compiler intrinsics
563
template <class T, class... X>
564
constexpr T _bextr(T src, T start, T len, X...) noexcept {
7,616✔
565
  static_assert(binary_digits<T>::value, "");
7,616✔
566
  constexpr T digits = binary_digits<T>::value;
7,616✔
567
  constexpr T one = 1;
7,616✔
568
  const T msk = (one << len) * (len < digits) - one;
7,616✔
569
  return (lsr(src, start)) & msk * (start < digits);
7,616✔
570
}
7,616✔
571
// -------------------------------------------------------------------------- //
572

573
// ------- IMPLEMENTATION DETAILS: INSTRUCTIONS: PARALLEL BIT DEPOSIT ------- //
574
// Deposits bits according to a mask with compiler instrinsics
575
template <class T, class>
576
constexpr T _pdep(T src, T msk) noexcept {
577
  static_assert(binary_digits<T>::value, "");
578
  constexpr T digits = binary_digits<T>::value;
579
  T dst = T();
580
  if (digits <= std::numeric_limits<unsigned int>::digits) {
581
    dst = _pdep_u32(src, msk);
582
  } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
583
    dst = _pdep_u64(src, msk);
584
  } else {
585
    dst = _pdep(src, msk, std::ignore);
586
  }
587
  return dst;
588
}
589

590
// Deposits bits according to a mask without compiler instrinsics
591
template <class T, class... X>
592
constexpr T _pdep(T src, T msk, X...) noexcept {
593
  static_assert(binary_digits<T>::value, "");
594
  constexpr T digits = binary_digits<T>::value;
595
  T dst = T();
596
  T cnt = T();
597
  while (msk) {
598
    dst = lsr(dst, 1);
599
    if (msk & 1) {
600
      dst |= src << (digits - 1);
601
      src = lsr(src, 1);
602
    }
603
    msk = lsr(msk, 1);
604
    ++cnt;
605
  }
606
  dst = lsr(dst, (digits - cnt) * (cnt > 0));
607
  return dst;
608
}
609
// -------------------------------------------------------------------------- //
610

611
// ------- IMPLEMENTATION DETAILS: INSTRUCTIONS: PARALLEL BIT EXTRACT ------- //
612
// Extracts bits according to a mask with compiler instrinsics
613
template <class T, class>
614
constexpr T _pext(T src, T msk) noexcept {
615
  static_assert(binary_digits<T>::value, "");
616
  constexpr T digits = binary_digits<T>::value;
617
  T dst = T();
618
  if (digits <= std::numeric_limits<unsigned int>::digits) {
619
    dst = _pext_u32(src, msk);
620
  } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
621
    dst = _pext_u64(src, msk);
622
  } else {
623
    dst = _pext(src, msk, std::ignore);
624
  }
625
  return dst;
626
}
627

628
// Extracts bits according to a mask without compiler instrinsics
629
template <class T, class... X>
630
constexpr T _pext(T src, T msk, X...) noexcept {
631
  static_assert(binary_digits<T>::value, "");
632
  constexpr T digits = binary_digits<T>::value;
633
  T dst = T();
634
  T cnt = T();
635
  while (msk) {
636
    if (msk & 1) {
637
      dst = lsr(dst, 1);
638
      dst |= src << (digits - 1);
639
      ++cnt;
640
    }
641
    src = lsr(src, 1);
642
    msk = lsr(msk, 1);
643
  }
644
  dst = lsr(dst, (digits - cnt) * (cnt > 0));
645
  return dst;
646
}
647
// -------------------------------------------------------------------------- //
648

649
// ------------ IMPLEMENTATION DETAILS: INSTRUCTIONS: BYTE SWAP ------------- //
650
// Reverses the order of the underlying bytes with compiler intrinsics
651
template <class T, class T128>
652
constexpr T _byteswap(T src) noexcept {
653
  static_assert(binary_digits<T>::value, "");
654
  using byte_t = unsigned char;
655
  constexpr T digits = sizeof(T) * std::numeric_limits<byte_t>::digits;
656
  std::uint64_t tmp64 = 0;
657
  std::uint64_t* ptr64 = nullptr;
658
  if (std::is_same<T, T128>::value) {
659
    ptr64 = reinterpret_cast<std::uint64_t*>(&src);
660
    tmp64 = __builtin_bswap64(*ptr64);
661
    *ptr64 = __builtin_bswap64(*(ptr64 + 1));
662
    *(ptr64 + 1) = tmp64;
663
  } else if (digits == std::numeric_limits<std::uint16_t>::digits) {
664
    src = __builtin_bswap16(src);
665
  } else if (digits == std::numeric_limits<std::uint32_t>::digits) {
666
    src = __builtin_bswap32(src);
667
  } else if (digits == std::numeric_limits<std::uint64_t>::digits) {
668
    src = __builtin_bswap64(src);
669
  } else if (digits > std::numeric_limits<byte_t>::digits) {
670
    src = _byteswap(src, std::ignore);
671
  }
672
  return src;
673
}
674

675
// Reverses the order of the underlying bytes without compiler intrinsics
676
template <class T, class... X>
677
constexpr T _byteswap(T src, X...) noexcept {
678
  static_assert(binary_digits<T>::value, "");
679
  using byte_t = unsigned char;
680
  constexpr T half = sizeof(T) / 2;
681
  constexpr T end = sizeof(T) - 1;
682
  unsigned char* bytes = reinterpret_cast<byte_t*>(&src);
683
  unsigned char byte = 0;
684
  for (T i = T(); i < half; ++i) {
685
    byte = bytes[i];
686
    bytes[i] = bytes[end - i];
687
    bytes[end - i] = byte;
688
  }
689
  return src;
690
}
691
// -------------------------------------------------------------------------- //
692

693
// ------------- IMPLEMENTATION DETAILS: INSTRUCTIONS: BIT SWAP ------------- //
694
// Reverses the order of the bits with or without of compiler intrinsics
695
template <class T>
696
constexpr T _bitswap(T src) noexcept {
10,521,728✔
697
  static_assert(binary_digits<T>::value, "");
10,521,728✔
698
  using byte_t = unsigned char;
10,521,728✔
699
  constexpr auto ignore = nullptr;
10,521,728✔
700
  constexpr T digits = binary_digits<T>::value;
10,521,728✔
701
  constexpr unsigned long long int first = 0x80200802ULL;
10,521,728✔
702
  constexpr unsigned long long int second = 0x0884422110ULL;
10,521,728✔
703
  constexpr unsigned long long int third = 0x0101010101ULL;
10,521,728✔
704
  constexpr unsigned long long int fourth = 32;
10,521,728✔
705
  constexpr bool is_size1 = sizeof(T) == 1;
10,521,728✔
706
  constexpr bool is_byte = digits == std::numeric_limits<byte_t>::digits;
10,521,728✔
707
  constexpr bool is_octet = std::numeric_limits<byte_t>::digits == 8;
10,521,728✔
708
  constexpr bool is_pow2 = _popcnt(digits, ignore) == 1;
10,521,728✔
709
  T dst = src;
10,521,728✔
710
  T i = digits - 1;
10,521,728✔
711
  if (is_size1 && is_byte && is_octet) {
10,521,728✔
712
    dst = static_cast<T>(lsr(((static_cast<std::make_unsigned_t<T>>(src) * first) & second) * third, fourth));
2,622,432✔
713
  } else if (is_pow2) {
7,899,296✔
714
    dst = _bitswap<T, digits>(src);
7,899,296✔
715
  } else {
7,899,296✔
NEW
716
    for (src = lsr(src, 1); src; src = lsr(src, 1)) {
×
NEW
717
      dst <<= 1;
×
NEW
718
      dst |= src & 1;
×
NEW
719
      i--;
×
UNCOV
720
    }
×
NEW
721
    dst <<= i;
×
NEW
722
  }
×
723
  return dst;
10,521,728✔
724
}
10,521,728✔
725

726
// Reverses the order of the bits: recursive metafunction
727
template <class T, std::size_t N>
728
constexpr T _bitswap(T src) noexcept {
39,511,840✔
729
  static_assert(binary_digits<T>::value, "");
39,511,840✔
730
  constexpr T cnt = N >> 1;
39,511,840✔
731
  constexpr T msk = _bitswap<T, cnt>();
39,511,840✔
732
  src = ((lsr(src, cnt)) & msk) | ((src << cnt) & ~msk);
39,511,840✔
733
  return cnt > 1 ? _bitswap<T, cnt>(src) : src;
39,511,840✔
734
}
39,511,840✔
735

736
// Reverses the order of the bits: mask for the recursive metafunction
737
template <class T, std::size_t N>
NEW
738
constexpr T _bitswap() noexcept {
×
NEW
739
  static_assert(binary_digits<T>::value, "");
×
NEW
740
  constexpr T digits = binary_digits<T>::value;
×
NEW
741
  T cnt = digits;
×
NEW
742
  T msk = ~T();
×
NEW
743
  while (cnt != N) {
×
NEW
744
    cnt = lsr(cnt, 1);
×
NEW
745
    msk ^= (msk << cnt);
×
NEW
746
  }
×
NEW
747
  return msk;
×
UNCOV
748
}
×
749
// -------------------------------------------------------------------------- //
750

751
// ------------ IMPLEMENTATION DETAILS: INSTRUCTIONS: BIT BLEND ------------- //
752
// Replaces bits of src0 by the ones of src1 where the mask is true
753
template <class T>
754
constexpr T _bitblend(T src0, T src1, T msk) noexcept {
5,016✔
755
  static_assert(binary_digits<T>::value, "");
5,016✔
756
  return src0 ^ ((src0 ^ src1) & msk);
5,016✔
757
}
5,016✔
758

759
// Replaces len bits of src0 by the ones of src1 starting at start
760
template <class T>
761
constexpr T _bitblend(T src0, T src1, T start, T len) noexcept {
796,999✔
762
  static_assert(binary_digits<T>::value, "");
796,999✔
763
  constexpr T digits = binary_digits<T>::value;
796,999✔
764
  const T msk = _mask<T, _mask_len::unknown>(len) << start;
796,999✔
765
  return src0 ^ ((src0 ^ src1) & msk * (start < digits));
796,999✔
766
}
796,999✔
767
// -------------------------------------------------------------------------- //
768

769
// ---------- IMPLEMENTATION DETAILS: INSTRUCTIONS: BIT EXCHANGE ------------ //
770
// Exchanges/swaps bits of src0 by the ones of src1 where the mask is true
771
template <class T>
772
constexpr void _bitexch(T& src0, T& src1, T msk) noexcept {
773
  src0 = src0 ^ static_cast<T>(src1 & msk);
774
  src1 = src1 ^ static_cast<T>(src0 & msk);
775
  src0 = src0 ^ static_cast<T>(src1 & msk);
776
  return;
777
}
778

779
// Replaces len bits of src0 by the ones of src1 starting at start
780
template <class T, class S>
781
constexpr void _bitexch(T& src0, T& src1, S start, S len) noexcept {
2,371✔
782
  static_assert(binary_digits<T>::value, "");
2,371✔
783
  constexpr auto digits = binary_digits<T>::value;
2,371✔
784
  const T msk = (len < digits)
2,371!
785
                    ? _mask<T, _mask_len::unknown>(len) << start
2,371✔
786
                    : -1;  // TODO: What if start > 0 here?
2,371✔
787
  src0 = src0 ^ static_cast<T>(src1 & msk);
2,371✔
788
  src1 = src1 ^ static_cast<T>(src0 & msk);
2,371✔
789
  src0 = src0 ^ static_cast<T>(src1 & msk);
2,371✔
790
  return;
2,371✔
791
}
2,371✔
792

793
// Replaces len bits of src0 by the ones of src1 starting at start0
794
// in src0 and start1 in src1.
795
// len <= digits-max(start0, start1)
796
// clang-format off
797
template <class T, class S>
798
constexpr void _bitexch(T& src0, T& src1, S start0, S start1, S len) noexcept
799
{
20,594,586✔
800
    static_assert(binary_digits<T>::value, "");
20,594,586✔
801
    constexpr auto digits = binary_digits<T>::value;
20,594,586✔
802
    const T msk = _mask<T, _mask_len::unknown>(len);
20,594,586✔
803
    if (start0 >= start1) {
20,594,586✔
804
        src0 = src0 ^ (
10,298,778✔
805
                static_cast<T>(src1 << (start0 - start1))
10,298,778✔
806
                &
10,298,778✔
807
                static_cast<T>(msk << start0)
10,298,778✔
808
        );
10,298,778✔
809
        src1 = src1 ^ (
10,298,778✔
810
                static_cast<T>(lsr(src0, (start0 - start1)))
10,298,778✔
811
                &
10,298,778✔
812
                static_cast<T>(msk << start1)
10,298,778✔
813
        );
10,298,778✔
814
        src0 = src0 ^ (
10,298,778✔
815
                static_cast<T>(src1 << (start0 - start1))
10,298,778✔
816
                &
10,298,778✔
817
                static_cast<T>(msk << start0)
10,298,778✔
818
        );
10,298,778✔
819
    } else {
10,298,778✔
820
        src0 = src0 ^ (
10,295,808✔
821
                static_cast<T>(lsr(src1, (start1 - start0)))
10,295,808✔
822
                &
10,295,808✔
823
                static_cast<T>(msk << start0)
10,295,808✔
824
        );
10,295,808✔
825
        src1 = src1 ^ (
10,295,808✔
826
                static_cast<T>(src0 << (start1 - start0))
10,295,808✔
827
                &
10,295,808✔
828
                static_cast<T>(msk << start1)
10,295,808✔
829
        );
10,295,808✔
830
        src0 = src0 ^ (
10,295,808✔
831
                static_cast<T>(lsr(src1, (start1 - start0)))
10,295,808✔
832
                &
10,295,808✔
833
                static_cast<T>(msk << start0)
10,295,808✔
834
        );
10,295,808✔
835
    }
10,295,808✔
836
    return;
20,594,586✔
837
}
20,594,586✔
838
// clang-format on
839
// -------------------------------------------------------------------------- //
840

841
// ----------- IMPLEMENTATION DETAILS: INSTRUCTIONS: BIT COMPARE ------------ //
842
// Compares a subsequence of bits within src0 and src1 and returns 0 if equal
843
template <class T>
844
constexpr T _bitcmp(T src0, T src1, T start0, T start1, T len) noexcept {
845
  static_assert(binary_digits<T>::value, "");
846
  return _bextr(src0, start0, len) == _bextr(src1, start1, len);
847
}
848
// -------------------------------------------------------------------------- //
849

850
// --- IMPLEMENTATION DETAILS: INSTRUCTIONS: DOUBLE PRECISION SHIFT LEFT ---- //
851
// Left shifts dst by cnt bits, filling the lsbs of dst by the msbs of src
852
template <class T>
853
constexpr T _shld(T dst, T src, T cnt) noexcept {
854
  static_assert(binary_digits<T>::value, "");
855
  constexpr T digits = binary_digits<T>::value;
856
  if (cnt < digits) {
857
    dst = (dst << cnt) | (lsr(src, (digits - cnt)));
858
  } else {
859
    dst = (src << (cnt - digits)) * (cnt < digits + digits);
860
  }
861
  return dst;
862
}
863
// -------------------------------------------------------------------------- //
864

865
// --- IMPLEMENTATION DETAILS: INSTRUCTIONS: DOUBLE PRECISION SHIFT RIGHT --- //
866
// Right shifts dst by cnt bits, filling the msbs of dst by the lsbs of src
867
template <class T>
868
constexpr T _shrd(T dst, T src, T cnt) noexcept {
11,838,264✔
869
  static_assert(binary_digits<T>::value, "");
11,838,264✔
870
  constexpr T digits = binary_digits<T>::value;
11,838,264✔
871
  if (cnt < digits) {
11,838,264!
872
    dst = (lsr(dst, cnt)) | (src << (digits - cnt));
11,838,264✔
873
  } else {
11,838,264✔
NEW
874
    dst = (lsr(src, (cnt - digits))) * (cnt < digits + digits);
×
NEW
875
  }
×
876
  return dst;
11,838,264✔
877
}
11,838,264✔
878
// -------------------------------------------------------------------------- //
879

880
// ------------ IMPLEMENTATION DETAILS: INSTRUCTIONS: ADD CARRY ------------- //
881
// Adds src0 and src1 and returns the new carry bit with intrinsics
882
template <class C, class T, class>
883
constexpr C _addcarry(C carry, T src0, T src1, T* dst) noexcept {
884
  static_assert(binary_digits<T>::value, "");
885
  using wider_t = typename _wider_type<T>::type;
886
  constexpr T digits = binary_digits<T>::value;
887
  wider_t tmp = 0;
888
  unsigned int udst = 0;
889
  unsigned long long int ulldst = 0;
890
  if (digits == std::numeric_limits<unsigned int>::digits) {
891
    carry = __builtin_ia32_addcarryx_u32(carry, src0, src1, &udst);
892
    *dst = udst;
893
  } else if (digits == std::numeric_limits<unsigned long long int>::digits) {
894
    carry = __builtin_ia32_addcarryx_u64(carry, src0, src1, &ulldst);
895
    *dst = ulldst;
896
  } else if (digits < binary_digits<wider_t>::value) {
897
    tmp = static_cast<wider_t>(src0) + static_cast<wider_t>(src1);
898
    tmp += static_cast<wider_t>(static_cast<bool>(carry));
899
    *dst = tmp;
900
    carry = static_cast<bool>(tmp >> digits);
901
  } else {
902
    carry = _addcarry(carry, src0, src1, dst, std::ignore);
903
  }
904
  return carry;
905
}
906

907
// Adds src0 and src1 and returns the new carry bit without intrinsics
908
template <class C, class T, class... X>
909
constexpr C _addcarry(C carry, T src0, T src1, T* dst, X...) noexcept {
910
  static_assert(binary_digits<T>::value, "");
911
  *dst = src0 + src1 + static_cast<T>(static_cast<bool>(carry));
912
  return carry ? *dst <= src0 || *dst <= src1 : *dst < src0 || *dst < src1;
913
}
914
// -------------------------------------------------------------------------- //
915

916
// ------------ IMPLEMENTATION DETAILS: INSTRUCTIONS: SUB BORROW ------------ //
917
// Subtracts src1 to src0 and returns the new borrow bit with intrinsics
918
template <class B, class T, class>
919
constexpr B _subborrow(B borrow, T src0, T src1, T* dst) noexcept {
920
  static_assert(binary_digits<T>::value, "");
921
  using wider_t = typename _wider_type<T>::type;
922
  constexpr T digits = binary_digits<T>::value;
923
  wider_t tmp = 0;
924
  unsigned int udst = 0;
925
  unsigned long long int ulldst = 0;
926
  if (digits == std::numeric_limits<unsigned int>::digits) {
927
    borrow = __builtin_ia32_sbb_u32(borrow, src0, src1, &udst);
928
    *dst = udst;
929
  } else if (digits == std::numeric_limits<unsigned long long int>::digits) {
930
    borrow = __builtin_ia32_sbb_u64(borrow, src0, src1, &ulldst);
931
    *dst = ulldst;
932
  } else if (digits < binary_digits<wider_t>::value) {
933
    tmp = static_cast<wider_t>(src1);
934
    tmp += static_cast<wider_t>(static_cast<bool>(borrow));
935
    borrow = tmp > static_cast<wider_t>(src0);
936
    *dst = static_cast<wider_t>(src0) - tmp;
937
  } else {
938
    borrow = _subborrow(borrow, src0, src1, dst, std::ignore);
939
  }
940
  return borrow;
941
}
942

943
// Subtracts src1 to src0 and returns the new borrow bit with other intrinsics
944
template <class B, class T, class>
945
constexpr B _subborrow(const B& borrow, T src0, T src1, T* dst) noexcept {
946
  static_assert(binary_digits<T>::value, "");
947
  using wider_t = typename _wider_type<T>::type;
948
  constexpr T digits = binary_digits<T>::value;
949
  wider_t tmp = 0;
950
  unsigned int udst = 0;
951
  unsigned long long int ulldst = 0;
952
  B flag = borrow;
953
  if (digits == std::numeric_limits<unsigned int>::digits) {
954
    flag = __builtin_ia32_subborrow_u32(borrow, src0, src1, &udst);
955
    *dst = udst;
956
  } else if (digits == std::numeric_limits<unsigned long long int>::digits) {
957
    flag = __builtin_ia32_subborrow_u64(borrow, src0, src1, &ulldst);
958
    *dst = ulldst;
959
  } else if (digits < binary_digits<wider_t>::value) {
960
    tmp = static_cast<wider_t>(src1);
961
    tmp += static_cast<wider_t>(static_cast<bool>(borrow));
962
    flag = tmp > static_cast<wider_t>(src0);
963
    *dst = static_cast<wider_t>(src0) - tmp;
964
  } else {
965
    flag = _subborrow(borrow, src0, src1, dst, std::ignore);
966
  }
967
  return flag;
968
}
969

970
// Subtracts src1 to src0 and returns the new borrow bit without intrinsics
971
template <class B, class T, class... X>
972
constexpr B _subborrow(B borrow, T src0, T src1, T* dst, X...) noexcept {
973
  static_assert(binary_digits<T>::value, "");
974
  *dst = src0 - (src1 + static_cast<T>(static_cast<bool>(borrow)));
975
  return borrow ? src1 >= src0 : src1 > src0;
976
}
977
// -------------------------------------------------------------------------- //
978

979
// -------- IMPLEMENTATION DETAILS: INSTRUCTIONS: MULTIWORD MULTIPLY -------- //
980
// Multiplies src0 and src1 and gets the full result with compiler intrinsics
981
template <class T, class T128>
982
constexpr T _mulx(T src0, T src1, T* hi) noexcept
983
{
984
    static_assert(binary_digits<T>::value, "");
985
    using wider_t = typename _wider_type<T>::type;
986
    constexpr T digits = binary_digits<T>::value;
987
    wider_t tmp = 0;
988
    T128 tmp128 = 0;
989
    T lo = 0;
990
    if (digits == std::numeric_limits<std::uint64_t>::digits) {
991
        tmp128 = static_cast<T128>(src0) * static_cast<T128>(src1);
992
        *hi = tmp128 >> digits;
993
        lo = tmp128;
994
    } else if (digits + digits == binary_digits<wider_t>::value) {
995
        tmp = static_cast<wider_t>(src0) * static_cast<wider_t>(src1);
996
        *hi = tmp >> digits;
997
        lo = tmp;
998
    } else {
999
        lo = _mulx(src0, src1, hi, std::ignore);
1000
    }
1001
    return lo;
1002
}
1003

1004
// Multiplies src0 and src1 and gets the full result without compiler intrinsics
1005
template <class T, class... X>
1006
constexpr T _mulx(T src0, T src1, T* hi, X...) noexcept
1007
{
1008
    static_assert(binary_digits<T>::value, "");
1009
    constexpr T digits = binary_digits<T>::value;
1010
    constexpr T offset = digits / 2;
1011
    constexpr T ones = ~static_cast<T>(0);
1012
    const T lsbs0 = src0 & static_cast<T>(lsr(ones, (digits - offset)));
1013
    const T msbs0 = lsr(src0, offset);
1014
    const T lsbs1 = src1 & static_cast<T>(lsr(ones, (digits - offset)));
1015
    const T msbs1 = lsr(src1, offset);
1016
    const T llsbs = lsbs0 * lsbs1;
1017
    const T mlsbs = msbs0 * lsbs1;
1018
    const T lmsbs = lsbs0 * msbs1;
1019
    const T mi = mlsbs + lmsbs;
1020
    const T lo = llsbs + static_cast<T>(mi << offset);
1021
    const T lcarry = lo < llsbs || lo < static_cast<T>(mi << offset);
1022
    const T mcarry = static_cast<T>(mi < mlsbs || mi < lmsbs) << offset;
1023
    *hi = static_cast<T>(lsr(mi, offset)) + msbs0 * msbs1 + mcarry + lcarry;
1024
    return lo;
1025
}
1026
// -------------------------------------------------------------------------- //
1027

1028

1029

1030
// ========================================================================== //
1031
} // namespace bit
1032
#endif // _BIT_DETAILS_HPP_INCLUDED
1033
// ========================================================================== //
1034

STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc