• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PeterCDMcLean / BitLib / 15127764614

20 May 2025 02:53AM UTC coverage: 49.646% (-0.2%) from 49.868%
15127764614

Pull #11

github

web-flow
Merge 8e8f6383f into 9126838eb
Pull Request #11: Bitwise operators in bit_array_base

5356 of 11060 branches covered (48.43%)

Branch coverage included in aggregate %.

190 of 192 new or added lines in 5 files covered. (98.96%)

6 existing lines in 1 file now uncovered.

5452 of 10710 relevant lines covered (50.91%)

3199890.02 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

66.25
/include/bitlib/bit-iterator/bit_details.hpp
1
// ============================== BIT DETAILS =============================== //
2
// Project:         The C++ Bit Library
3
// Name:            bit_details.hpp
4
// Description:     Provides common implementation details and helper classes
5
// Creator:         Vincent Reverdy
6
// Contributor(s):  Vincent Reverdy [2015-2017]
7
//                  Bryce Kille [2019]
8
// License:         BSD 3-Clause License
9
// ========================================================================== //
10
#ifndef _BIT_DETAILS_HPP_INCLUDED
11
#define _BIT_DETAILS_HPP_INCLUDED
12
// ========================================================================== //
13

14

15

16
// ================================ PREAMBLE ================================ //
17
// C++ standard library
18
#include <immintrin.h>
19

20
#include <algorithm>
21
#include <cassert>
22
#include <concepts>
23
#include <cstddef>
24
#include <cstdint>
25
#include <iterator>
26
#include <limits>
27
#include <ranges>
28
#include <stdexcept>
29
#include <tuple>
30
#include <type_traits>
31
#include <utility>
32

33
#include "bitlib/bit-containers/bit_bitsof.hpp"
34
#include "bitlib/bit_concepts.hpp"
35

36
// Project sources
37
// Third-party libraries
38
// Miscellaneous
39
namespace bit {
40
class bit_value;
41
template <class WordType>
42
class bit_reference;
43
template <class Iterator> class bit_iterator;
44
template <class WordType>
45
using bit_pointer = bit_iterator<WordType*>;
46

47
// ========================================================================== //
48

49
/* ***************************** BINARY DIGITS ****************************** */
50
// Binary digits: number of value bits of an unsigned word type.
// The defaulted bool parameter routes std::byte to its own specialization so
// that the integral checks below are only instantiated for non-byte types.
template <typename T, bool IsByte = std::is_same_v<T, std::byte>>
struct binary_digits_impl
: std::integral_constant<std::size_t, std::numeric_limits<T>::digits>
{
    static_assert(std::is_integral_v<T>, "Type must be integral");
    static_assert(std::is_unsigned_v<T>, "Type must be unsigned");
    static_assert(!std::is_same_v<T, bool>, "Type must not be bool");
    static_assert(!std::is_same_v<T, char>, "Type must not be char");
};

// std::byte is not integral: it reports the digit count of unsigned char.
template <>
struct binary_digits_impl<std::byte, true>
: std::integral_constant<std::size_t, std::numeric_limits<unsigned char>::digits>
{
};

// Public entry point: const/volatile qualifiers are stripped before lookup.
template <typename UIntType>
struct binary_digits
: binary_digits_impl<typename std::remove_cv<UIntType>::type>
{
};

// Convenience variable template for binary_digits<T>::value.
template <class T>
constexpr std::size_t binary_digits_v = binary_digits<T>::value;
72
/* ************************************************************************** */
73

74
#if 0
75
template <typename T>
76
using smallest_integral = std::conditional_t<
77
    (sizeof(T) <= sizeof(std::uint8_t)),
78
    std::uint8_t,
79
    std::conditional_t<
80
        (sizeof(T) <= sizeof(std::uint16_t)),
81
        std::uint16_t,
82
        std::conditional_t<
83
            (sizeof(T) <= sizeof(std::uint32_t)),
84
            std::uint32_t,
85
            std::conditional_t<
86
                (sizeof(T) <= sizeof(std::uint64_t)),
87
                std::uint64_t,
88
                T>>>>;
89
#endif
90

91
/* *************** IMPLEMENTATION DETAILS: CV ITERATOR TRAITS *************** */
92
// Cv iterator traits: same members as std::iterator_traits, except that
// value_type is the iterator's reference type with the reference removed, so
// the cv-qualification of the referenced word is kept (e.g. `const int` for
// `const int*`).
template <class Iterator>
struct _cv_iterator_traits
{
    private:
    using _std_traits = std::iterator_traits<Iterator>;
    using _pointee_t = std::remove_pointer_t<typename _std_traits::pointer>;
    using _referee_t = std::remove_reference_t<typename _std_traits::reference>;
    using _bare_value_t = std::remove_cv_t<typename _std_traits::value_type>;

    // The pointer and reference types must both designate the value type once
    // cv-qualifiers are ignored
    static_assert(std::is_same_v<std::remove_cv_t<_pointee_t>, _bare_value_t>, "");
    static_assert(std::is_same_v<std::remove_cv_t<_referee_t>, _bare_value_t>, "");

    public:
    using difference_type = typename _std_traits::difference_type;
    using value_type = _referee_t;
    using pointer = typename _std_traits::pointer;
    using reference = typename _std_traits::reference;
    using iterator_category = typename _std_traits::iterator_category;
};
121
/* ************************************************************************** */
122

123

124

125
/* *********** IMPLEMENTATION DETAILS: NARROWEST AND WIDEST TYPES *********** */
126
// Narrowest type structure declaration
127
template <class... T>
128
struct _narrowest_type;
129

130
// Narrowest type structure specialization: selects the only passed type
131
template <class T>
132
struct _narrowest_type<T>
133
: std::common_type<T>
134
{
135
    static_assert(binary_digits<T>::value, "");
136
};
137

138
// Narrowest type structure specialization: selects the type with less bits
139
template <class T, class U>
140
struct _narrowest_type<T, U>
141
: _narrowest_type<
142
    typename std::conditional<
143
        (binary_digits<T>::value < binary_digits<U>::value),
144
        T,
145
        typename std::conditional<
146
            (binary_digits<T>::value > binary_digits<U>::value),
147
            U,
148
            typename std::common_type<T, U>::type
149
        >::type
150
    >::type
151
>
152
{
153
};
154

155
// Narrowest type structure specialization: recursively selects the right type
156
template <class T, class... U>
157
struct _narrowest_type<T, U...>
158
: _narrowest_type<T, typename _narrowest_type<U...>::type>
159
{
160
};
161

162
// Narrowest type alias
163
template <class... T>
164
using _narrowest_type_t = typename _narrowest_type<T...>::type;
165

166
// Widest type structure declaration
167
template <class... X>
168
struct _widest_type;
169

170
// Widest type structure specialization: selects the only passed type
171
template <class T>
172
struct _widest_type<T>
173
: std::common_type<T>
174
{
175
    static_assert(binary_digits<T>::value, "");
176
};
177

178
// Widest type structure specialization: selects the type with more bits
179
template <class T, class U>
180
struct _widest_type<T, U>
181
: _widest_type<
182
    typename std::conditional<
183
        (binary_digits<T>::value > binary_digits<U>::value),
184
        T,
185
        typename std::conditional<
186
            (binary_digits<T>::value < binary_digits<U>::value),
187
            U,
188
            typename std::common_type<T, U>::type
189
        >::type
190
    >::type
191
>
192
{
193
};
194

195
// Widest type structure specialization: recursively selects the right type
196
template <class T, class... X>
197
struct _widest_type<T, X...>
198
: _widest_type<T, typename _widest_type<X...>::type>
199
{
200
};
201

202
// Widest type alias
203
template <class... T>
204
using _widest_type_t = typename _widest_type<T...>::type;
205
/* ************************************************************************** */
206

207

208

209
/* ************ IMPLEMENTATION DETAILS: NARROWER AND WIDER TYPES ************ */
210
// Narrower type structure definition
211
template <class T, int I = 0>
212
struct _narrower_type
213
{
214
    using tuple = std::tuple<
215
        unsigned long long int,
216
        unsigned long int,
217
        unsigned int,
218
        unsigned short int,
219
        unsigned char
220
    >;
221
    using lhs_bits = binary_digits<T>;
222
    using rhs_bits = binary_digits<typename std::tuple_element<I, tuple>::type>;
223
    using type = typename std::conditional<
224
        (lhs_bits::value > rhs_bits::value),
225
        typename std::tuple_element<I, tuple>::type,
226
        typename std::conditional<
227
            (I + 1 < std::tuple_size<tuple>::value),
228
            typename _narrower_type<
229
                T,
230
                (I + 1 < std::tuple_size<tuple>::value ? I + 1 : -1)
231
            >::type,
232
            typename std::tuple_element<I, tuple>::type
233
        >::type
234
    >::type;
235
};
236

237
// Narrower type structure specialization: not found
238
template <class T>
239
struct _narrower_type<T, -1>
240
{
241
    using type = T;
242
};
243

244
// Narrower type alias
245
template <class T>
246
using _narrower_type_t = typename _narrower_type<T>::type;
247

248
// Wider type structure definition
249
template <class T, int I = 0>
250
struct _wider_type
251
{
252
    using tuple = std::tuple<
253
        unsigned char,
254
        unsigned short int,
255
        unsigned int,
256
        unsigned long int,
257
        unsigned long long int
258
    >;
259
    using lhs_bits = binary_digits<T>;
260
    using rhs_bits = binary_digits<typename std::tuple_element<I, tuple>::type>;
261
    using type = typename std::conditional<
262
        (lhs_bits::value < rhs_bits::value),
263
        typename std::tuple_element<I, tuple>::type,
264
        typename std::conditional<
265
            (I + 1 < std::tuple_size<tuple>::value),
266
            typename _narrower_type<
267
                T,
268
                (I + 1 < std::tuple_size<tuple>::value ? I + 1 : -1)
269
            >::type,
270
            typename std::tuple_element<I, tuple>::type
271
        >::type
272
    >::type;
273
};
274

275
// Wider type structure specialization: not found
276
template <class T>
277
struct _wider_type<T, -1>
278
{
279
    using type = T;
280
};
281

282
// Wider type alias
283
template <class T>
284
using _wider_type_t = typename _wider_type<T>::type;
285
/* ************************************************************************** */
286

287

288

289
/* ******************* IMPLEMENTATION DETAILS: UTILITIES ******************** */
290
// Assertions
291
template <class Iterator>
292
constexpr bool _assert_range_viability(Iterator first, Iterator last);
293
/* ************************************************************************** */
294

295

296

297
/* ****************** IMPLEMENTATION DETAILS: INSTRUCTIONS ****************** */
298
// Population count
299
template <class T, class = decltype(__builtin_popcountll(T()))>
300
constexpr T _popcnt(T src) noexcept;
301
template <class T, class... X>
302
constexpr T _popcnt(T src, X...) noexcept;
303

304
// Leading zeros count
305
template <class T, class = decltype(__builtin_clzll(T()))>
306
constexpr T _lzcnt(T src) noexcept;
307
template <class T, class... X>
308
constexpr T _lzcnt(T src, X...) noexcept;
309

310
// Trailing zeros count
311
template <class T, class = decltype(__builtin_ctzll(T()))>
312
constexpr T _tzcnt(T src) noexcept;
313
template <class T, class... X>
314
constexpr T _tzcnt(T src, X...) noexcept;
315

316
// Bit field extraction
317
template <class T, class = decltype(__builtin_ia32_bextr_u64(T(), T(), T()))>
318
constexpr T _bextr(T src, T start, T len) noexcept;
319
template <class T, class... X>
320
constexpr T _bextr(T src, T start, T len, X...) noexcept;
321

322
// Parallel bits deposit
// NOTE(review): the detection expression below calls _pdep_u64 with a single
// argument, whereas the definition of this overload calls _pdep_u64(src, msk)
// with two. Presumably the one-argument probe never substitutes, so this
// overload is always discarded and the generic (X...) fallback is the only
// viable candidate -- confirm against <immintrin.h> before relying on the
// intrinsic path.
323
template <class T, class = decltype(_pdep_u64(T()))>
324
constexpr T _pdep(T src, T msk) noexcept;
325
template <class T, class... X>
326
constexpr T _pdep(T src, T msk, X...) noexcept;
327

328
// Parallel bits extract
// NOTE(review): the detection expression below calls _pext_u64 with a single
// argument, whereas the definition of this overload calls _pext_u64(src, msk)
// with two. Presumably the one-argument probe never substitutes, so this
// overload is always discarded and the generic (X...) fallback is the only
// viable candidate -- confirm against <immintrin.h> before relying on the
// intrinsic path.
329
template <class T, class = decltype(_pext_u64(T()))>
330
constexpr T _pext(T src, T msk) noexcept;
331
template <class T, class... X>
332
constexpr T _pext(T src, T msk, X...) noexcept;
333

334
// Byte swap
335
template <class T, class T128 = decltype(__uint128_t(__builtin_bswap64(T())))>
336
constexpr T _byteswap(T src) noexcept;
337
template <class T, class... X>
338
constexpr T _byteswap(T src, X...) noexcept;
339

340
// Bit swap
341
template <class T>
342
constexpr T _bitswap(T src) noexcept;
343
template <class T, std::size_t N>
344
constexpr T _bitswap(T src) noexcept;
345
template <class T, std::size_t N>
346
constexpr T _bitswap() noexcept;
347

348
// Bit blend
349
template <class T>
350
constexpr T _bitblend(T src0, T src1, T msk) noexcept;
351
template <class T>
352
constexpr T _bitblend(T src0, T src1, T start, T len) noexcept;
353

354
// Bit exchange
355
template <class T>
356
constexpr void _bitexch(T& src0, T& src1, T msk) noexcept;
357
template <class T, class S>
358
constexpr void _bitexch(T& src0, T& src1, S start, S len) noexcept;
359
template <class T, class S>
360
constexpr void _bitexch(T& src0, T& src1, S start0, S start1, S len) noexcept;
361

362
// Bit compare
363
template <class T>
364
constexpr T _bitcmp(T src0, T src1, T start0, T start1, T len) noexcept;
365

366
// Double precision shift left
367
template <class T>
368
constexpr T _shld(T dst, T src, T cnt) noexcept;
369

370
// Double precision shift right
371
template <class T>
372
constexpr T _shrd(T dst, T src, T cnt) noexcept;
373

374
// Add carry
375
template <class... T>
376
using _supports_adc = decltype(__builtin_ia32_addcarryx_u64(T()...));
377
template <class C, class T, class = _supports_adc<C, T, T, std::nullptr_t>>
378
constexpr C _addcarry(C carry, T src0, T src1, T* dst) noexcept;
379
template <class C, class T, class... X>
380
constexpr C _addcarry(C carry, T src0, T src1, T* dst, X...) noexcept;
381

382
// Sub borrow
383
template <class... T>
384
using _supports_sbb = decltype(__builtin_ia32_sbb_u64(T()...));
385
template <class... T>
386
using _supports_sbb_alt = decltype(__builtin_ia32_subborrow_u64(T()...));
387
template <class B, class T, class = _supports_sbb<B, T, T, std::nullptr_t>>
388
constexpr B _subborrow(B borrow, T src0, T src1, T* dst) noexcept;
389
template <class B, class T, class = _supports_sbb_alt<B, T, T, std::nullptr_t>>
390
constexpr B _subborrow(const B& borrow, T src0, T src1, T* dst) noexcept;
391
template <class B, class T, class... X>
392
constexpr B _subborrow(B borrow, T src0, T src1, T* dst, X...) noexcept;
393

394
// Multiword multiply
395
template <class T, class T128 = decltype(__uint128_t(T()))>
396
constexpr T _mulx(T src0, T src1, T* hi) noexcept;
397
template <class T, class... X>
398
constexpr T _mulx(T src0, T src1, T* hi, X...) noexcept;
399
/* ************************************************************************** */
400

401

402

403
// ------------- IMPLEMENTATION DETAILS: UTILITIES: ASSERTIONS -------------- //
404
// Checks (and asserts) that [first, last) is not reversed, i.e. that
// last - first >= 0.
// The comparison is only performed for random-access iterators. For weaker
// categories std::distance has to walk from first until it meets last, which
// is undefined behaviour when last is not reachable from first -- exactly the
// situation this check is meant to detect -- and costs a full traversal on
// every call. Such ranges are therefore reported as viable without being
// inspected.
// Returns the result of the check; it is also asserted when NDEBUG is unset.
template <class Iterator>
constexpr bool _assert_range_viability(
    [[maybe_unused]] Iterator first,
    [[maybe_unused]] Iterator last
)
{
    using traits_t = std::iterator_traits<Iterator>;
    using category_t = typename traits_t::iterator_category;
    using random_t = std::random_access_iterator_tag;
    constexpr bool is_random = std::is_base_of<random_t, category_t>::value;
    bool is_viable = true;
    if constexpr (is_random) {
        is_viable = std::distance(first, last) >= 0;
    }
    assert(is_viable);
    return is_viable;
}
233,738✔
416
// -------------------------------------------------------------------------- //
417

418

419

420
// --------- IMPLEMENTATION DETAILS: INSTRUCTIONS: POPULATION COUNT --------- //
421
// Counts the number of bits set to 1 with compiler intrinsics
422
template <class T, class>
423
constexpr T _popcnt(T src) noexcept
424
{
26,368✔
425
    static_assert(binary_digits<T>::value, "");
26,368✔
426
    constexpr T digits = binary_digits<T>::value;
26,368✔
427
    if (digits <= std::numeric_limits<unsigned int>::digits) {
26,368✔
428
        src = __builtin_popcount(src);
12,160✔
429
    } else if (digits <= std::numeric_limits<unsigned long int>::digits) {
14,208✔
430
        src = __builtin_popcountl(src);
14,208✔
431
    } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
14,208✔
432
        src = __builtin_popcountll(src);
×
433
    } else {
×
434
        src = _popcnt(src, std::ignore);
×
UNCOV
435
    }
×
436
    return src;
26,368✔
437
}
26,368✔
438

439
// Counts the number of bits set to 1 without compiler intrinsics
440
template <class T, class... X>
441
constexpr T _popcnt(T src, X...) noexcept
442
{
×
443
    static_assert(binary_digits<T>::value, "");
×
444
    T dst = T();
×
445
    for (dst = T(); src; src >>= 1) {
×
446
        dst += src & 1;
×
447
    }
×
448
    return dst;
×
UNCOV
449
}
×
450
// -------------------------------------------------------------------------- //
451

452

453

454
// ------- IMPLEMENTATION DETAILS: INSTRUCTIONS: LEADING ZEROS COUNT -------- //
455
// Counts the number of leading zeros with compiler intrinsics
456
template <class T, class>
457
constexpr T _lzcnt(T src) noexcept
458
{
459
    static_assert(binary_digits<T>::value, "");
460
    constexpr T digits = binary_digits<T>::value;
461
    T dst = T();
462
    if (digits < std::numeric_limits<unsigned int>::digits) {
463
        dst = src ? __builtin_clz(src)
464
                     - (std::numeric_limits<unsigned int>::digits
465
                     - digits)
466
                   : digits;
467
    } else if (digits == std::numeric_limits<unsigned int>::digits) {
468
        dst = src ? __builtin_clz(src) : digits;
469
    } else if (digits < std::numeric_limits<unsigned long int>::digits) {
470
        dst = src ? __builtin_clzl(src)
471
                     - (std::numeric_limits<unsigned long int>::digits
472
                     - digits)
473
                   : digits;
474
    } else if (digits == std::numeric_limits<unsigned long int>::digits) {
475
        dst = src ? __builtin_clzl(src) : digits;
476
    } else if (digits < std::numeric_limits<unsigned long long int>::digits) {
477
        dst = src ? __builtin_clzll(src)
478
                     - (std::numeric_limits<unsigned long long int>::digits
479
                     - digits)
480
                   : digits;
481
    } else if (digits == std::numeric_limits<unsigned long long int>::digits) {
482
        dst = src ? __builtin_clzll(src) : digits;
483
    } else {
484
        dst = _lzcnt(src, std::ignore);
485
    }
486
    return dst;
487
}
488

489
// Counts the number of leading zeros without compiler intrinsics
490
template <class T, class... X>
491
constexpr T _lzcnt(T src, X...) noexcept
492
{
493
    static_assert(binary_digits<T>::value, "");
494
    constexpr T digits = binary_digits<T>::value;
495
    T dst = src != T();
496
    while (src >>= 1) {
497
        ++dst;
498
    }
499
    return digits - dst;
500
}
501
// -------------------------------------------------------------------------- //
502

503

504

505
// ------- IMPLEMENTATION DETAILS: INSTRUCTIONS: TRAILING ZEROS COUNT ------- //
506
// Counts the number of trailing zeros with compiler intrinsics
507
template <class T, class>
508
constexpr T _tzcnt(T src) noexcept
509
{
16,623✔
510
    static_assert(binary_digits<T>::value, "");
16,623✔
511
    constexpr T digits = binary_digits<T>::value;
16,623✔
512
    T dst = T();
16,623✔
513
    if (digits <= std::numeric_limits<unsigned int>::digits) {
16,623✔
514
        dst = src ? __builtin_ctz(src) : digits;
8,111!
515
    } else if (digits <= std::numeric_limits<unsigned long int>::digits) {
8,512✔
516
        dst = src ? __builtin_ctzl(src) : digits;
8,512!
517
    } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
8,512✔
518
        dst = src ? __builtin_ctzll(src) : digits;
×
519
    } else {
×
520
        dst = _tzcnt(src, std::ignore);
×
UNCOV
521
    }
×
522
    return dst;
16,623✔
523
}
16,623✔
524

525
// Counts the number of trailing zeros without compiler intrinsics
526
template <class T, class... X>
527
constexpr T _tzcnt(T src, X...) noexcept
528
{
×
529
    static_assert(binary_digits<T>::value, "");
×
530
    constexpr T digits = binary_digits<T>::value;
×
531
    T dst = digits;
×
532
    if (src) {
×
533
        src = (src ^ (src - 1)) >> 1;
×
534
        for (dst = T(); src; dst++) {
×
535
            src >>= 1;
×
536
        }
×
537
    }
×
538
    return dst;
×
UNCOV
539
}
×
540
// -------------------------------------------------------------------------- //
541

542

543

544
// ------- IMPLEMENTATION DETAILS: INSTRUCTIONS: BIT FIELD EXTRACTION ------- //
545
// Extacts to lsbs a field of contiguous bits with compiler intrinsics
546
template <class T, class>
547
constexpr T _bextr(T src, T start, T len) noexcept
548
{
549
    static_assert(binary_digits<T>::value, "");
550
    constexpr T digits = binary_digits<T>::value;
551
    T dst = T();
552
    if (digits <= std::numeric_limits<unsigned int>::digits) {
553
        dst = __builtin_ia32_bextr_u32(src, start, len);
554
    } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
555
        dst = __builtin_ia32_bextr_u64(src, start, len);
556
    } else {
557
        dst = _bextr(src, start, len, std::ignore);
558
    }
559
    return dst;
560
}
561

562
// Extacts to lsbs a field of contiguous bits without compiler intrinsics
563
template <class T, class... X>
564
constexpr T _bextr(T src, T start, T len, X...) noexcept
565
{
3,712✔
566
    static_assert(binary_digits<T>::value, "");
3,712✔
567
    constexpr T digits = binary_digits<T>::value;
3,712✔
568
    constexpr T one = 1;
3,712✔
569
    const T msk = (one << len) * (len < digits) - one;
3,712✔
570
    return (src >> start) & msk * (start < digits);
3,712✔
571
}
3,712✔
572
// -------------------------------------------------------------------------- //
573

574

575

576
// ------- IMPLEMENTATION DETAILS: INSTRUCTIONS: PARALLEL BIT DEPOSIT ------- //
577
// Deposits bits according to a mask with compiler instrinsics
578
template <class T, class>
579
constexpr T _pdep(T src, T msk) noexcept
580
{
581
    static_assert(binary_digits<T>::value, "");
582
    constexpr T digits = binary_digits<T>::value;
583
    T dst = T();
584
    if (digits <= std::numeric_limits<unsigned int>::digits) {
585
        dst = _pdep_u32(src, msk);
586
    } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
587
        dst = _pdep_u64(src, msk);
588
    } else {
589
        dst = _pdep(src, msk, std::ignore);
590
    }
591
    return dst;
592
}
593

594
// Deposits bits according to a mask without compiler instrinsics
595
template <class T, class... X>
596
constexpr T _pdep(T src, T msk, X...) noexcept
597
{
598
    static_assert(binary_digits<T>::value, "");
599
    constexpr T digits = binary_digits<T>::value;
600
    T dst = T();
601
    T cnt = T();
602
    while (msk) {
603
        dst >>= 1;
604
        if (msk & 1) {
605
            dst |= src << (digits - 1);
606
            src >>= 1;
607
        }
608
        msk >>= 1;
609
        ++cnt;
610
    }
611
    dst >>= (digits - cnt) * (cnt > 0);
612
    return dst;
613
}
614
// -------------------------------------------------------------------------- //
615

616

617

618
// ------- IMPLEMENTATION DETAILS: INSTRUCTIONS: PARALLEL BIT EXTRACT ------- //
619
// Extracts bits according to a mask with compiler instrinsics
620
template <class T, class>
621
constexpr T _pext(T src, T msk) noexcept
622
{
623
    static_assert(binary_digits<T>::value, "");
624
    constexpr T digits = binary_digits<T>::value;
625
    T dst = T();
626
    if (digits <= std::numeric_limits<unsigned int>::digits) {
627
        dst = _pext_u32(src, msk);
628
    } else if (digits <= std::numeric_limits<unsigned long long int>::digits) {
629
        dst = _pext_u64(src, msk);
630
    } else {
631
        dst = _pext(src, msk, std::ignore);
632
    }
633
    return dst;
634
}
635

636
// Extracts bits according to a mask without compiler instrinsics
637
template <class T, class... X>
638
constexpr T _pext(T src, T msk, X...) noexcept
639
{
640
    static_assert(binary_digits<T>::value, "");
641
    constexpr T digits = binary_digits<T>::value;
642
    T dst = T();
643
    T cnt = T();
644
    while (msk) {
645
        if (msk & 1) {
646
            dst >>= 1;
647
            dst |= src << (digits - 1);
648
            ++cnt;
649
        }
650
        src >>= 1;
651
        msk >>= 1;
652
    }
653
    dst >>= (digits - cnt) * (cnt > 0);
654
    return dst;
655
}
656
// -------------------------------------------------------------------------- //
657

658

659

660
// ------------ IMPLEMENTATION DETAILS: INSTRUCTIONS: BYTE SWAP ------------- //
661
// Reverses the order of the underlying bytes with compiler intrinsics
662
template <class T, class T128>
663
constexpr T _byteswap(T src) noexcept
664
{
665
    static_assert(binary_digits<T>::value, "");
666
    using byte_t = unsigned char;
667
    constexpr T digits = sizeof(T) * std::numeric_limits<byte_t>::digits;
668
    std::uint64_t tmp64 = 0;
669
    std::uint64_t* ptr64 = nullptr;
670
    if (std::is_same<T, T128>::value) {
671
        ptr64 = reinterpret_cast<std::uint64_t*>(&src);
672
        tmp64 = __builtin_bswap64(*ptr64);
673
        *ptr64 = __builtin_bswap64(*(ptr64 + 1));
674
        *(ptr64 + 1) = tmp64;
675
    } else if (digits == std::numeric_limits<std::uint16_t>::digits) {
676
        src = __builtin_bswap16(src);
677
    } else if (digits == std::numeric_limits<std::uint32_t>::digits) {
678
        src = __builtin_bswap32(src);
679
    } else if (digits == std::numeric_limits<std::uint64_t>::digits)  {
680
        src = __builtin_bswap64(src);
681
    } else if (digits > std::numeric_limits<byte_t>::digits) {
682
        src = _byteswap(src, std::ignore);
683
    }
684
    return src;
685
}
686

687
// Reverses the order of the underlying bytes without compiler intrinsics
688
template <class T, class... X>
689
constexpr T _byteswap(T src, X...) noexcept
690
{
691
    static_assert(binary_digits<T>::value, "");
692
    using byte_t = unsigned char;
693
    constexpr T half = sizeof(T) / 2;
694
    constexpr T end = sizeof(T) - 1;
695
    unsigned char* bytes = reinterpret_cast<byte_t*>(&src);
696
    unsigned char byte = 0;
697
    for (T i = T(); i < half; ++i) {
698
        byte = bytes[i];
699
        bytes[i] = bytes[end - i];
700
        bytes[end - i] = byte;
701
    }
702
    return src;
703
}
704
// -------------------------------------------------------------------------- //
705

706

707

708
// ------------- IMPLEMENTATION DETAILS: INSTRUCTIONS: BIT SWAP ------------- //
709
// Reverses the order of the bits with or without of compiler intrinsics
710
template <class T>
711
constexpr T _bitswap(T src) noexcept
712
{
5,260,704✔
713
    static_assert(binary_digits<T>::value, "");
5,260,704✔
714
    using byte_t = unsigned char;
5,260,704✔
715
    constexpr auto ignore = nullptr;
5,260,704✔
716
    constexpr T digits = binary_digits<T>::value;
5,260,704✔
717
    constexpr unsigned long long int first = 0x80200802ULL;
5,260,704✔
718
    constexpr unsigned long long int second = 0x0884422110ULL;
5,260,704✔
719
    constexpr unsigned long long int third = 0x0101010101ULL;
5,260,704✔
720
    constexpr unsigned long long int fourth = 32;
5,260,704✔
721
    constexpr bool is_size1 = sizeof(T) == 1;
5,260,704✔
722
    constexpr bool is_byte = digits == std::numeric_limits<byte_t>::digits;
5,260,704✔
723
    constexpr bool is_octet = std::numeric_limits<byte_t>::digits == 8;
5,260,704✔
724
    constexpr bool is_pow2 = _popcnt(digits, ignore) == 1;
5,260,704✔
725
    T dst = src;
5,260,704✔
726
    T i = digits - 1;
5,260,704✔
727
    if (is_size1 && is_byte && is_octet) {
5,260,704✔
728
        dst = ((src * first) & second) * third >> fourth;
1,311,056✔
729
    } else if (is_pow2) {
3,949,648✔
730
        dst = _bitswap<T, digits>(src);
3,949,648✔
731
    } else {
3,949,648✔
732
        for (src >>= 1; src; src >>= 1) {
×
733
            dst <<= 1;
×
734
            dst |= src & 1;
×
735
            i--;
×
736
        }
×
737
        dst <<= i;
×
UNCOV
738
    }
×
739
    return dst;
5,260,704✔
740
}
5,260,704✔
741

742
// Reverses the order of the bits: recursive metafunction
743
template <class T, std::size_t N>
744
constexpr T _bitswap(T src) noexcept
745
{
19,755,920✔
746
    static_assert(binary_digits<T>::value, "");
19,755,920✔
747
    constexpr T cnt = N >> 1;
19,755,920✔
748
    constexpr T msk = _bitswap<T, cnt>();
19,755,920✔
749
    src = ((src >> cnt) & msk) | ((src << cnt) & ~msk);
19,755,920✔
750
    return cnt > 1 ? _bitswap<T, cnt>(src) : src;
19,755,920✔
751
}
19,755,920✔
752

753
// Reverses the order of the bits: mask for the recursive metafunction
754
template <class T, std::size_t N>
755
constexpr T _bitswap() noexcept
756
{
×
757
    static_assert(binary_digits<T>::value, "");
×
758
    constexpr T digits = binary_digits<T>::value;
×
759
    T cnt = digits;
×
760
    T msk = ~T();
×
761
    while (cnt != N) {
×
762
        cnt >>= 1;
×
763
        msk ^= (msk << cnt);
×
764
    }
×
765
    return msk;
×
UNCOV
766
}
×
767
// -------------------------------------------------------------------------- //
768

769

770

771
// ------------ IMPLEMENTATION DETAILS: INSTRUCTIONS: BIT BLEND ------------- //
772
// Replaces bits of src0 by the ones of src1 where the mask is true
773
template <class T>
774
constexpr T _bitblend(T src0, T src1, T msk) noexcept
775
{
2,500✔
776
    static_assert(binary_digits<T>::value, "");
2,500✔
777
    return src0 ^ ((src0 ^ src1) & msk);
2,500✔
778
}
2,500✔
779

780
// Replaces len bits of src0 by the ones of src1 starting at start
781
template <class T>
782
constexpr T _bitblend(T src0, T src1, T start, T len) noexcept
783
{
494,372✔
784
    static_assert(binary_digits<T>::value, "");
494,372✔
785
    constexpr T digits = binary_digits<T>::value;
494,372✔
786
    constexpr T one = 1;
494,372✔
787
    // The digits_mask is solely here to prevent Undefined Sanitizer
788
    // complaining about shift of len >= digits
789
    // Note: on -O1 the (len & digits_mask) is optimized to simply (len)
790
    constexpr T digits_mask = digits - one;
494,372✔
791
    const T msk = ((one << (len & digits_mask)) * (len < digits) - one) << start;
494,372✔
792
    return src0 ^ ((src0 ^ src1) & msk * (start < digits));
494,372✔
793
}
494,372✔
794
// -------------------------------------------------------------------------- //
795

796

797

798
// ---------- IMPLEMENTATION DETAILS: INSTRUCTIONS: BIT EXCHANGE ------------ //
799
// Exchanges/swaps bits of src0 by the ones of src1 where the mask is true.
// Bits outside msk are left untouched in both words.
// The exchange xors both words with their masked difference rather than
// chaining three xor assignments: the chained form clears the masked bits
// when src0 and src1 refer to the same object, whereas this form leaves the
// word unchanged in that case.
template <class T>
constexpr void _bitexch(T& src0, T& src1, T msk) noexcept
{
    const T diff = static_cast<T>((src0 ^ src1) & msk);
    src0 = static_cast<T>(src0 ^ diff);
    src1 = static_cast<T>(src1 ^ diff);
}
808

809
// Replaces len bits of src0 by the ones of src1 starting at start
810
template <class T, class S>
811
constexpr void _bitexch(T& src0, T& src1, S start, S len) noexcept
812
{
1,391✔
813
    static_assert(binary_digits<T>::value, "");
1,391✔
814
    constexpr auto digits = binary_digits<T>::value;
1,391✔
815
    constexpr T one = 1;
1,391✔
816
    const T msk = (len < digits)
1,391!
817
        ? ((one << len) - one) << start : -1;
1,391✔
818
    src0 = src0 ^ static_cast<T>(src1 & msk);
1,391✔
819
    src1 = src1 ^ static_cast<T>(src0 & msk);
1,391✔
820
    src0 = src0 ^ static_cast<T>(src1 & msk);
1,391✔
821
    return;
1,391✔
822
}
1,391✔
823

824
// Replaces len bits of src0 by the ones of src1 starting at start0
825
// in src0 and start1 in src1.
826
// len <= digits-max(start0, start1)
827
template <class T, class S>
828
constexpr void _bitexch(T& src0, T& src1, S start0, S start1, S len) noexcept
829
{
10,244,208✔
830
    static_assert(binary_digits<T>::value, "");
10,244,208✔
831
    constexpr auto digits = binary_digits<T>::value;
10,244,208✔
832
    constexpr T one = 1;
10,244,208✔
833
    const T msk = (len < digits) ?
10,244,208!
834
        ((one << len) - one) : -1;
10,244,208✔
835
    if (start0 >= start1) {
10,244,208✔
836
        src0 = src0 ^ (
5,123,232✔
837
                static_cast<T>(src1 << (start0 - start1))
5,123,232✔
838
                &
5,123,232✔
839
                static_cast<T>(msk << start0)
5,123,232✔
840
        );
5,123,232✔
841
        src1 = src1 ^ (
5,123,232✔
842
                static_cast<T>(src0 >> (start0 - start1))
5,123,232✔
843
                &
5,123,232✔
844
                static_cast<T>(msk << start1)
5,123,232✔
845
        );
5,123,232✔
846
        src0 = src0 ^ (
5,123,232✔
847
                static_cast<T>(src1 << (start0 - start1))
5,123,232✔
848
                &
5,123,232✔
849
                static_cast<T>(msk << start0)
5,123,232✔
850
        );
5,123,232✔
851
    } else {
5,123,232✔
852
        src0 = src0 ^ (
5,120,976✔
853
                static_cast<T>(src1 >> (start1 - start0))
5,120,976✔
854
                &
5,120,976✔
855
                static_cast<T>(msk << start0)
5,120,976✔
856
        );
5,120,976✔
857
        src1 = src1 ^ (
5,120,976✔
858
                static_cast<T>(src0 << (start1 - start0))
5,120,976✔
859
                &
5,120,976✔
860
                static_cast<T>(msk << start1)
5,120,976✔
861
        );
5,120,976✔
862
        src0 = src0 ^ (
5,120,976✔
863
                static_cast<T>(src1 >> (start1 - start0))
5,120,976✔
864
                &
5,120,976✔
865
                static_cast<T>(msk << start0)
5,120,976✔
866
        );
5,120,976✔
867
    }
5,120,976✔
868
    return;
10,244,208✔
869
}
10,244,208✔
870
// -------------------------------------------------------------------------- //
871

872

873

874
// ----------- IMPLEMENTATION DETAILS: INSTRUCTIONS: BIT COMPARE ------------ //
875
// Compares a subsequence of bits within src0 and src1 and returns 0 if equal
876
template <class T>
877
constexpr T _bitcmp(T src0, T src1, T start0, T start1, T len) noexcept
878
{
879
    static_assert(binary_digits<T>::value, "");
880
    return _bextr(src0, start0, len) == _bextr(src1, start1, len);
881
}
882
// -------------------------------------------------------------------------- //
883

884

885

886
// --- IMPLEMENTATION DETAILS: INSTRUCTIONS: DOUBLE PRECISION SHIFT LEFT ---- //
887
// Left shifts dst by cnt bits, filling the lsbs of dst by the msbs of src
888
template <class T>
889
constexpr T _shld(T dst, T src, T cnt) noexcept
890
{
891
    static_assert(binary_digits<T>::value, "");
892
    constexpr T digits = binary_digits<T>::value;
893
    if (cnt < digits) {
894
        dst = (dst << cnt) | (src >> (digits - cnt));
895
    } else {
896
        dst = (src << (cnt - digits)) * (cnt < digits + digits);
897
    }
898
    return dst;
899
}
900
// -------------------------------------------------------------------------- //
901

902

903

904
// --- IMPLEMENTATION DETAILS: INSTRUCTIONS: DOUBLE PRECISION SHIFT RIGHT --- //
905
// Right shifts dst by cnt bits, filling the msbs of dst by the lsbs of src
906
template <class T>
907
constexpr T _shrd(T dst, T src, T cnt) noexcept
908
{
6,257,882✔
909
    static_assert(binary_digits<T>::value, "");
6,257,882✔
910
    constexpr T digits = binary_digits<T>::value;
6,257,882✔
911
    if (cnt < digits) {
6,257,882!
912
        dst = (dst >> cnt) | (src << (digits - cnt));
6,257,882✔
913
    } else {
6,257,882✔
NEW
914
        dst = (src >> (cnt - digits)) * (cnt < digits + digits);
×
NEW
915
    }
×
916
    return dst;
6,257,882✔
917
}
6,257,882✔
918
// -------------------------------------------------------------------------- //
919

920
#if 1
921

922
#if defined(__ADX__)
923
// 32-bit add-with-carry (Add == true, _addcarryx_u32) or subtract-with-borrow
// (Add == false, _subborrow_u32). The result word is written to *out and the
// value returned by the selected intrinsic is passed back to the caller.
template <bool Add>
unsigned char ADDCARRYSUBBORROW32(unsigned char c, uint32_t a, uint32_t b, uint32_t* out) {
  if constexpr (Add) {
    return _addcarryx_u32(c, a, b, out);
  } else {
    return _subborrow_u32(c, a, b, out);
  }
}
927
// 64-bit add-with-carry (Add == true, _addcarryx_u64) or subtract-with-borrow
// (Add == false, _subborrow_u64). The result word is written to *out and the
// value returned by the selected intrinsic is passed back to the caller.
template <bool Add>
unsigned char ADDCARRYSUBBORROW64(unsigned char c, uint64_t a, uint64_t b, uint64_t* out) {
  // The intrinsics take unsigned long long*, which may be a distinct type
  // from uint64_t even though both have the same size.
  using intrinsic_word = unsigned long long int;
  static_assert(sizeof(uint64_t) == sizeof(intrinsic_word));
  intrinsic_word* const dst = reinterpret_cast<intrinsic_word*>(out);
  if constexpr (Add) {
    return _addcarryx_u64(c, a, b, dst);
  } else {
    return _subborrow_u64(c, a, b, dst);
  }
}
932
#else
933
// 32-bit add-with-carry (Add == true, _addcarry_u32) or subtract-with-borrow
// (Add == false, _subborrow_u32). The result word is written to *out and the
// value returned by the selected intrinsic is passed back to the caller.
template <bool Add>
unsigned char ADDCARRYSUBBORROW32(unsigned char c, uint32_t a, uint32_t b, uint32_t* out) {
  if constexpr (Add) {
    return _addcarry_u32(c, a, b, out);
  } else {
    return _subborrow_u32(c, a, b, out);
  }
}
6✔
937
// 64-bit add-with-carry (Add == true, _addcarry_u64) or subtract-with-borrow
// (Add == false, _subborrow_u64). The result word is written to *out and the
// value returned by the selected intrinsic is passed back to the caller.
template <bool Add>
unsigned char ADDCARRYSUBBORROW64(unsigned char c, uint64_t a, uint64_t b, uint64_t* out) {
  // The intrinsics take unsigned long long*, which may be a distinct type
  // from uint64_t even though both have the same size.
  using intrinsic_word = unsigned long long int;
  static_assert(sizeof(uint64_t) == sizeof(intrinsic_word));
  intrinsic_word* const dst = reinterpret_cast<intrinsic_word*>(out);
  if constexpr (Add) {
    return _addcarry_u64(c, a, b, dst);
  } else {
    return _subborrow_u64(c, a, b, dst);
  }
}
942
#endif
943

944
template <bool Add, std::integral U>
945
static inline unsigned char add_carry_sub_borrow(unsigned char c_in, U a, U b, U* out) noexcept {
6✔
946
  if constexpr (32 > bitsof<U>()) {
6✔
947
    // a       [aaaaaaaa111111111111111111111111111]
948
    // b     + [bbbbbbbb000000000000000000000000000]
949
    // carry +                            [0000000c]
950
    const uint8_t shift = (32 - bitsof<U>());
6✔
951
    uint32_t carry_propagation = Add ? ((1 << shift) - 1) : 0;
6✔
952
    uint32_t tmp_out;
6✔
953
    unsigned char carry = ADDCARRYSUBBORROW32<Add>(
6✔
954
        c_in,
6✔
955
        (static_cast<uint32_t>(a) << shift) | carry_propagation,
6✔
956
        (static_cast<uint32_t>(b) << shift),
6✔
957
        &tmp_out);
6✔
958
    *out = static_cast<U>(tmp_out >> shift);
6✔
959
    return carry;
6✔
960
  } else if constexpr (32 == bitsof<U>()) {
961
    return ADDCARRYSUBBORROW32<Add>(c_in, static_cast<uint32_t>(a), static_cast<uint32_t>(b), reinterpret_cast<uint32_t>(out));
962
  } else if constexpr (64 == bitsof<U>()) {
963
    return ADDCARRYSUBBORROW64<Add>(c_in, static_cast<uint64_t>(a), static_cast<uint64_t>(b), reinterpret_cast<uint64_t>(out));
964
  } else if constexpr (0 == (bitsof<U>() % 64)) {
965
    using t64 = std::conditional<std::is_signed_v<U>, int64_t, uint64_t>;
966
    unsigned char carry;
967
    for (int i = 0; i < (bitsof<U>() / 64); i++) {
968
      carry = ADDCARRYSUBBORROW64<Add>(c_in, static_cast<t64>(a >> (i * 64)), static_cast<t64>(b >> (i * 64)), reinterpret_cast<t64>(out) + i);
969
    }
970
    return carry;
971
  } else {
972
    assert(((void)"add carry intrinsics support only support powers of 2 bits", false));
973
  }
974
}
6✔
975

976
template <std::integral U>
977
static inline unsigned char add_carry(unsigned char c_in, U a, U b, U* out) noexcept {
6✔
978
  return add_carry_sub_borrow<true, U>(c_in, a, b, out);
6✔
979
}
6✔
980

981
template <std::integral U>
982
static inline unsigned char sub_borrow(unsigned char c_in, U a, U b, U* out) noexcept {
983
  return add_carry_sub_borrow<false, U>(c_in, a, b, out);
984
}
985

986
#else
987

988
// ------------ IMPLEMENTATION DETAILS: INSTRUCTIONS: ADD CARRY ------------- //
989
// Adds src0 and src1 and returns the new carry bit with intrinsics
990
template <class C, class T, class>
991
constexpr C _addcarry(C carry, T src0, T src1, T* dst) noexcept {
992
  static_assert(binary_digits<T>::value, "");
993
  using wider_t = typename _wider_type<T>::type;
994
  constexpr T digits = binary_digits<T>::value;
995
  wider_t tmp = 0;
996
  unsigned int udst = 0;
997
  unsigned long long int ulldst = 0;
998
  if (digits == std::numeric_limits<unsigned int>::digits) {
999
    carry = __builtin_ia32_addcarryx_u32(carry, src0, src1, &udst);
1000
    *dst = udst;
1001
  } else if (digits == std::numeric_limits<unsigned long long int>::digits) {
1002
    carry = __builtin_ia32_addcarryx_u64(carry, src0, src1, &ulldst);
1003
    *dst = ulldst;
1004
  } else if (digits < binary_digits<wider_t>::value) {
1005
    tmp = static_cast<wider_t>(src0) + static_cast<wider_t>(src1);
1006
    tmp += static_cast<wider_t>(static_cast<bool>(carry));
1007
    *dst = tmp;
1008
    carry = static_cast<bool>(tmp >> digits);
1009
  } else {
1010
    carry = _addcarry(carry, src0, src1, dst, std::ignore);
1011
  }
1012
  return carry;
1013
}
1014

1015
// Adds src0 and src1 and returns the new carry bit without intrinsics
1016
template <class C, class T, class... X>
1017
constexpr C _addcarry(C carry, T src0, T src1, T* dst, X...) noexcept
1018
{
1019
    static_assert(binary_digits<T>::value, "");
1020
    *dst = src0 + src1 + static_cast<T>(static_cast<bool>(carry));
1021
    return carry ? *dst <= src0 || *dst <= src1 : *dst < src0 || *dst < src1;
1022
}
1023

1024
// -------------------------------------------------------------------------- //
1025

1026
// ------------ IMPLEMENTATION DETAILS: INSTRUCTIONS: SUB BORROW ------------ //
1027
// Subtracts src1 to src0 and returns the new borrow bit with intrinsics
1028
template <class B, class T, class>
1029
constexpr B _subborrow(B borrow, T src0, T src1, T* dst) noexcept
1030
{
1031
    static_assert(binary_digits<T>::value, "");
1032
    using wider_t = typename _wider_type<T>::type;
1033
    constexpr T digits = binary_digits<T>::value;
1034
    wider_t tmp = 0;
1035
    unsigned int udst = 0;
1036
    unsigned long long int ulldst = 0;
1037
    if (digits == std::numeric_limits<unsigned int>::digits) {
1038
        borrow = __builtin_ia32_sbb_u32(borrow, src0, src1, &udst);
1039
        *dst = udst;
1040
    } else if (digits == std::numeric_limits<unsigned long long int>::digits) {
1041
        borrow = __builtin_ia32_sbb_u64(borrow, src0, src1, &ulldst);
1042
        *dst = ulldst;
1043
    } else if (digits < binary_digits<wider_t>::value) {
1044
        tmp = static_cast<wider_t>(src1);
1045
        tmp += static_cast<wider_t>(static_cast<bool>(borrow));
1046
        borrow = tmp > static_cast<wider_t>(src0);
1047
        *dst = static_cast<wider_t>(src0) - tmp;
1048
    } else {
1049
        borrow = _subborrow(borrow, src0, src1, dst, std::ignore);
1050
    }
1051
    return borrow;
1052
}
1053

1054
// Subtracts src1 to src0 and returns the new borrow bit with other intrinsics
1055
template <class B, class T, class>
1056
constexpr B _subborrow(const B& borrow, T src0, T src1, T* dst) noexcept
1057
{
1058
    static_assert(binary_digits<T>::value, "");
1059
    using wider_t = typename _wider_type<T>::type;
1060
    constexpr T digits = binary_digits<T>::value;
1061
    wider_t tmp = 0;
1062
    unsigned int udst = 0;
1063
    unsigned long long int ulldst = 0;
1064
    B flag = borrow;
1065
    if (digits == std::numeric_limits<unsigned int>::digits) {
1066
        flag = __builtin_ia32_subborrow_u32(borrow, src0, src1, &udst);
1067
        *dst = udst;
1068
    } else if (digits == std::numeric_limits<unsigned long long int>::digits) {
1069
        flag = __builtin_ia32_subborrow_u64(borrow, src0, src1, &ulldst);
1070
        *dst = ulldst;
1071
    } else if (digits < binary_digits<wider_t>::value) {
1072
        tmp = static_cast<wider_t>(src1);
1073
        tmp += static_cast<wider_t>(static_cast<bool>(borrow));
1074
        flag = tmp > static_cast<wider_t>(src0);
1075
        *dst = static_cast<wider_t>(src0) - tmp;
1076
    } else {
1077
        flag = _subborrow(borrow, src0, src1, dst, std::ignore);
1078
    }
1079
    return flag;
1080
}
1081

1082
// Subtracts src1 to src0 and returns the new borrow bit without intrinsics
1083
template <class B, class T, class... X>
1084
constexpr B _subborrow(B borrow, T src0, T src1, T* dst, X...) noexcept
1085
{
1086
    static_assert(binary_digits<T>::value, "");
1087
    *dst = src0 - (src1 + static_cast<T>(static_cast<bool>(borrow)));
1088
    return borrow ? src1 >= src0 : src1 > src0;
1089
}
1090
// -------------------------------------------------------------------------- //
1091

1092
#endif
1093

1094
// -------- IMPLEMENTATION DETAILS: INSTRUCTIONS: MULTIWORD MULTIPLY -------- //
1095
// Multiplies src0 and src1 and gets the full result with compiler intrinsics
1096
template <class T, class T128>
1097
constexpr T _mulx(T src0, T src1, T* hi) noexcept
1098
{
1099
    static_assert(binary_digits<T>::value, "");
1100
    using wider_t = typename _wider_type<T>::type;
1101
    constexpr T digits = binary_digits<T>::value;
1102
    wider_t tmp = 0;
1103
    T128 tmp128 = 0;
1104
    T lo = 0;
1105
    if (digits == std::numeric_limits<std::uint64_t>::digits) {
1106
        tmp128 = static_cast<T128>(src0) * static_cast<T128>(src1);
1107
        *hi = tmp128 >> digits;
1108
        lo = tmp128;
1109
    } else if (digits + digits == binary_digits<wider_t>::value) {
1110
        tmp = static_cast<wider_t>(src0) * static_cast<wider_t>(src1);
1111
        *hi = tmp >> digits;
1112
        lo = tmp;
1113
    } else {
1114
        lo = _mulx(src0, src1, hi, std::ignore);
1115
    }
1116
    return lo;
1117
}
1118

1119
// Multiplies src0 and src1 and gets the full result without compiler intrinsics
1120
template <class T, class... X>
1121
constexpr T _mulx(T src0, T src1, T* hi, X...) noexcept
1122
{
1123
    static_assert(binary_digits<T>::value, "");
1124
    constexpr T digits = binary_digits<T>::value;
1125
    constexpr T offset = digits / 2;
1126
    constexpr T ones = ~static_cast<T>(0);
1127
    const T lsbs0 = src0 & static_cast<T>(ones >> (digits - offset));
1128
    const T msbs0 = src0 >> offset;
1129
    const T lsbs1 = src1 & static_cast<T>(ones >> (digits - offset));
1130
    const T msbs1 = src1 >> offset;
1131
    const T llsbs = lsbs0 * lsbs1;
1132
    const T mlsbs = msbs0 * lsbs1;
1133
    const T lmsbs = lsbs0 * msbs1;
1134
    const T mi = mlsbs + lmsbs;
1135
    const T lo = llsbs + static_cast<T>(mi << offset);
1136
    const T lcarry = lo < llsbs || lo < static_cast<T>(mi << offset);
1137
    const T mcarry = static_cast<T>(mi < mlsbs || mi < lmsbs) << offset;
1138
    *hi = static_cast<T>(mi >> offset) + msbs0 * msbs1 + mcarry + lcarry;
1139
    return lo;
1140
}
1141
// -------------------------------------------------------------------------- //
1142

1143

1144

1145
// ========================================================================== //
1146
}  // namespace bit
1147
#endif // _BIT_DETAILS_HPP_INCLUDED
1148
// ========================================================================== //
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc