• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

stillwater-sc / universal / 25993334081

17 May 2026 02:05PM UTC coverage: 84.041% (+0.008%) from 84.033%
25993334081

Pull #858

github

web-flow
Merge 7f0ff1d7b into 54e554753
Pull Request #858: feat: operator>> hygiene + ereal nan/inf for decimal/elastic family (Phase E of #835)

75 of 80 new or added lines in 6 files covered. (93.75%)

32 existing lines in 3 files now uncovered.

46603 of 55453 relevant lines covered (84.04%)

6434118.44 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.25
/include/sw/universal/number/dfloat/dfloat_impl.hpp
1
#pragma once
2
// dfloat_impl.hpp: implementation of an IEEE 754-2008 decimal floating-point number system
3
//
4
// Copyright (C) 2017 Stillwater Supercomputing, Inc.
5
// SPDX-License-Identifier: MIT
6
//
7
// This file is part of the universal numbers project, which is released under an MIT Open Source license.
8
#include <cstdint>
9
#include <cstring>
10
#include <cmath>
11
#include <string>
12
#include <sstream>
13
#include <iostream>
14
#include <iomanip>
15
#include <algorithm>
16

17
// supporting types and functions
18
#include <universal/native/ieee754.hpp>
19
#include <universal/number/shared/nan_encoding.hpp>
20
#include <universal/number/shared/infinite_encoding.hpp>
21
#include <universal/number/shared/specific_value_encoding.hpp>
22
// dfloat exception structure
23
#include <universal/number/dfloat/exceptions.hpp>
24
// DPD (Densely Packed Decimal) codec
25
#include <universal/number/dfloat/dpd_codec.hpp>
26
// blockbinary for encoding storage and significand arithmetic
27
#include <universal/internal/blockbinary/blockbinary.hpp>
28
// sw::bit_cast for constexpr -0.0 detection in convert_ieee754
29
#include <universal/utility/bit_cast.hpp>
30

31
namespace sw { namespace universal {
32

33
///////////////////////////////////////////////////////////////////////////////
34
// Internal helpers for BID encoding
35
//
36
// IEEE 754-2008 decimal format layout:
37
//   [sign(1)] [combination(5)] [exponent_continuation(w)] [trailing_significand(t)]
38
//
39
// Total bits: nbits = 1 + 5 + w + t
40
// where w = es (exponent continuation bits)
41
//       t = nbits - 1 - 5 - w
42
//
43
// Combination field (5 bits: abcde):
44
//   ab != 11: exponent MSBs = ab, MSD (most significant digit) = 0cde (0-7)
45
//   ab == 11 && c != 1: exponent MSBs = cd, MSD = 100e (8 or 9)
46
//   11110: +/- infinity
47
//   11111: NaN (quiet or signaling based on trailing significand MSB)
48
//
49
// BID encoding: trailing significand stored as binary integer
50
// DPD encoding: trailing significand stored as densely packed decimal (10-bit declets)
51

52
// Compute number of bits needed for a decimal32/64/128 configuration
53
// decimal32:  ndigits=7,  es=6   -> nbits = 1 + 5 + 6  + 20  = 32
54
// decimal64:  ndigits=16, es=8   -> nbits = 1 + 5 + 8  + 50  = 64
55
// decimal128: ndigits=34, es=12  -> nbits = 1 + 5 + 12 + 110 = 128
56

57
///////////////////////////////////////////////////////////////////////////////
58
// power_of_10: constexpr power-of-10 helpers
59
// Powers of ten that fit in a uint64_t: entry k holds 10^k for k = 0..19.
static constexpr uint64_t _pow10_table[20] = {
        1ull,
        10ull,
        100ull,
        1'000ull,
        10'000ull,
        100'000ull,
        1'000'000ull,
        10'000'000ull,
        100'000'000ull,
        1'000'000'000ull,
        10'000'000'000ull,
        100'000'000'000ull,
        1'000'000'000'000ull,
        10'000'000'000'000ull,
        100'000'000'000'000ull,
        1'000'000'000'000'000ull,
        10'000'000'000'000'000ull,
        100'000'000'000'000'000ull,
        1'000'000'000'000'000'000ull,
        10'000'000'000'000'000'000ull
};

// Table lookup of 10^n.
// Only n in [0, 19] is valid: the index is not range-checked, so a larger n
// reads past the table (rejected by the compiler only during constant evaluation).
static constexpr uint64_t pow10_64(unsigned n) {
        return _pow10_table[n];
}
85

86
// count decimal digits of a uint64_t
87
static constexpr unsigned count_decimal_digits(uint64_t v) {
        // Every value, zero included, has at least one digit; each further
        // non-zero quotient by ten contributes one more.
        unsigned digits = 1;
        for (uint64_t rest = v / 10; rest != 0; rest /= 10) {
                ++digits;
        }
        return digits;
}
93

94

95
// constexpr ceil(log2(10^n)) - bits needed to represent 10^n in binary
96
// This is the number of trailing significand bits for BID encoding
97
static constexpr unsigned bid_trailing_bits(unsigned n) {
        // Integer evaluation of ceil(n * 3.322), with 3322/1000 standing in for
        // log2(10) ~= 3.321928. n == 0 yields 0.
        const uint64_t scaled    = static_cast<uint64_t>(n) * 3322u;
        const uint64_t whole     = scaled / 1000u;
        const uint64_t leftover  = scaled % 1000u;
        // round up whenever the division was not exact
        return static_cast<unsigned>(leftover != 0u ? whole + 1u : whole);
}
105

106
// DPD trailing bits: (ndigits-1)/3 declets of 10 bits + remainder
107
static constexpr unsigned dpd_trailing_bits(unsigned ndigits_minus_1) {
        // Each complete group of three digits takes one 10-bit declet.
        unsigned width = (ndigits_minus_1 / 3) * 10;
        // A partial group costs 4 bits for one digit and 7 bits for two digits.
        switch (ndigits_minus_1 % 3) {
        case 1:  width += 4; break;
        case 2:  width += 7; break;
        default: break;
        }
        return width;
}
115

116
///////////////////////////////////////////////////////////////////////////////
117
// dfloat: IEEE 754-2008 decimal floating-point number
118
//
119
// Template parameters:
120
//   ndigits  - number of decimal precision digits (p)
121
//   es       - exponent continuation bits (w)
122
//   Encoding - BID or DPD
123
//   bt       - block type for storage
124
//
125
template<unsigned _ndigits, unsigned _es, DecimalEncoding _Encoding = DecimalEncoding::BID, typename bt = std::uint32_t>
126
class dfloat {
127
public:
128
        static constexpr unsigned ndigits  = _ndigits;             // precision in decimal digits (p)
129
        static constexpr unsigned es       = _es;                  // exponent continuation bits (w)
130
        static constexpr DecimalEncoding encoding = _Encoding;
131
        static constexpr unsigned combBits = 5u;                   // combination field bits
132
        static constexpr unsigned t        = (encoding == DecimalEncoding::BID)
133
                ? bid_trailing_bits(ndigits - 1)
134
                : dpd_trailing_bits(ndigits - 1);
135
        static constexpr unsigned nbits    = 1u + combBits + es + t;
136
        static constexpr int      bias     = (3 << (es - 1)) + static_cast<int>(ndigits) - 2;
137
        static constexpr int      emax     = (3 << es) - 1 - bias;   // max biased exponent
138
        static constexpr int      emin     = -bias;                    // min biased exponent
139

140
        // Significand arithmetic type: blockbinary with enough bits for any ndigits
141
        // Signed is required because blockbinary::longdivision() requires it.
142
        // The sign bit is unused headroom since significands are always >= 0.
143
        static constexpr unsigned sig_bits = 4 * ndigits + 8;
144
        using significand_t = blockbinary<sig_bits, bt, BinaryNumberType::Signed>;
145

146
        // Wide significand for overflow-free multiplication
147
        using wide_significand_t = blockbinary<2 * sig_bits, bt, BinaryNumberType::Signed>;
148

149
        // Helper: power of 10 returning significand_t
150
        static constexpr significand_t pow10_s(unsigned n) {
                // Build 10^n by n successive multiplications, starting from 1.
                const significand_t base(10);
                significand_t power(1);
                while (n > 0) {
                        power *= base;
                        --n;
                }
                return power;
        }
156

157
        // Helper: count decimal digits of a significand_t
158
        static constexpr unsigned count_digits_s(const significand_t& v) {
758✔
159
                if (v.iszero()) return 1;
758✔
160
                unsigned count = 0;
758✔
161
                significand_t tmp(v);
758✔
162
                significand_t ten(10);
758✔
163
                while (!tmp.iszero()) { tmp /= ten; ++count; }
2,262✔
164
                return count;
758✔
165
        }
166

167
        // Helper: significand_t to string
168
        static std::string sig_to_string(const significand_t& v) {
86✔
169
                return to_decimal(v);
86✔
170
        }
171

172
        typedef bt BlockType;
173

174
        // Encoding storage type: blockbinary with Unsigned encoding
175
        using encoding_t = blockbinary<nbits, bt, BinaryNumberType::Unsigned>;
176

177
        /// trivial constructor
178
        dfloat() = default;
179

180
        dfloat(const dfloat&) = default;
181
        dfloat(dfloat&&) = default;
182

183
        dfloat& operator=(const dfloat&) = default;
184
        dfloat& operator=(dfloat&&) = default;
185

186
        // converting constructors
187
        constexpr dfloat(const std::string& stringRep) {
                // Zero the encoding, then let assign() parse the text.
                clear();
                assign(stringRep);
        }
1✔
188

189
        // specific value constructor
190
        constexpr dfloat(const SpecificValue code) noexcept {
8✔
191
                clear();
8✔
192
                switch (code) {
8✔
193
                case SpecificValue::maxpos:
4✔
194
                        maxpos();
4✔
195
                        break;
4✔
196
                case SpecificValue::minpos:
4✔
197
                        minpos();
4✔
198
                        break;
4✔
199
                case SpecificValue::zero:
×
200
                default:
201
                        zero();
×
202
                        break;
×
203
                case SpecificValue::minneg:
×
204
                        minneg();
×
205
                        break;
×
206
                case SpecificValue::maxneg:
×
207
                        maxneg();
×
208
                        break;
×
209
                case SpecificValue::infpos:
×
210
                        setinf(false);
×
211
                        break;
×
212
                case SpecificValue::infneg:
×
213
                        setinf(true);
×
214
                        break;
×
215
                case SpecificValue::nar:
×
216
                case SpecificValue::qnan:
217
                        setnan(NAN_TYPE_QUIET);
×
218
                        break;
×
219
                case SpecificValue::snan:
×
220
                        setnan(NAN_TYPE_SIGNALLING);
×
221
                        break;
×
222
                }
223
        }
8✔
224

225
        // initializers for native types
226
        constexpr explicit dfloat(signed char iv)        noexcept { clear(); *this = iv; }
227
        constexpr explicit dfloat(short iv)              noexcept { clear(); *this = iv; }
228
        constexpr explicit dfloat(int iv)                noexcept { clear(); *this = iv; }
23✔
229
        constexpr explicit dfloat(long iv)               noexcept { clear(); *this = iv; }
230
        constexpr explicit dfloat(long long iv)          noexcept { clear(); *this = iv; }
231
        constexpr explicit dfloat(char iv)               noexcept { clear(); *this = iv; }
232
        constexpr explicit dfloat(unsigned short iv)     noexcept { clear(); *this = iv; }
233
        constexpr explicit dfloat(unsigned int iv)       noexcept { clear(); *this = iv; }
6✔
234
        constexpr explicit dfloat(unsigned long iv)      noexcept { clear(); *this = iv; }
235
        constexpr explicit dfloat(unsigned long long iv) noexcept { clear(); *this = iv; }
236
        constexpr explicit dfloat(float iv)              noexcept { clear(); *this = iv; }
237
        constexpr explicit dfloat(double iv)             noexcept { clear(); *this = iv; }
407✔
238

239
        // assignment operators for native types
240
        // -- signed integers: handled by convert_signed()
        constexpr dfloat& operator=(signed char rhs) noexcept {
                return convert_signed(rhs);
        }
        constexpr dfloat& operator=(short rhs) noexcept {
                return convert_signed(rhs);
        }
        constexpr dfloat& operator=(int rhs) noexcept {
                return convert_signed(rhs);
        }
        constexpr dfloat& operator=(long rhs) noexcept {
                return convert_signed(rhs);
        }
        constexpr dfloat& operator=(long long rhs) noexcept {
                return convert_signed(rhs);
        }
        // -- plain char: its signedness is platform-defined, so it is widened to
        //    int and sent down the signed path; on signed-char targets char(-1)
        //    then converts to -1 rather than UCHAR_MAX.
        constexpr dfloat& operator=(char rhs) noexcept {
                return convert_signed(static_cast<int>(rhs));
        }
        // -- unsigned integers: handled by convert_unsigned()
        constexpr dfloat& operator=(unsigned short rhs) noexcept {
                return convert_unsigned(rhs);
        }
        constexpr dfloat& operator=(unsigned int rhs) noexcept {
                return convert_unsigned(rhs);
        }
        constexpr dfloat& operator=(unsigned long rhs) noexcept {
                return convert_unsigned(rhs);
        }
        constexpr dfloat& operator=(unsigned long long rhs) noexcept {
                return convert_unsigned(rhs);
        }
        // -- binary floating-point: handled by convert_ieee754()
        constexpr dfloat& operator=(float rhs) noexcept {
                return convert_ieee754(rhs);
        }
        constexpr dfloat& operator=(double rhs) noexcept {
                return convert_ieee754(rhs);
        }
407✔
255

256
        // conversion operators
257
        constexpr explicit operator float()           const noexcept { return float(convert_to_double()); }
258
        constexpr explicit operator double()          const noexcept { return convert_to_double(); }
389✔
259

260
#if LONG_DOUBLE_SUPPORT
        // long double is routed through double in both directions: it is
        // narrowed to double on the way in and widened from double on the way out.
        constexpr explicit dfloat(long double iv) noexcept { clear(); operator=(iv); }
        constexpr dfloat& operator=(long double rhs) noexcept {
                return convert_ieee754(static_cast<double>(rhs));
        }
        constexpr explicit operator long double() const noexcept {
                return static_cast<long double>(convert_to_double());
        }
#endif
265

266
        // prefix operators
267
        constexpr dfloat operator-() const {
                dfloat result(*this);
                // A zero is returned as-is; every other value has its sign bit flipped.
                if (result.iszero()) return result;
                result.setsign(!result.sign());
                return result;
        }
274

275
        // arithmetic operators
276
        constexpr dfloat& operator+=(const dfloat& rhs) {
                // Adds rhs to *this and returns *this.
                //
                // Special values: any NaN operand gives a quiet NaN; inf + (-inf)
                // gives a quiet NaN; otherwise an infinite operand wins; adding a
                // zero returns the other operand unchanged.
                //
                // Finite operands are aligned to a common exponent and combined as
                // integers. Significands are not assumed to be normalized, so the
                // decision that one operand is too small to matter is based on digit
                // positions (exponent + digit count), never on the exponent gap alone.

                // unpack both operands
                bool lhs_sign, rhs_sign;
                int lhs_exp, rhs_exp;
                significand_t lhs_sig, rhs_sig;
                unpack(lhs_sign, lhs_exp, lhs_sig);
                rhs.unpack(rhs_sign, rhs_exp, rhs_sig);

                // handle special values
                if (isnan() || rhs.isnan()) { setnan(NAN_TYPE_QUIET); return *this; }
                if (isinf() && rhs.isinf()) {
                        if (lhs_sign != rhs_sign) { setnan(NAN_TYPE_QUIET); return *this; } // inf + (-inf) = NaN
                        return *this; // same sign inf
                }
                if (isinf()) return *this;
                if (rhs.isinf()) { *this = rhs; return *this; }
                if (rhs.iszero()) return *this;
                if (iszero()) { *this = rhs; return *this; }

                // "hi" is the operand with the larger exponent, "lo" the other one.
                const bool lhs_is_hi = (lhs_exp >= rhs_exp);
                significand_t aligned_lhs(lhs_sig);
                significand_t aligned_rhs(rhs_sig);
                significand_t& hi_sig = lhs_is_hi ? aligned_lhs : aligned_rhs;
                significand_t& lo_sig = lhs_is_hi ? aligned_rhs : aligned_lhs;
                const int hi_exp = lhs_is_hi ? lhs_exp : rhs_exp;
                const int lo_exp = lhs_is_hi ? rhs_exp : lhs_exp;
                const int hi_digits = static_cast<int>(count_digits_s(hi_sig));
                const int lo_digits = static_cast<int>(count_digits_s(lo_sig));

                // Largest digit count W with 2 * 10^W <= 2^(sig_bits - 1), so the sum of
                // two W-digit values still fits the signed significand_t. 301/1000 is
                // slightly below log10(2), which keeps the bound conservative.
                constexpr int window = static_cast<int>(((sig_bits - 2u) * 301u) / 1000u);

                // Align at the lower exponent when the scaled-up hi operand still fits
                // the window (the exact case); otherwise raise the result exponent just
                // enough for it to fit, dropping the lowest digits of lo instead.
                int result_exp = lo_exp;
                const int window_floor = hi_exp + hi_digits - window;
                if (window_floor > result_exp) result_exp = window_floor;
                if (result_exp > hi_exp) result_exp = hi_exp; // never scale hi down

                // Number of low-order digits of lo that fall below the window.
                const int dropped = result_exp - lo_exp;
                if (dropped >= lo_digits) {
                        // lo lies entirely below the window, i.e. more than a full digit
                        // beyond ndigits precision of hi: the result is hi, unchanged.
                        if (!lhs_is_hi) *this = rhs;
                        return *this;
                }

                significand_t ten(10);
                for (int i = result_exp; i < hi_exp; ++i) hi_sig *= ten;
                for (int i = 0; i < dropped; ++i) lo_sig /= ten; // truncates digits below the window

                bool result_sign;
                significand_t abs_sig;
                if (lhs_sign == rhs_sign) {
                        abs_sig = aligned_lhs + aligned_rhs;
                        result_sign = lhs_sign;
                }
                else {
                        // opposite signs: subtract the smaller magnitude from the larger;
                        // on a tie the lhs sign is kept
                        if (aligned_lhs >= aligned_rhs) {
                                abs_sig = aligned_lhs - aligned_rhs;
                                result_sign = lhs_sign;
                        }
                        else {
                                abs_sig = aligned_rhs - aligned_lhs;
                                result_sign = rhs_sign;
                        }
                }

                // normalize to ndigits precision
                normalize_and_pack(result_sign, result_exp, abs_sig);
                return *this;
        }
344
        constexpr dfloat& operator-=(const dfloat& rhs) {
                // a - b is computed as a + (-b). Unary minus leaves a zero untouched
                // and flips the sign bit of everything else.
                return operator+=(-rhs);
        }
349
        constexpr dfloat& operator*=(const dfloat& rhs) {
                // Multiplies *this by rhs and returns *this.
                bool lhs_sign, rhs_sign;
                int lhs_exp, rhs_exp;
                significand_t lhs_sig, rhs_sig;
                unpack(lhs_sign, lhs_exp, lhs_sig);
                rhs.unpack(rhs_sign, rhs_exp, rhs_sig);

                // Special values: NaN propagates as quiet NaN, 0 * inf is a quiet
                // NaN, any other product with an infinity is a signed infinity,
                // and any remaining product with a zero is zero.
                if (isnan() || rhs.isnan()) {
                        setnan(NAN_TYPE_QUIET);
                        return *this;
                }
                bool negative = (lhs_sign != rhs_sign);
                const bool any_zero = iszero() || rhs.iszero();
                if (isinf() || rhs.isinf()) {
                        if (any_zero) {
                                setnan(NAN_TYPE_QUIET);
                        }
                        else {
                                setinf(negative);
                        }
                        return *this;
                }
                if (any_zero) {
                        setzero();
                        return *this;
                }

                // Exact product in the double-width type; exponents add.
                int product_exp = lhs_exp + rhs_exp;
                wide_significand_t product = urmul(lhs_sig, rhs_sig);
                wide_significand_t ten_w(10);

                // Count the decimal digits of the wide product.
                unsigned product_digits = 0;
                if (product.iszero()) {
                        product_digits = 1;
                }
                else {
                        for (wide_significand_t rest(product); !rest.iszero(); rest /= ten_w) {
                                ++product_digits;
                        }
                }
                // Drop low-order digits (truncating) until at most ndigits remain,
                // raising the exponent by one for every digit removed.
                for (; product_digits > ndigits; --product_digits) {
                        product /= ten_w;
                        ++product_exp;
                }

                // Narrow to the working significand and encode.
                significand_t product_sig;
                product_sig.assign(product);

                normalize_and_pack(negative, product_exp, product_sig);
                return *this;
        }
393
        constexpr dfloat& operator/=(const dfloat& rhs) {
                // Divides *this by rhs and returns *this.
                //
                // Special values:
                //   NaN operand        -> quiet NaN
                //   inf / inf          -> quiet NaN
                //   x / 0              -> throws dfloat_divide_by_zero when
                //                         DFLOAT_THROW_ARITHMETIC_EXCEPTION is enabled and
                //                         the call is not constant-evaluated; otherwise
                //                         0/0 is a quiet NaN and x/0 a signed infinity
                //   0 / x              -> zero
                //   inf / finite       -> signed infinity
                //   finite / inf       -> zero
                bool lhs_sign, rhs_sign;
                int lhs_exp, rhs_exp;
                significand_t lhs_sig, rhs_sig;
                unpack(lhs_sign, lhs_exp, lhs_sig);
                rhs.unpack(rhs_sign, rhs_exp, rhs_sig);

                // handle special values
                if (isnan() || rhs.isnan()) { setnan(NAN_TYPE_QUIET); return *this; }
                if (isinf() && rhs.isinf()) { setnan(NAN_TYPE_QUIET); return *this; }
                if (rhs.iszero()) {
#if DFLOAT_THROW_ARITHMETIC_EXCEPTION
                        // Throw is ill-formed in a constant expression; gate so that
                        // constant evaluation falls through to the IEEE 754-2008
                        // results (NaN or +/-inf) below.
                        if (!std::is_constant_evaluated()) {
                                throw dfloat_divide_by_zero();
                        }
#endif
                        if (iszero()) { setnan(NAN_TYPE_QUIET); return *this; } // 0/0
                        setinf(lhs_sign != rhs_sign);
                        return *this;
                }
                if (iszero()) { setzero(); return *this; }
                if (isinf()) { setsign(lhs_sign != rhs_sign); return *this; }
                // A finite non-zero value divided by an infinity is zero. Without this
                // guard the long division below would run on the fields unpacked from
                // the infinity encoding.
                if (rhs.isinf()) { setzero(); return *this; }

                bool result_sign = (lhs_sign != rhs_sign);
                int result_exp = lhs_exp - rhs_exp;

                // Iterative long division: every pass shifts the remainder one decimal
                // place and appends the next quotient digit(s).
                // NOTE(review): when lhs_sig is much larger than rhs_sig the quotient
                // can approach 2*ndigits digits; confirm that sig_bits (4*ndigits + 8)
                // leaves enough headroom for that case.
                significand_t remainder(lhs_sig);
                significand_t quotient(0);
                significand_t ten(10);
                for (unsigned i = 0; i < ndigits; ++i) {
                        remainder *= ten;
                        quotient = quotient * ten + remainder / rhs_sig;
                        remainder = remainder % rhs_sig;
                }
                // the dividend was scaled by 10^ndigits in total
                result_exp -= static_cast<int>(ndigits);

                normalize_and_pack(result_sign, result_exp, quotient);
                return *this;
        }
440

441
        // unary operators: advance to next/previous representable value
442
        constexpr dfloat& operator++() {
                // Advances *this to the next representable value toward +infinity.
                // NaN and infinities are left unchanged; zero steps to minpos.
                if (isnan() || isinf()) return *this;
                if (iszero()) { *this = dfloat(SpecificValue::minpos); return *this; }
                bool s; int exp; significand_t sig;
                unpack(s, exp, sig);

                const significand_t one(1);
                const significand_t ten(10);
                significand_t lo_bound = pow10_s(ndigits - 1);   // smallest ndigits-digit significand
                significand_t hi_bound = pow10_s(ndigits);       // one past the largest significand

                // Scale the significand up to ndigits digits (or until the exponent
                // reaches emin) so that a step of 1 is the true next value.
                while (sig < lo_bound && exp > emin) {
                        sig *= ten;
                        --exp;
                }
                if (s) {
                        // Negative: next = closer to zero = decrement magnitude
                        sig -= one;
                        if (sig.iszero()) { setzero(); return *this; }
                        // Borrow into the next lower exponent only while one exists. At
                        // emin the shortened significand is itself the next value, so it
                        // must not be replaced by all nines one exponent below emin.
                        if (sig < lo_bound && exp > emin) {
                                sig = hi_bound - one;
                                --exp;
                        }
                } else {
                        // Positive: next = increment significand
                        sig += one;
                        if (sig >= hi_bound) {
                                // carry out of the top digit: restart one exponent higher
                                sig = lo_bound;
                                ++exp;
                                if (exp > emax) { setinf(false); return *this; }
                        }
                }
                pack(s, exp, sig);
                return *this;
        }
476
        constexpr dfloat operator++(int) {
                // Post-increment: hand back the value held before stepping.
                dfloat before(*this);
                ++(*this);
                return before;
        }
481
        constexpr dfloat& operator--() {
                // Moves *this to the previous representable value toward -infinity.
                // NaN and infinities are left unchanged; zero steps to minneg.
                if (isnan() || isinf()) return *this;
                if (iszero()) { *this = dfloat(SpecificValue::minneg); return *this; }
                bool s; int exp; significand_t sig;
                unpack(s, exp, sig);

                const significand_t one(1);
                const significand_t ten(10);
                significand_t lo_bound = pow10_s(ndigits - 1);   // smallest ndigits-digit significand
                significand_t hi_bound = pow10_s(ndigits);       // one past the largest significand

                // Scale the significand up to ndigits digits (or until the exponent
                // reaches emin) so that a step of 1 is the true neighbouring value.
                while (sig < lo_bound && exp > emin) {
                        sig *= ten;
                        --exp;
                }
                if (s) {
                        // Negative: previous = farther from zero = increment magnitude
                        sig += one;
                        if (sig >= hi_bound) {
                                // carry out of the top digit: restart one exponent higher
                                sig = lo_bound;
                                ++exp;
                                if (exp > emax) { setinf(true); return *this; }
                        }
                } else {
                        // Positive: previous = decrement significand
                        sig -= one;
                        if (sig.iszero()) { setzero(); return *this; }
                        // Borrow into the next lower exponent only while one exists. At
                        // emin the shortened significand is itself the previous value, so
                        // it must not be replaced by all nines one exponent below emin.
                        if (sig < lo_bound && exp > emin) {
                                sig = hi_bound - one;
                                --exp;
                        }
                }
                pack(s, exp, sig);
                return *this;
        }
514
        constexpr dfloat operator--(int) {
                // Post-decrement: hand back the value held before stepping.
                dfloat before(*this);
                --(*this);
                return before;
        }
519

520
        // modifiers
521
        constexpr void clear() noexcept {
1,255✔
522
                _encoding.clear();
1,255✔
523
        }
1,255✔
524
        constexpr void setzero() noexcept {
                // Setting zero is the same operation as clear().
                clear();
        }
45✔
525

526
        constexpr void setinf(bool negative = true) noexcept {
6✔
527
                clear();
6✔
528
                // combination field = 11110 -> bits: sign | 11110 | 0...0
529
                // set sign
530
                setbit(nbits - 1, negative);
6✔
531
                // set combination field bits to 11110
532
                unsigned combStart = nbits - 2; // MSB of combination
6✔
533
                setbit(combStart,     true);   // a = 1
6✔
534
                setbit(combStart - 1, true);   // b = 1
6✔
535
                setbit(combStart - 2, true);   // c = 1
6✔
536
                setbit(combStart - 3, true);   // d = 1
6✔
537
                setbit(combStart - 4, false);  // e = 0
6✔
538
        }
6✔
539

540
        constexpr void setnan(int NaNType = NAN_TYPE_SIGNALLING) noexcept {
10✔
541
                clear();
10✔
542
                // combination field = 11111
543
                unsigned combStart = nbits - 2;
10✔
544
                setbit(combStart,     true);
10✔
545
                setbit(combStart - 1, true);
10✔
546
                setbit(combStart - 2, true);
10✔
547
                setbit(combStart - 3, true);
10✔
548
                setbit(combStart - 4, true);
10✔
549
                if (NaNType == NAN_TYPE_QUIET) {
10✔
550
                        // set MSB of trailing significand for quiet NaN
551
                        if (t > 0) setbit(t - 1, true);
10✔
552
                }
553
        }
10✔
554

555
        constexpr void setsign(bool negative = true) noexcept {
91✔
556
                setbit(nbits - 1, negative);
91✔
557
        }
91✔
558

559
        // use un-interpreted raw bits to set the value of the dfloat
560
        constexpr void setbits(uint64_t value) noexcept {
561
                _encoding.setbits(value);
562
        }
563

564
        // create specific number system values of interest
565
        constexpr dfloat& maxpos() noexcept {
                clear();
                // Largest significand is ndigits nines (10^ndigits - 1), paired with emax.
                significand_t all_nines = pow10_s(ndigits);
                all_nines -= significand_t(1);
                pack(false, emax, all_nines);
                return *this;
        }
571
        constexpr dfloat& minpos() noexcept {
                clear();
                // Smallest positive value: significand 1 at exponent emin.
                const significand_t unit(1);
                pack(false, emin, unit);
                return *this;
        }
576
        constexpr dfloat& zero() noexcept
        {
                // Zero is the cleared encoding.
                clear();
                return *this;
        }
580
        constexpr dfloat& minneg() noexcept {
                clear();
                // Negative counterpart of minpos: significand 1 at exponent emin, sign set.
                const significand_t unit(1);
                pack(true, emin, unit);
                return *this;
        }
585
        constexpr dfloat& maxneg() noexcept {
                clear();
                // Negative counterpart of maxpos: ndigits nines (10^ndigits - 1) at emax, sign set.
                significand_t all_nines = pow10_s(ndigits);
                all_nines -= significand_t(1);
                pack(true, emax, all_nines);
                return *this;
        }
591

592
        dfloat& assign(const std::string& txt) {
13✔
593
                clear();
13✔
594
                if (txt.empty()) return *this;
13✔
595

596
                // Skip leading whitespace
597
                size_t pos = 0;
13✔
598
                while (pos < txt.size() && std::isspace(static_cast<unsigned char>(txt[pos]))) ++pos;
13✔
599
                if (pos >= txt.size()) return *this;
13✔
600

601
                // Check for sign
602
                bool negative = false;
13✔
603
                if (txt[pos] == '-') { negative = true; ++pos; }
13✔
604
                else if (txt[pos] == '+') { ++pos; }
10✔
605

606
                // Check for special values (case-insensitive)
607
                std::string rest = txt.substr(pos);
13✔
608
                if (rest.size() >= 3) {
13✔
609
                        char c0 = static_cast<char>(std::tolower(static_cast<unsigned char>(rest[0])));
12✔
610
                        char c1 = static_cast<char>(std::tolower(static_cast<unsigned char>(rest[1])));
12✔
611
                        char c2 = static_cast<char>(std::tolower(static_cast<unsigned char>(rest[2])));
12✔
612
                        if (c0 == 'i' && c1 == 'n' && c2 == 'f') { setinf(negative); return *this; }
12✔
613
                        if (c0 == 'n' && c1 == 'a' && c2 == 'n') { setnan(NAN_TYPE_QUIET); return *this; }
8✔
614
                }
615

616
                // Parse decimal digits, collecting significand and tracking decimal point
617
                // Input forms: "123", "123.456", ".456", "123.", "123.456e-78", "123e5"
618
                significand_t sig(0);
6✔
619
                significand_t ten(10);
6✔
620
                unsigned digit_count = 0;
6✔
621
                int decimal_exponent = 0;
6✔
622
                bool seen_dot = false;
6✔
623
                int frac_digits = 0;
6✔
624

625
                // Parse integer and fractional parts
626
                while (pos < txt.size()) {
33✔
627
                        char ch = txt[pos];
29✔
628
                        if (ch == '.') {
29✔
629
                                if (seen_dot) break; // second dot ends parsing
5✔
630
                                seen_dot = true;
5✔
631
                                ++pos;
5✔
632
                                continue;
5✔
633
                        }
634
                        if (ch >= '0' && ch <= '9') {
24✔
635
                                if (digit_count < ndigits) {
22✔
636
                                        sig = sig * ten + significand_t(static_cast<long long>(ch - '0'));
22✔
637
                                        digit_count++;
22✔
638
                                }
639
                                else {
640
                                        // Beyond precision: count but don't store
641
                                        if (!seen_dot) decimal_exponent++;
×
642
                                }
643
                                if (seen_dot) frac_digits++;
22✔
644
                                ++pos;
22✔
645
                                continue;
22✔
646
                        }
647
                        break; // non-digit, non-dot ends the mantissa
2✔
648
                }
649

650
                // The significand represents: sig * 10^(-frac_digits)
651
                // So the base exponent before any explicit exponent is -frac_digits
652
                decimal_exponent -= frac_digits;
6✔
653

654
                // Parse optional exponent: e/E followed by optional sign and digits
655
                if (pos < txt.size() && (txt[pos] == 'e' || txt[pos] == 'E')) {
6✔
656
                        ++pos;
2✔
657
                        bool exp_neg = false;
2✔
658
                        if (pos < txt.size() && txt[pos] == '-') { exp_neg = true; ++pos; }
2✔
659
                        else if (pos < txt.size() && txt[pos] == '+') { ++pos; }
1✔
660

661
                        int exp_val = 0;
2✔
662
                        while (pos < txt.size() && txt[pos] >= '0' && txt[pos] <= '9') {
5✔
663
                                exp_val = exp_val * 10 + (txt[pos] - '0');
3✔
664
                                ++pos;
3✔
665
                        }
666
                        decimal_exponent += exp_neg ? -exp_val : exp_val;
2✔
667
                }
668

669
                // Remove trailing zeros from significand (normalize)
670
                while (!sig.iszero() && digit_count > 1) {
7✔
671
                        significand_t remainder = sig % ten;
5✔
672
                        if (!remainder.iszero()) break;
5✔
673
                        sig /= ten;
1✔
674
                        decimal_exponent++;
1✔
675
                        digit_count--;
1✔
676
                }
677

678
                if (sig.iszero()) {
6✔
679
                        setzero();
1✔
680
                        if (negative) setsign(true);
1✔
681
                        return *this;
1✔
682
                }
683

684
                normalize_and_pack(negative, decimal_exponent, sig);
5✔
685
                return *this;
5✔
686
        }
13✔
687

688
        // selectors
689
        constexpr bool sign() const noexcept {
1,441✔
690
                return getbit(nbits - 1);
1,441✔
691
        }
692

693
        constexpr bool iszero() const noexcept {
2,482✔
694
                // zero when all bits except sign are 0
695
                // Check all bits except the sign bit (nbits-1)
696
                for (unsigned i = 0; i < nbits - 1; ++i) {
7,187✔
697
                        if (_encoding.at(i)) return false;
7,103✔
698
                }
699
                return true;
84✔
700
        }
701

702
        constexpr bool isone() const noexcept {
2✔
703
                bool s; int e; significand_t sig;
704
                unpack(s, e, sig);
2✔
705
                return !s && (sig == significand_t(1)) && (e == 0);
2✔
706
        }
707

708
        constexpr bool ispos() const noexcept { return !sign(); }
709
        constexpr bool isneg() const noexcept { return sign(); }
710

711
        constexpr bool isinf() const noexcept {
2,601✔
712
                // combination field == 11110
713
                unsigned combStart = nbits - 2;
2,601✔
714
                return getbit(combStart) && getbit(combStart - 1) &&
2,837✔
715
                       getbit(combStart - 2) && getbit(combStart - 3) &&
2,890✔
716
                       !getbit(combStart - 4);
2,654✔
717
        }
718

719
        constexpr bool isnan() const noexcept {
2,401✔
720
                // combination field == 11111
721
                unsigned combStart = nbits - 2;
2,401✔
722
                return getbit(combStart) && getbit(combStart - 1) &&
2,627✔
723
                       getbit(combStart - 2) && getbit(combStart - 3) &&
2,675✔
724
                       getbit(combStart - 4);
2,449✔
725
        }
726

727
        constexpr bool isnan(int NaNType) const noexcept {
728
                if (!isnan()) return false;
729
                if (NaNType == NAN_TYPE_QUIET) {
730
                        return (t > 0) ? getbit(t - 1) : true;
731
                }
732
                else {
733
                        return (t > 0) ? !getbit(t - 1) : true;
734
                }
735
        }
736

737
        constexpr int scale() const noexcept {
12✔
738
                if (iszero() || isinf() || isnan()) return 0;
12✔
739
                bool s; int e; significand_t sig;
740
                unpack(s, e, sig);
12✔
741
                // scale in powers of 10
742
                return e + static_cast<int>(count_digits_s(sig)) - 1;
12✔
743
        }
744

745
        // Output layouts understood by str()
        enum class FmtMode {
                automatic,   // str() picks fixed or scientific from the position of the decimal point
                fixed,       // plain positional notation
                scientific   // d.ddd...e+/-NNN
        };
747

748
        // convert to string
749
        // precision: number of significant digits (0 = ndigits)
750
        // mode: automatic (default), fixed, or scientific
751
        std::string str(size_t precision = 0, FmtMode mode = FmtMode::automatic) const {
99✔
752
                if (isnan()) return std::string("nan");
103✔
753
                if (isinf()) return sign() ? std::string("-inf") : std::string("inf");
127✔
754
                if (iszero()) return sign() ? std::string("-0") : std::string("0");
84✔
755

756
                bool s; int e; significand_t sig;
757
                unpack(s, e, sig);
81✔
758

759
                // value = (-1)^s * sig * 10^e
760
                std::string digits = sig_to_string(sig);
81✔
761
                int num_digits = static_cast<int>(digits.size());
81✔
762
                int decimal_pos = num_digits + e; // position of decimal point from left
81✔
763

764
                // Determine effective precision (number of significant digits to show)
765
                size_t prec = (precision > 0) ? precision : static_cast<size_t>(ndigits);
81✔
766
                // Trim digits to requested precision
767
                if (digits.size() > prec) {
81✔
768
                        digits.resize(prec);
×
769
                }
770
                num_digits = static_cast<int>(digits.size());
81✔
771

772
                // Determine format mode
773
                // automatic: use scientific when the exponent would produce more than
774
                //            ndigits leading/trailing zeros, otherwise use fixed
775
                if (mode == FmtMode::automatic) {
81✔
776
                        if (decimal_pos > static_cast<int>(ndigits) || decimal_pos < -static_cast<int>(ndigits / 2)) {
81✔
777
                                mode = FmtMode::scientific;
35✔
778
                        }
779
                        else {
780
                                mode = FmtMode::fixed;
46✔
781
                        }
782
                }
783

784
                std::string result;
81✔
785
                if (s) result = "-";
81✔
786

787
                if (mode == FmtMode::scientific) {
81✔
788
                        // Scientific notation: d.ddd...e+/-NNN
789
                        result += digits[0];
35✔
790
                        if (num_digits > 1) {
35✔
791
                                result += '.';
18✔
792
                                result += digits.substr(1);
18✔
793
                        }
794
                        // exponent = decimal_pos - 1 (since we placed decimal after first digit)
795
                        int sci_exp = decimal_pos - 1;
35✔
796
                        result += 'e';
35✔
797
                        if (sci_exp >= 0) {
35✔
798
                                result += '+';
17✔
799
                        }
800
                        result += std::to_string(sci_exp);
35✔
801
                }
802
                else {
803
                        // Fixed notation
804
                        if (decimal_pos <= 0) {
46✔
805
                                // value < 1: 0.000...digits
806
                                result += "0.";
11✔
807
                                for (int i = 0; i < -decimal_pos; ++i) result += '0';
11✔
808
                                result += digits;
11✔
809
                        }
810
                        else if (decimal_pos >= num_digits) {
35✔
811
                                // integer value
812
                                result += digits;
25✔
813
                                for (int i = 0; i < decimal_pos - num_digits; ++i) result += '0';
42✔
814
                                result += ".0";
25✔
815
                        }
816
                        else {
817
                                // mixed: some digits before and after decimal
818
                                result += digits.substr(0, static_cast<size_t>(decimal_pos));
10✔
819
                                result += '.';
10✔
820
                                result += digits.substr(static_cast<size_t>(decimal_pos));
10✔
821
                        }
822
                }
823

824
                return result;
81✔
825
        }
81✔
826

827
        ///////////////////////////////////////////////////////////////////
828
        // Bit access (public for free functions like to_binary, color_print)
829
        constexpr bool getbit(unsigned pos) const noexcept {
60,343✔
830
                if (pos >= nbits) return false;
60,343✔
831
                return _encoding.at(pos);
60,343✔
832
        }
833

834
        ///////////////////////////////////////////////////////////////////
835
        // Unpacking / Packing helpers (public for testing)
836

837
        // Unpack the dfloat into sign, unbiased exponent, and significand integer
838
        constexpr void unpack(bool& s, int& exponent, significand_t& significand) const noexcept {
1,176✔
839
                s = sign();
1,176✔
840
                if (iszero()) { exponent = 0; significand = 0; return; }
1,176✔
841
                if (isinf() || isnan()) { exponent = 0; significand = 0; return; }
1,158✔
842

843
                // Extract combination field (5 bits)
844
                unsigned combStart = nbits - 2;
1,147✔
845
                bool a = getbit(combStart);
1,147✔
846
                bool b = getbit(combStart - 1);
1,147✔
847
                bool c = getbit(combStart - 2);
1,147✔
848
                bool d = getbit(combStart - 3);
1,147✔
849
                bool e_bit = getbit(combStart - 4);
1,147✔
850

851
                unsigned exp_msbs;
852
                unsigned msd; // most significant digit
853

854
                if (!(a && b)) {
1,147✔
855
                        // ab != 11: exp MSBs = ab, MSD = 0cde
856
                        exp_msbs = (a ? 2u : 0u) + (b ? 1u : 0u);
1,115✔
857
                        msd = (c ? 4u : 0u) + (d ? 2u : 0u) + (e_bit ? 1u : 0u);
1,115✔
858
                }
859
                else {
860
                        // ab == 11, c determines large digit vs special
861
                        // cd are exp MSBs, MSD = 100e (digit 8 or 9)
862
                        exp_msbs = (c ? 2u : 0u) + (d ? 1u : 0u);
32✔
863
                        msd = 8u + (e_bit ? 1u : 0u);
32✔
864
                }
865

866
                // Extract exponent continuation (es bits after combination field)
867
                unsigned exp_cont = 0;
1,147✔
868
                unsigned bitpos = nbits - 1 - 1 - combBits; // first bit of exponent continuation
1,147✔
869
                for (unsigned i = 0; i < es; ++i) {
8,863✔
870
                        if (getbit(bitpos - i)) {
7,716✔
871
                                exp_cont |= (1u << (es - 1 - i));
3,486✔
872
                        }
873
                }
874

875
                unsigned biased_exp = (exp_msbs << es) | exp_cont;
1,147✔
876
                exponent = static_cast<int>(biased_exp) - bias;
1,147✔
877

878
                // Extract trailing significand (t bits) using blockbinary
879
                if constexpr (encoding == DecimalEncoding::BID) {
880
                        // Read trailing bits directly from encoding into a significand_t
881
                        significand_t trailing(0);
1,050✔
882
                        for (unsigned i = 0; i < t; ++i) {
30,810✔
883
                                if (getbit(i)) trailing.setbit(i, true);
29,760✔
884
                        }
885
                        significand = significand_t(static_cast<long long>(msd)) * pow10_s(ndigits - 1) + trailing;
1,050✔
886
                }
887
                else {
888
                        // DPD: decode declets from trailing bits
889
                        significand = dpd_decode_trailing_wide(msd);
97✔
890
                }
891
        }
892

893
protected:
894
        encoding_t _encoding;
895

896
        ///////////////////////////////////////////////////////////////////
897
        // Bit manipulation helpers
898
        constexpr void setbit(unsigned pos, bool value) noexcept {
30,939✔
899
                if (pos >= nbits) return;
30,939✔
900
                _encoding.setbit(pos, value);
30,939✔
901
        }
902

903
        ///////////////////////////////////////////////////////////////////
904
        // Pack sign, unbiased exponent, and significand into the dfloat encoding
905
        constexpr void pack(bool s, int exponent, const significand_t& significand) noexcept {
728✔
906
                clear();
728✔
907
                if (significand.iszero()) return; // zero
728✔
908

909
                // Determine MSD and trailing
910
                significand_t msd_val = significand / pow10_s(ndigits - 1);
728✔
911
                unsigned msd = static_cast<unsigned>(static_cast<long long>(msd_val));
728✔
912

913
                unsigned biased_exp = static_cast<unsigned>(exponent + bias);
728✔
914

915
                // Encode sign
916
                setbit(nbits - 1, s);
728✔
917

918
                // Encode combination field
919
                unsigned exp_msbs = (biased_exp >> es) & 0x3u;
728✔
920
                unsigned combStart = nbits - 2;
728✔
921

922
                if (msd < 8) {
728✔
923
                        setbit(combStart,     (exp_msbs >> 1) & 1);
716✔
924
                        setbit(combStart - 1, exp_msbs & 1);
716✔
925
                        setbit(combStart - 2, (msd >> 2) & 1);
716✔
926
                        setbit(combStart - 3, (msd >> 1) & 1);
716✔
927
                        setbit(combStart - 4, msd & 1);
716✔
928
                }
929
                else {
930
                        setbit(combStart,     true);
12✔
931
                        setbit(combStart - 1, true);
12✔
932
                        setbit(combStart - 2, (exp_msbs >> 1) & 1);
12✔
933
                        setbit(combStart - 3, exp_msbs & 1);
12✔
934
                        setbit(combStart - 4, msd & 1);
12✔
935
                }
936

937
                // Encode exponent continuation (es bits)
938
                unsigned exp_cont = biased_exp & ((1u << es) - 1u);
728✔
939
                unsigned bitpos = nbits - 1 - 1 - combBits;
728✔
940
                for (unsigned i = 0; i < es; ++i) {
5,562✔
941
                        setbit(bitpos - i, (exp_cont >> (es - 1 - i)) & 1);
4,834✔
942
                }
943

944
                // Encode trailing significand (t bits)
945
                if constexpr (encoding == DecimalEncoding::BID) {
946
                        significand_t trailing = significand % pow10_s(ndigits - 1);
657✔
947
                        // Extract bits from blockbinary significand_t and write into encoding
948
                        for (unsigned i = 0; i < t; ++i) {
17,997✔
949
                                setbit(i, trailing.at(i));
17,340✔
950
                        }
951
                }
952
                else {
953
                        // DPD encoding: encode and write declets directly into bits
954
                        dpd_encode_trailing_wide(significand);
71✔
955
                }
956
        }
957

958
        ///////////////////////////////////////////////////////////////////
959
        // Normalize significand to ndigits and pack
960
        constexpr void normalize_and_pack(bool s, int exponent, significand_t significand) noexcept {
733✔
961
                if (significand.iszero()) { setzero(); if (s) setsign(true); return; }
733✔
962

963
                // Normalize: ensure significand has exactly ndigits digits
964
                significand_t ten(10);
716✔
965
                unsigned digits = count_digits_s(significand);
716✔
966
                while (digits > ndigits) {
730✔
967
                        significand /= ten;
14✔
968
                        exponent++;
14✔
969
                        digits--;
14✔
970
                }
971
                // No need to scale up - smaller significands are valid
972

973
                // Check for overflow/underflow
974
                if (exponent > emax) {
716✔
975
                        setinf(s);
×
976
                        return;
×
977
                }
978
                if (exponent < emin) {
716✔
979
                        // underflow to zero
980
                        setzero();
×
981
                        if (s) setsign(true);
×
982
                        return;
×
983
                }
984

985
                pack(s, exponent, significand);
716✔
986
        }
987

988
        ///////////////////////////////////////////////////////////////////
989
        // DPD encode/decode helpers (unified for all widths)
990

991
        // DPD decode: read declets directly from encoding bits
992
        constexpr significand_t dpd_decode_trailing_wide(unsigned msd) const noexcept {
97✔
993
                significand_t result(0);
97✔
994
                significand_t multiplier(1);
97✔
995
                significand_t thousand(1000);
97✔
996
                unsigned remaining = ndigits - 1;
97✔
997
                unsigned bit_offset = 0;
97✔
998

999
                while (remaining >= 3) {
666✔
1000
                        // Read 10-bit declet from bit_offset
1001
                        uint16_t declet = 0;
569✔
1002
                        for (unsigned b = 0; b < 10; ++b) {
6,259✔
1003
                                if (getbit(bit_offset + b)) declet |= static_cast<uint16_t>(1u << b);
5,690✔
1004
                        }
1005
                        unsigned value = dpd_decode(declet);
569✔
1006
                        result += significand_t(static_cast<long long>(value)) * multiplier;
569✔
1007
                        multiplier *= thousand;
569✔
1008
                        bit_offset += 10;
569✔
1009
                        remaining -= 3;
569✔
1010
                }
1011

1012
                return significand_t(static_cast<long long>(msd)) * pow10_s(ndigits - 1) + result;
97✔
1013
        }
1014

1015
        // DPD encode: write declets directly into encoding bits
1016
        constexpr void dpd_encode_trailing_wide(const significand_t& significand) noexcept {
71✔
1017
                significand_t msd_factor = pow10_s(ndigits - 1);
71✔
1018
                significand_t trailing_val = significand % msd_factor;
71✔
1019
                significand_t thousand(1000);
71✔
1020
                unsigned remaining = ndigits - 1;
71✔
1021
                unsigned bit_offset = 0;
71✔
1022

1023
                while (remaining >= 3) {
492✔
1024
                        significand_t group_bb = trailing_val % thousand;
421✔
1025
                        unsigned group = static_cast<unsigned>(static_cast<long long>(group_bb));
421✔
1026
                        trailing_val /= thousand;
421✔
1027
                        uint16_t declet = dpd_encode(group);
421✔
1028
                        for (unsigned b = 0; b < 10; ++b) {
4,631✔
1029
                                setbit(bit_offset + b, (declet >> b) & 1);
4,210✔
1030
                        }
1031
                        bit_offset += 10;
421✔
1032
                        remaining -= 3;
421✔
1033
                }
1034
        }
71✔
1035

1036
        ///////////////////////////////////////////////////////////////////
1037
        // Conversion helpers
1038

1039
        // Convert native IEEE-754 double to dfloat
1040
        //
1041
        // Constexpr-safe: replaces std::isnan/isinf/signbit/fabs/floor/log10/pow/round
1042
        // with constexpr equivalents.  See dfixpnt PR #803 for the same pattern.
1043
        //
1044
        // - NaN detection: rhs != rhs (NaN is the only value not equal to itself)
1045
        // - Infinity detection: |rhs| > DBL_MAX is not constexpr-friendly; we instead
1046
        //   check rhs - rhs (NaN for inf-inf, 0 for finite) -- but rhs-rhs is NaN
1047
        //   only on infinity AND on NaN. We've already filtered NaN above, so a
1048
        //   non-zero rhs whose 2*rhs equals rhs (only true for +/-inf) is infinite.
1049
        //   Simpler: check whether rhs > std::numeric_limits<double>::max() (which
1050
        //   IS constexpr on GCC/Clang).
1051
        // - Sign of -0.0: cannot be detected by 'rhs < 0' alone; use std::signbit
1052
        //   under runtime, and rely on -0.0 == 0.0 short-circuit (-0.0 cleared
1053
        //   here intentionally; sign is set when rhs < 0 in the non-zero path).
1054
        // - log10/pow/floor: replaced by integer loops over the pow10_64 table.
1055
        // - round: replaced by static_cast<uint64_t>(scaled + 0.5) trick (dfixpnt).
1056
        constexpr dfloat& convert_ieee754(double rhs) noexcept {
407✔
1057
                // NaN: only value where x != x
1058
                if (rhs != rhs) {
407✔
1059
                        setnan(NAN_TYPE_QUIET);
×
1060
                        return *this;
×
1061
                }
1062
                // Infinity: x > DBL_MAX or x < -DBL_MAX (numeric_limits constexpr)
1063
                constexpr double dbl_max = std::numeric_limits<double>::max();
407✔
1064
                if (rhs > dbl_max) { setinf(false); return *this; }
407✔
1065
                if (rhs < -dbl_max) { setinf(true); return *this; }
407✔
1066
                if (rhs == 0.0) {
407✔
1067
                        setzero();
19✔
1068
                        // Detect -0.0 by inspecting the sign bit directly.  sw::bit_cast
1069
                        // is constexpr only when the compiler exposes std::bit_cast or
1070
                        // __builtin_bit_cast; on older toolchains it falls back to a
1071
                        // non-constexpr memcpy implementation (see utility/bit_cast.hpp).
1072
                        // Guard so convert_ieee754() stays constexpr-clean everywhere;
1073
                        // platforms without constexpr bit_cast lose the -0.0 sign in
1074
                        // constant-evaluated calls (acceptable: -0.0 is rarely material
1075
                        // and the runtime path still preserves it via std::signbit).
1076
                        if constexpr (sw::is_bit_cast_constexpr_v) {
1077
                                if ((sw::bit_cast<uint64_t>(rhs) >> 63) != 0u) setsign(true);
19✔
1078
                        }
1079
                        else {
1080
                                if (!std::is_constant_evaluated()) {
1081
                                        if (std::signbit(rhs)) setsign(true);
1082
                                }
1083
                        }
1084
                        return *this;
19✔
1085
                }
1086

1087
                bool negative = (rhs < 0);
388✔
1088
                double abs_val = negative ? -rhs : rhs;
388✔
1089

1090
                // Compute floor(log10(abs_val)) without std::log10/floor.
1091
                // Double has ~15-17 significant digits. Scale by powers of 10 to
1092
                // bracket abs_val into [1, 10).
1093
                int dec_exp = 0;
388✔
1094
                double v = abs_val;
388✔
1095
                while (v >= 10.0) { v /= 10.0; ++dec_exp; }
687✔
1096
                while (v <  1.0)  { v *= 10.0; --dec_exp; }
458✔
1097
                // Now v in [1, 10) and abs_val == v * 10^dec_exp (modulo rounding).
1098

1099
                // Scale to get min(ndigits, 17) significant digits (double precision limit)
1100
                unsigned effective_digits = (ndigits < 17) ? ndigits : 17;
388✔
1101
                int target_exp = dec_exp - static_cast<int>(effective_digits) + 1;
388✔
1102
                // Compute scaled = abs_val / 10^target_exp.  decimal64/128 admit
1103
                // |target_exp| well above 19 (decimal128 has emax = 6144, so for
1104
                // extreme abs_val, target_exp - effective_digits + 1 can be ~6128).
1105
                // Apply the scaling in chunks of 10^19 (largest exact integer power
1106
                // of 10 fitting in uint64_t) so we never saturate.
1107
                double scaled = abs_val;
388✔
1108
                if (target_exp >= 0) {
388✔
1109
                        unsigned remaining = static_cast<unsigned>(target_exp);
8✔
1110
                        while (remaining >= 19u) {
8✔
1111
                                scaled /= static_cast<double>(pow10_64(19));
×
1112
                                remaining -= 19u;
×
1113
                        }
1114
                        if (remaining > 0u) {
8✔
1115
                                scaled /= static_cast<double>(pow10_64(remaining));
×
1116
                        }
1117
                }
1118
                else {
1119
                        unsigned remaining = static_cast<unsigned>(-target_exp);
380✔
1120
                        while (remaining >= 19u) {
380✔
1121
                                scaled *= static_cast<double>(pow10_64(19));
×
1122
                                remaining -= 19u;
×
1123
                        }
1124
                        if (remaining > 0u) {
380✔
1125
                                scaled *= static_cast<double>(pow10_64(remaining));
380✔
1126
                        }
1127
                }
1128
                // Round-half-up via floor(x + 0.5).  scaled is positive here.
1129
                // Cast to uint64_t truncates (same as std::round for positive values
1130
                // after the +0.5 nudge).
1131
                uint64_t sig_narrow = static_cast<uint64_t>(scaled + 0.5);
388✔
1132

1133
                // Adjust if rounding pushed us over
1134
                uint64_t limit = pow10_64(effective_digits);
388✔
1135
                if (sig_narrow >= limit) {
388✔
1136
                        sig_narrow /= 10;
×
1137
                        target_exp++;
×
1138
                }
1139
                // Remove trailing zeros
1140
                while (sig_narrow > 0 && (sig_narrow % 10) == 0) {
3,152✔
1141
                        sig_narrow /= 10;
2,764✔
1142
                        target_exp++;
2,764✔
1143
                }
1144

1145
                normalize_and_pack(negative, target_exp, significand_t(static_cast<long long>(sig_narrow)));
388✔
1146
                return *this;
388✔
1147
        }
1148

1149

1150
        // Convert dfloat to native IEEE-754 double
1151
        //
1152
        // Constexpr-safe for sig_bits <= 64 (decimal32, decimal64).  For wider
1153
        // significands (decimal128, sig_bits > 64) the implementation falls back
1154
        // to std::strtod, which is not constexpr -- such instantiations cannot
1155
        // participate in constexpr conversion and the wide path is fenced under
1156
        // !std::is_constant_evaluated().  The pow(10, e) call is replaced by a
1157
        // constexpr loop using the pow10_64 table.
1158
        constexpr double convert_to_double() const noexcept {
389✔
1159
                if (isnan()) return std::numeric_limits<double>::quiet_NaN();
389✔
1160
                if (isinf()) return sign() ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity();
389✔
1161
                if (iszero()) return sign() ? -0.0 : 0.0;
389✔
1162

1163
                bool s; int e; significand_t sig;
1164
                unpack(s, e, sig);
356✔
1165

1166
                // value = (-1)^s * sig * 10^e
1167
                // For ndigits <= 19, sig fits in uint64_t; for wider, use string conversion
1168
                double sig_d = 0.0;
356✔
1169
                if constexpr (sig_bits <= 64) {
1170
                        sig_d = static_cast<double>(static_cast<unsigned long long>(sig));
282✔
1171
                }
1172
                else {
1173
                        // Wide path (decimal128): not constexpr-safe.  Guarded so
1174
                        // constexpr callers using the narrow path still compile.
1175
                        if (!std::is_constant_evaluated()) {
74✔
1176
                                std::string sig_str = to_decimal(sig);
74✔
1177
                                sig_d = std::strtod(sig_str.c_str(), nullptr);
74✔
1178
                        }
74✔
1179
                        else {
1180
                                // Constant-evaluated wide path: fall back to a digit-by-digit
1181
                                // accumulation (loses precision below ~17 digits, but constexpr
1182
                                // evaluation of decimal128 is a known limitation -- callers
1183
                                // should use decimal32/decimal64 in constexpr contexts).
1184
                                significand_t v(sig);
×
1185
                                significand_t ten(10);
×
1186
                                double scale = 1.0;
×
1187
                                while (!v.iszero()) {
×
1188
                                        significand_t r = v % ten;
×
1189
                                        v /= ten;
×
1190
                                        sig_d += static_cast<double>(static_cast<unsigned long long>(r)) * scale;
×
1191
                                        scale *= 10.0;
×
1192
                                }
1193
                        }
1194
                }
1195
                // Replace std::pow(10.0, e) without accumulating FP rounding error.
1196
                // For |e| < 20 the exponent fits in the pow10_64 table, so we can
1197
                // scale via a single multiply or divide by an exact integer power of
1198
                // ten -- this preserves the property that, e.g., 3000000 / 1000000 ==
1199
                // 3.0 exactly in double.  Loop fallback for |e| >= 20 (decimal128).
1200
                double result;
1201
                if (e >= 0) {
356✔
1202
                        if (e < 20) {
256✔
1203
                                result = sig_d * static_cast<double>(pow10_64(static_cast<unsigned>(e)));
255✔
1204
                        }
1205
                        else {
1206
                                double scale_factor = static_cast<double>(pow10_64(19));
1✔
1207
                                for (int i = 19; i < e; ++i) scale_factor *= 10.0;
72✔
1208
                                result = sig_d * scale_factor;
1✔
1209
                        }
1210
                }
1211
                else {
1212
                        int abs_e = -e;
100✔
1213
                        if (abs_e < 20) {
100✔
1214
                                result = sig_d / static_cast<double>(pow10_64(static_cast<unsigned>(abs_e)));
99✔
1215
                        }
1216
                        else {
1217
                                double scale_factor = static_cast<double>(pow10_64(19));
1✔
1218
                                for (int i = 19; i < abs_e; ++i) scale_factor *= 10.0;
14✔
1219
                                result = sig_d / scale_factor;
1✔
1220
                        }
1221
                }
1222
                return s ? -result : result;
356✔
1223
        }
1224

1225
        // Convert a signed 64-bit integer: strip trailing decimal zeros into the exponent,
        // then hand the magnitude to normalize_and_pack. Zero maps to (positive) zero.
        constexpr dfloat& convert_signed(int64_t v) noexcept {
                if (v == 0) {
                        setzero();
                        return *this;
                }

                const bool isNegative = (v < 0);
                // Magnitude via unsigned wrap-around: 0 - uint64(v) equals |v| for every negative v,
                // including INT64_MIN, and never negates a signed value.
                uint64_t magnitude = static_cast<uint64_t>(v);
                if (isNegative) {
                        magnitude = 0ull - magnitude;
                }

                // Move trailing zeros into the exponent (magnitude is non-zero, so this terminates)
                int exp10 = 0;
                for (; (magnitude % 10) == 0; magnitude /= 10) {
                        ++exp10;
                }

                // setbits loads all 64 bits; going through long long would corrupt
                // magnitudes above LLONG_MAX.
                significand_t coefficient;
                coefficient.setbits(magnitude);
                normalize_and_pack(isNegative, exp10, coefficient);
                return *this;
        }
1253

1254
        // Convert an unsigned 64-bit integer: strip trailing decimal zeros into the exponent,
        // then hand the value to normalize_and_pack with a positive sign. Zero maps to zero.
        constexpr dfloat& convert_unsigned(uint64_t v) noexcept {
                if (v == 0) {
                        setzero();
                        return *this;
                }

                // Move trailing zeros into the exponent (v is non-zero, so this terminates)
                int exp10 = 0;
                for (; (v % 10) == 0; v /= 10) {
                        ++exp10;
                }

                // significand_t is signed (required by blockbinary's long division), but
                // significands are never negative, so the raw 64 bits are loaded with setbits
                // rather than narrowed through long long.
                significand_t coefficient;
                coefficient.setbits(v);
                normalize_and_pack(false, exp10, coefficient);
                return *this;
        }
1275

1276
private:
1277

1278
        // dfloat - dfloat logic comparisons
1279
        template<unsigned N, unsigned E, DecimalEncoding Enc, typename B>
1280
        friend constexpr bool operator==(const dfloat<N, E, Enc, B>& lhs, const dfloat<N, E, Enc, B>& rhs);
1281

1282
        // dfloat - literal logic comparisons
1283
        template<unsigned N, unsigned E, DecimalEncoding Enc, typename B>
1284
        friend constexpr bool operator==(const dfloat<N, E, Enc, B>& lhs, const double rhs);
1285

1286
        // literal - dfloat logic comparisons
1287
        template<unsigned N, unsigned E, DecimalEncoding Enc, typename B>
1288
        friend constexpr bool operator==(const double lhs, const dfloat<N, E, Enc, B>& rhs);
1289
};
1290

1291

1292
////////////////////////    helper functions   /////////////////////////////////
1293

1294
// divide dfloat a and b and return result argument
1295
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1296
void divide(const dfloat<ndigits, es, Encoding, BlockType>& a, const dfloat<ndigits, es, Encoding, BlockType>& b, dfloat<ndigits, es, Encoding, BlockType>& quotient) {
1297
        quotient = a;
1298
        quotient /= b;
1299
}
1300

1301
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1302
inline std::string to_binary(const dfloat<ndigits, es, Encoding, BlockType>& number, bool nibbleMarker = false) {
83✔
1303
        using Dfloat = dfloat<ndigits, es, Encoding, BlockType>;
1304
        std::stringstream s;
83✔
1305

1306
        // sign bit
1307
        s << "0b" << (number.sign() ? '1' : '0') << '.';
83✔
1308

1309
        // combination field (5 bits)
1310
        unsigned combStart = Dfloat::nbits - 2;
83✔
1311
        for (unsigned i = 0; i < Dfloat::combBits; ++i) {
498✔
1312
                s << (number.getbit(combStart - i) ? '1' : '0');
415✔
1313
        }
1314
        s << '.';
83✔
1315

1316
        // exponent continuation (es bits)
1317
        unsigned expStart = Dfloat::nbits - 1 - 1 - Dfloat::combBits;
83✔
1318
        for (unsigned i = 0; i < es; ++i) {
685✔
1319
                s << (number.getbit(expStart - i) ? '1' : '0');
602✔
1320
        }
1321
        s << '.';
83✔
1322

1323
        // trailing significand (t bits, MSB first)
1324
        for (int i = static_cast<int>(Dfloat::t) - 1; i >= 0; --i) {
3,303✔
1325
                s << (number.getbit(static_cast<unsigned>(i)) ? '1' : '0');
3,220✔
1326
                if (nibbleMarker && i > 0 && (i % 4 == 0)) s << '\'';
3,220✔
1327
        }
1328

1329
        return s.str();
166✔
1330
}
83✔
1331

1332
// native semantic representation: radix-10, shows decimal coefficient and exponent
1333
// Format: +DDDDDDDDDDDDDDDDe+EEE (fixed-width for visual alignment)
1334
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1335
inline std::string to_native(const dfloat<ndigits, es, Encoding, BlockType>& number, bool = false) {
1336
        using Dfloat = dfloat<ndigits, es, Encoding, BlockType>;
1337
        std::stringstream s;
1338

1339
        if (number.isnan()) { s << "NaN"; return s.str(); }
1340
        if (number.isinf()) { s << (number.sign() ? "-inf" : "+inf"); return s.str(); }
1341
        if (number.iszero()) {
1342
                s << (number.sign() ? '-' : '+');
1343
                s << std::string(ndigits, '0') << "e+0";
1344
                return s.str();
1345
        }
1346

1347
        bool sign; int exp; typename Dfloat::significand_t sig;
1348
        number.unpack(sign, exp, sig);
1349

1350
        s << (sign ? '-' : '+');
1351

1352
        // Convert significand to decimal string, left-pad to ndigits
1353
        std::string digits = Dfloat::sig_to_string(sig);
1354
        while (digits.size() < ndigits) digits = "0" + digits;
1355

1356
        s << digits << 'e' << std::showpos << exp;
1357
        return s.str();
1358
}
1359

1360
////////////////////////    DFLOAT functions   /////////////////////////////////
1361

1362
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1363
constexpr dfloat<ndigits, es, Encoding, BlockType> abs(const dfloat<ndigits, es, Encoding, BlockType>& a) {
1364
        dfloat<ndigits, es, Encoding, BlockType> result(a);
1365
        result.setsign(false);
1366
        return result;
1367
}
1368

1369
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1370
constexpr dfloat<ndigits, es, Encoding, BlockType> fabs(dfloat<ndigits, es, Encoding, BlockType> a) {
1371
        a.setsign(false);
1372
        return a;
1373
}
1374

1375

1376
////////////////////////  stream operators   /////////////////////////////////
1377

1378
// generate a dfloat format ASCII format
1379
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1380
inline std::ostream& operator<<(std::ostream& ostr, const dfloat<ndigits, es, Encoding, BlockType>& i) {
99✔
1381
        using Dfloat = dfloat<ndigits, es, Encoding, BlockType>;
1382
        using FmtMode = typename Dfloat::FmtMode;
1383

1384
        std::streamsize prec = ostr.precision();
99✔
1385
        std::streamsize width = ostr.width();
99✔
1386
        std::ios_base::fmtflags ff = ostr.flags();
99✔
1387

1388
        // Map iostream format flags to dfloat FmtMode
1389
        FmtMode mode = FmtMode::automatic;
99✔
1390
        bool scientific = (ff & std::ios_base::scientific) == std::ios_base::scientific;
99✔
1391
        bool fixed      = (ff & std::ios_base::fixed) == std::ios_base::fixed;
99✔
1392
        if (scientific && !fixed) mode = FmtMode::scientific;
99✔
1393
        else if (fixed && !scientific) mode = FmtMode::fixed;
99✔
1394

1395
        // Default to ndigits precision so all stored digits are shown.
1396
        // The iostream default precision is 6, which would silently truncate
1397
        // exact decimal digits. Only use the stream precision when the user
1398
        // has explicitly set scientific or fixed mode.
1399
        size_t effective_prec = (scientific || fixed)
99✔
1400
                ? static_cast<size_t>(prec)
198✔
1401
                : 0;  // 0 tells str() to use ndigits
1402

1403
        std::string representation = i.str(effective_prec, mode);
99✔
1404

1405
        // Handle setw and alignment
1406
        std::streamsize repWidth = static_cast<std::streamsize>(representation.size());
99✔
1407
        if (width > repWidth) {
99✔
1408
                std::streamsize diff = width - repWidth;
16✔
1409
                char fill = ostr.fill();
16✔
1410
                if ((ff & std::ios_base::left) == std::ios_base::left) {
16✔
1411
                        representation.append(static_cast<size_t>(diff), fill);
×
1412
                }
1413
                else {
1414
                        representation.insert(0, static_cast<size_t>(diff), fill);
16✔
1415
                }
1416
        }
1417

1418
        return ostr << representation;
198✔
1419
}
99✔
1420

1421
// read an ASCII dfloat format
1422
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1423
inline std::istream& operator>>(std::istream& istr, dfloat<ndigits, es, Encoding, BlockType>& p) {
2✔
1424
        std::string txt;
2✔
1425
        if (!(istr >> txt)) {
2✔
1426
                // extraction failed (already-bad stream or EOF); failbit set by >>.
1427
                return istr;
1✔
1428
        }
1429
        if (!parse(txt, p)) {
1✔
1430
                std::cerr << "unable to parse -" << txt << "- into a dfloat value\n";
1✔
1431
                istr.setstate(std::ios::failbit);
1✔
1432
        }
1433
        return istr;
1✔
1434
}
2✔
1435

1436
////////////////// string operators
1437

1438
// read a dfloat ASCII format and make a dfloat out of it
1439
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1440
bool parse(const std::string& number, dfloat<ndigits, es, Encoding, BlockType>& value) {
19✔
1441
        if (number.empty()) return false;
19✔
1442
        // Pre-validate: the input must match the dfloat grammar. Without this
1443
        // guard, assign() silently accepts garbage and produces zero, so the
1444
        // operator>> path would never set failbit on inputs like "not-a-number"
1445
        // or "1.5abc".
1446
        {
1447
                std::size_t pos = 0;
19✔
1448
                while (pos < number.size() && std::isspace(static_cast<unsigned char>(number[pos]))) ++pos;
19✔
1449
                if (pos >= number.size()) return false;
19✔
1450
                if (number[pos] == '+' || number[pos] == '-') ++pos;
19✔
1451
                if (pos >= number.size()) return false;
19✔
1452

1453
                // Branch on first character (after sign): special-value token or
1454
                // a decimal floating-point literal.
1455
                const char c0 = number[pos];
19✔
1456
                if (c0 == 'i' || c0 == 'I' || c0 == 'n' || c0 == 'N') {
19✔
1457
                        // Expect "inf" / "infinity" / "nan" (case-insensitive). assign()
1458
                        // only inspects the first 3 letters; we tolerate trailing letters
1459
                        // only when they spell "infinity" or are absent.
1460
                        auto lc = [&number](std::size_t i) {
41✔
1461
                                return static_cast<char>(std::tolower(static_cast<unsigned char>(number[i])));
32✔
1462
                        };
1463
                        if (pos + 3 > number.size()) return false;
11✔
1464
                        const char a = lc(pos), b = lc(pos + 1), c = lc(pos + 2);
9✔
1465
                        const bool is_inf = (a == 'i' && b == 'n' && c == 'f');
9✔
1466
                        const bool is_nan = (a == 'n' && b == 'a' && c == 'n');
9✔
1467
                        if (!is_inf && !is_nan) return false;
9✔
1468
                        // After the 3 letters, allow nothing (nan/inf), or the rest of "infinity".
1469
                        std::size_t after = pos + 3;
7✔
1470
                        if (after < number.size()) {
7✔
1471
                                if (is_inf
1✔
1472
                                 && after + 5 == number.size()
1✔
1473
                                 && lc(after) == 'i' && lc(after + 1) == 'n' && lc(after + 2) == 'i'
1✔
1474
                                 && lc(after + 3) == 't' && lc(after + 4) == 'y') {
2✔
1475
                                        // "infinity" -- ok
1476
                                }
1477
                                else {
NEW
1478
                                        return false;
×
1479
                                }
1480
                        }
1481
                }
7✔
1482
                else if ((c0 >= '0' && c0 <= '9') || c0 == '.') {
10✔
1483
                        // Decimal floating-point literal: digits . digits [eE [+-] digits]
1484
                        bool seen_digit = false;
8✔
1485
                        bool seen_dot   = false;
8✔
1486
                        while (pos < number.size()) {
28✔
1487
                                char ch = number[pos];
25✔
1488
                                if (ch >= '0' && ch <= '9') { seen_digit = true; ++pos; continue; }
25✔
1489
                                if (ch == '.') {
10✔
1490
                                        if (seen_dot) return false;
6✔
1491
                                        seen_dot = true;
5✔
1492
                                        ++pos;
5✔
1493
                                        continue;
5✔
1494
                                }
1495
                                break;
4✔
1496
                        }
1497
                        if (!seen_digit) return false;
7✔
1498
                        if (pos < number.size() && (number[pos] == 'e' || number[pos] == 'E')) {
7✔
1499
                                ++pos;
3✔
1500
                                if (pos < number.size() && (number[pos] == '+' || number[pos] == '-')) ++pos;
3✔
1501
                                bool seen_exp_digit = false;
3✔
1502
                                while (pos < number.size() && number[pos] >= '0' && number[pos] <= '9') {
4✔
1503
                                        seen_exp_digit = true;
1✔
1504
                                        ++pos;
1✔
1505
                                }
1506
                                if (!seen_exp_digit) return false;
3✔
1507
                        }
1508
                        if (pos != number.size()) return false;  // trailing junk
5✔
1509
                }
4✔
1510
                else {
1511
                        return false;
2✔
1512
                }
1513
        }
1514
        value.assign(number);
11✔
1515
        return true;
11✔
1516
}
1517

1518

1519
//////////////////////////////////////////////////////////////////////////////////////////////////////
1520
// dfloat - dfloat binary logic operators
1521

1522
// equal: precondition is that the storage is properly nulled in all arithmetic paths
1523
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1524
constexpr bool operator==(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
13✔
1525
        using Dfloat = dfloat<ndigits, es, Encoding, BlockType>;
1526
        // NaN != anything (including itself)
1527
        if (lhs.isnan() || rhs.isnan()) return false;
13✔
1528
        // both zero (ignoring sign)
1529
        if (lhs.iszero() && rhs.iszero()) return true;
12✔
1530
        // compare unpacked values
1531
        bool ls, rs; int le, re;
1532
        typename Dfloat::significand_t lsig, rsig;
1533
        lhs.unpack(ls, le, lsig);
11✔
1534
        rhs.unpack(rs, re, rsig);
11✔
1535
        return (ls == rs) && (le == re) && (lsig == rsig);
11✔
1536
}
1537

1538
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1539
constexpr bool operator!=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
3✔
1540
        return !operator==(lhs, rhs);
3✔
1541
}
1542

1543
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1544
constexpr bool operator< (const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
19✔
1545
        using Dfloat = dfloat<ndigits, es, Encoding, BlockType>;
1546
        // NaN is unordered
1547
        if (lhs.isnan() || rhs.isnan()) return false;
19✔
1548
        // handle infinities
1549
        if (lhs.isinf() && rhs.isinf()) {
19✔
1550
                return lhs.sign() && !rhs.sign(); // -inf < +inf
×
1551
        }
1552
        if (lhs.isinf()) return lhs.sign();  // -inf < anything
19✔
1553
        if (rhs.isinf()) return !rhs.sign(); // anything < +inf
19✔
1554

1555
        // handle zeros
1556
        if (lhs.iszero() && rhs.iszero()) return false;
19✔
1557
        if (lhs.iszero()) return !rhs.sign(); // 0 < positive
19✔
1558
        if (rhs.iszero()) return lhs.sign();  // negative < 0
18✔
1559

1560
        // both nonzero, non-special
1561
        bool ls = lhs.sign(), rs = rhs.sign();
17✔
1562
        if (ls != rs) return ls; // negative < positive
17✔
1563

1564
        // same sign: compare magnitudes
1565
        bool ls_ign; int le; typename Dfloat::significand_t lsig;
1566
        bool rs_ign; int re; typename Dfloat::significand_t rsig;
1567
        lhs.unpack(ls_ign, le, lsig);
15✔
1568
        rhs.unpack(rs_ign, re, rsig);
15✔
1569

1570
        // normalize to same scale for comparison
1571
        int l_scale = le + static_cast<int>(Dfloat::count_digits_s(lsig)) - 1;
15✔
1572
        int r_scale = re + static_cast<int>(Dfloat::count_digits_s(rsig)) - 1;
15✔
1573

1574
        if (l_scale != r_scale) {
15✔
1575
                // higher scale means larger magnitude
1576
                return ls ? (l_scale > r_scale) : (l_scale < r_scale);
5✔
1577
        }
1578

1579
        // same overall scale: compare significands at same exponent
1580
        // Align to same exponent by adjusting significands
1581
        typename Dfloat::significand_t ten(10);
10✔
1582
        if (le < re) {
10✔
1583
                int diff = re - le;
×
1584
                if (diff < static_cast<int>(ndigits)) {
×
1585
                        for (int i = 0; i < diff; ++i) rsig *= ten;
×
1586
                }
1587
        }
1588
        else if (re < le) {
10✔
1589
                int diff = le - re;
×
1590
                if (diff < static_cast<int>(ndigits)) {
×
1591
                        for (int i = 0; i < diff; ++i) lsig *= ten;
×
1592
                }
1593
        }
1594

1595
        return ls ? (lsig > rsig) : (lsig < rsig);
10✔
1596
}
1597

1598
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1599
constexpr bool operator> (const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
6✔
1600
        return operator< (rhs, lhs);
6✔
1601
}
1602

1603
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1604
constexpr bool operator<=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
3✔
1605
        return operator< (lhs, rhs) || operator==(lhs, rhs);
3✔
1606
}
1607

1608
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1609
constexpr bool operator>=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
3✔
1610
        // NaN-safe pattern: avoid !operator< (which is true for NaN comparisons).
1611
        return operator>(lhs, rhs) || operator==(lhs, rhs);
3✔
1612
}
1613

1614
//////////////////////////////////////////////////////////////////////////////////////////////////////
1615
// dfloat - literal binary logic operators
1616
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1617
constexpr bool operator==(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1618
        return operator==(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1619
}
1620

1621
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1622
constexpr bool operator!=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1623
        return !operator==(lhs, rhs);
1624
}
1625

1626
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1627
constexpr bool operator< (const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1628
        return operator<(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1629
}
1630

1631
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1632
constexpr bool operator> (const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1633
        return operator< (dfloat<ndigits, es, Encoding, BlockType>(rhs), lhs);
1634
}
1635

1636
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1637
constexpr bool operator<=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1638
        return operator< (lhs, rhs) || operator==(lhs, rhs);
1639
}
1640

1641
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1642
constexpr bool operator>=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1643
        return operator>(lhs, rhs) || operator==(lhs, rhs);
1644
}
1645

1646
//////////////////////////////////////////////////////////////////////////////////////////////////////
1647
// literal - dfloat binary logic operators
1648
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1649
constexpr bool operator==(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1650
        return operator==(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1651
}
1652

1653
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1654
constexpr bool operator!=(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1655
        return !operator==(lhs, rhs);
1656
}
1657

1658
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1659
constexpr bool operator< (const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1660
        return operator<(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1661
}
1662

1663
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1664
constexpr bool operator> (const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1665
        return operator< (rhs, lhs);
1666
}
1667

1668
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1669
constexpr bool operator<=(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1670
        return operator< (lhs, rhs) || operator==(lhs, rhs);
1671
}
1672

1673
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1674
constexpr bool operator>=(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1675
        return operator>(lhs, rhs) || operator==(lhs, rhs);
1676
}
1677

1678
//////////////////////////////////////////////////////////////////////////////////////////////////////
1679
// dfloat - dfloat binary arithmetic operators
1680
// BINARY ADDITION
1681
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1682
constexpr dfloat<ndigits, es, Encoding, BlockType> operator+(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
125✔
1683
        dfloat<ndigits, es, Encoding, BlockType> sum(lhs);
125✔
1684
        sum += rhs;
125✔
1685
        return sum;
125✔
1686
}
1687
// BINARY SUBTRACTION
1688
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1689
constexpr dfloat<ndigits, es, Encoding, BlockType> operator-(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
63✔
1690
        dfloat<ndigits, es, Encoding, BlockType> diff(lhs);
63✔
1691
        diff -= rhs;
63✔
1692
        return diff;
63✔
1693
}
1694
// BINARY MULTIPLICATION
1695
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1696
constexpr dfloat<ndigits, es, Encoding, BlockType> operator*(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
99✔
1697
        dfloat<ndigits, es, Encoding, BlockType> mul(lhs);
99✔
1698
        mul *= rhs;
99✔
1699
        return mul;
99✔
1700
}
1701
// BINARY DIVISION
1702
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1703
constexpr dfloat<ndigits, es, Encoding, BlockType> operator/(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
25✔
1704
        dfloat<ndigits, es, Encoding, BlockType> ratio(lhs);
25✔
1705
        ratio /= rhs;
25✔
1706
        return ratio;
25✔
1707
}
1708

1709
//////////////////////////////////////////////////////////////////////////////////////////////////////
1710
// dfloat - literal binary arithmetic operators
1711
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1712
constexpr dfloat<ndigits, es, Encoding, BlockType> operator+(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1713
        return operator+(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1714
}
1715
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1716
constexpr dfloat<ndigits, es, Encoding, BlockType> operator-(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1717
        return operator-(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1718
}
1719
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1720
constexpr dfloat<ndigits, es, Encoding, BlockType> operator*(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1721
        return operator*(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1722
}
1723
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1724
constexpr dfloat<ndigits, es, Encoding, BlockType> operator/(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1725
        return operator/(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1726
}
1727

1728
//////////////////////////////////////////////////////////////////////////////////////////////////////
1729
// literal - dfloat binary arithmetic operators
1730
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1731
constexpr dfloat<ndigits, es, Encoding, BlockType> operator+(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1732
        return operator+(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1733
}
1734
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1735
constexpr dfloat<ndigits, es, Encoding, BlockType> operator-(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1736
        return operator-(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1737
}
1738
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1739
constexpr dfloat<ndigits, es, Encoding, BlockType> operator*(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1740
        return operator*(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1741
}
1742
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1743
constexpr dfloat<ndigits, es, Encoding, BlockType> operator/(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1744
        return operator/(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1745
}
1746

1747
}} // namespace sw::universal
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc