• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

stillwater-sc / universal / 22510678448

28 Feb 2026 01:44AM UTC coverage: 84.218% (+0.08%) from 84.134%
22510678448

Pull #524

github

web-flow
Merge 005520c16 into ecf343609
Pull Request #524: feat(dfloat): portable blockbinary storage, string I/O, and formatting

207 of 219 new or added lines in 3 files covered. (94.52%)

4 existing lines in 2 files now uncovered.

38938 of 46235 relevant lines covered (84.22%)

6800360.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.81
/include/sw/universal/number/dfloat/dfloat_impl.hpp
1
#pragma once
2
// dfloat_impl.hpp: implementation of an IEEE 754-2008 decimal floating-point number system
3
//
4
// Copyright (C) 2017 Stillwater Supercomputing, Inc.
5
// SPDX-License-Identifier: MIT
6
//
7
// This file is part of the universal numbers project, which is released under an MIT Open Source license.
8
#include <cstdint>
#include <cstring>
#include <cmath>
#include <cctype>
#include <string>
#include <sstream>
#include <iostream>
#include <iomanip>
#include <algorithm>
16

17
// supporting types and functions
18
#include <universal/native/ieee754.hpp>
19
#include <universal/number/shared/nan_encoding.hpp>
20
#include <universal/number/shared/infinite_encoding.hpp>
21
#include <universal/number/shared/specific_value_encoding.hpp>
22
// dfloat exception structure
23
#include <universal/number/dfloat/exceptions.hpp>
24
// DPD (Densely Packed Decimal) codec
25
#include <universal/number/dfloat/dpd_codec.hpp>
26
// blockbinary for encoding storage and significand arithmetic
27
#include <universal/internal/blockbinary/blockbinary.hpp>
28

29
namespace sw { namespace universal {
30

31
///////////////////////////////////////////////////////////////////////////////
32
// Internal helpers for BID encoding
33
//
34
// IEEE 754-2008 decimal format layout:
35
//   [sign(1)] [combination(5)] [exponent_continuation(w)] [trailing_significand(t)]
36
//
37
// Total bits: nbits = 1 + 5 + w + t
38
// where w = es (exponent continuation bits)
39
//       t = nbits - 1 - 5 - w
40
//
41
// Combination field (5 bits: abcde):
42
//   ab != 11: exponent MSBs = ab, MSD (most significant digit) = 0cde (0-7)
43
//   ab == 11 && cd != 11: exponent MSBs = cd, MSD = 100e (8 or 9)
44
//   11110: +/- infinity
45
//   11111: NaN (quiet or signaling based on trailing significand MSB)
46
//
47
// BID encoding: trailing significand stored as binary integer
48
// DPD encoding: trailing significand stored as densely packed decimal (10-bit declets)
49

50
// Compute number of bits needed for a decimal32/64/128 configuration
51
// decimal32:  ndigits=7,  es=6   -> nbits = 1 + 5 + 6  + 20  = 32
52
// decimal64:  ndigits=16, es=8   -> nbits = 1 + 5 + 8  + 50  = 64
53
// decimal128: ndigits=34, es=12  -> nbits = 1 + 5 + 12 + 110 = 128
54

55
///////////////////////////////////////////////////////////////////////////////
56
// power_of_10: constexpr power-of-10 helpers
57
// Lookup table holding 10^k for k = 0..19 (twenty entries, one per index).
static constexpr uint64_t _pow10_table[20] = {
        1ull,                    10ull,
        100ull,                  1000ull,
        10000ull,                100000ull,
        1000000ull,              10000000ull,
        100000000ull,            1000000000ull,
        10000000000ull,          100000000000ull,
        1000000000000ull,        10000000000000ull,
        100000000000000ull,      1000000000000000ull,
        10000000000000000ull,    100000000000000000ull,
        1000000000000000000ull,  10000000000000000000ull
};

// Return 10^n by table lookup.
// Precondition: n < 20. The index is not checked here; an out-of-range index
// is only diagnosed when the call is evaluated in a constant expression.
static constexpr uint64_t pow10_64(unsigned n) {
        return _pow10_table[n];
}
83

84
// count decimal digits of a uint64_t
85
// Number of decimal digits needed to print v; zero counts as one digit.
static constexpr unsigned count_decimal_digits(uint64_t v) {
        unsigned digits = 1;                                   // every value has at least one digit
        for (uint64_t rest = v / 10; rest != 0; rest /= 10) {  // one more digit per remaining power of ten
                ++digits;
        }
        return digits;
}
91

92

93
// constexpr ceil(log2(10^n)) - bits needed to represent 10^n in binary
94
// This is the number of trailing significand bits for BID encoding
95
// Bit width used for an n-digit BID trailing significand.
// Computes ceil(n * 3322 / 1000) in integer arithmetic, where 3.322 is a
// slightly-high approximation of log2(10) ~ 3.321928; n == 0 yields 0.
static constexpr unsigned bid_trailing_bits(unsigned n) {
        if (n == 0) return 0;
        const uint64_t scaled = static_cast<uint64_t>(n) * 3322u;   // n * log2(10), scaled by 1000
        const uint64_t rounded_up = (scaled + 999u) / 1000u;        // ceiling division by 1000
        return static_cast<unsigned>(rounded_up);
}
103

104
// DPD trailing bits: (ndigits-1)/3 declets of 10 bits + remainder
105
// Bit width of a DPD trailing significand holding ndigits_minus_1 digits:
// every full group of three digits takes a 10-bit declet, a leftover single
// digit takes 4 bits and a leftover pair takes 7 bits.
static constexpr unsigned dpd_trailing_bits(unsigned ndigits_minus_1) {
        const unsigned leftover = ndigits_minus_1 % 3;
        const unsigned leftover_bits = (leftover == 1) ? 4u : ((leftover == 2) ? 7u : 0u);
        return (ndigits_minus_1 / 3) * 10 + leftover_bits;
}
113

114
///////////////////////////////////////////////////////////////////////////////
115
// dfloat: IEEE 754-2008 decimal floating-point number
116
//
117
// Template parameters:
118
//   ndigits  - number of decimal precision digits (p)
119
//   es       - exponent continuation bits (w)
120
//   Encoding - BID or DPD
121
//   bt       - block type for storage
122
//
123
template<unsigned _ndigits, unsigned _es, DecimalEncoding _Encoding = DecimalEncoding::BID, typename bt = std::uint32_t>
124
class dfloat {
125
public:
126
        static constexpr unsigned ndigits  = _ndigits;             // precision in decimal digits (p)
127
        static constexpr unsigned es       = _es;                  // exponent continuation bits (w)
128
        static constexpr DecimalEncoding encoding = _Encoding;
129
        static constexpr unsigned combBits = 5u;                   // combination field bits
130
        static constexpr unsigned t        = (encoding == DecimalEncoding::BID)
131
                ? bid_trailing_bits(ndigits - 1)
132
                : dpd_trailing_bits(ndigits - 1);
133
        static constexpr unsigned nbits    = 1u + combBits + es + t;
134
        static constexpr int      bias     = (3 << (es - 1)) + static_cast<int>(ndigits) - 2;
135
        static constexpr int      emax     = (3 << es) - 1 - bias;   // max biased exponent
136
        static constexpr int      emin     = -bias;                    // min biased exponent
137

138
        // Significand arithmetic type: blockbinary with enough bits for any ndigits
139
        // Signed is required because blockbinary::longdivision() requires it.
140
        // The sign bit is unused headroom since significands are always >= 0.
141
        static constexpr unsigned sig_bits = 4 * ndigits + 8;
142
        using significand_t = blockbinary<sig_bits, bt, BinaryNumberType::Signed>;
143

144
        // Wide significand for overflow-free multiplication
145
        using wide_significand_t = blockbinary<2 * sig_bits, bt, BinaryNumberType::Signed>;
146

147
        // Helper: power of 10 returning significand_t
148
        static significand_t pow10_s(unsigned n) {
                // Start from 1 and multiply by ten n times.
                significand_t power(1);
                significand_t base(10);
                while (n > 0) {
                        power *= base;
                        --n;
                }
                return power;
        }
154

155
        // Helper: count decimal digits of a significand_t
156
        static unsigned count_digits_s(const significand_t& v) {
825✔
157
                if (v.iszero()) return 1;
825✔
158
                unsigned count = 0;
825✔
159
                significand_t tmp(v);
825✔
160
                significand_t ten(10);
825✔
161
                while (!tmp.iszero()) { tmp /= ten; ++count; }
2,504✔
162
                return count;
825✔
163
        }
164

165
        // Helper: significand_t to string
166
        static std::string sig_to_string(const significand_t& v) {
84✔
167
                return to_decimal(v);
84✔
168
        }
169

170
        typedef bt BlockType;
171

172
        // Encoding storage type: blockbinary with Unsigned encoding
173
        using encoding_t = blockbinary<nbits, bt, BinaryNumberType::Unsigned>;
174

175
        /// trivial constructor
176
        dfloat() = default;
177

178
        dfloat(const dfloat&) = default;
179
        dfloat(dfloat&&) = default;
180

181
        dfloat& operator=(const dfloat&) = default;
182
        dfloat& operator=(dfloat&&) = default;
183

184
        // converting constructors
185
        // Construct from decimal text. assign() clears the encoding itself before
        // parsing, so no separate clear() is needed. Not constexpr: clear() and
        // assign() are not constexpr, so this constructor could never be
        // evaluated in a constant expression.
        dfloat(const std::string& stringRep) { assign(stringRep); }
1✔
186

187
        // specific value constructor
188
        constexpr dfloat(const SpecificValue code) noexcept {
73✔
189
                clear();
73✔
190
                switch (code) {
73✔
191
                case SpecificValue::maxpos:
13✔
192
                        maxpos();
13✔
193
                        break;
13✔
194
                case SpecificValue::minpos:
12✔
195
                        minpos();
12✔
196
                        break;
12✔
197
                case SpecificValue::zero:
1✔
198
                default:
199
                        zero();
1✔
200
                        break;
1✔
201
                case SpecificValue::minneg:
6✔
202
                        minneg();
6✔
203
                        break;
6✔
204
                case SpecificValue::maxneg:
6✔
205
                        maxneg();
6✔
206
                        break;
6✔
207
                case SpecificValue::infpos:
15✔
208
                        setinf(false);
15✔
209
                        break;
15✔
210
                case SpecificValue::infneg:
11✔
211
                        setinf(true);
11✔
212
                        break;
11✔
213
                case SpecificValue::nar:
8✔
214
                case SpecificValue::qnan:
215
                        setnan(NAN_TYPE_QUIET);
8✔
216
                        break;
8✔
217
                case SpecificValue::snan:
1✔
218
                        setnan(NAN_TYPE_SIGNALLING);
1✔
219
                        break;
1✔
220
                }
221
        }
73✔
222

223
        // initializers for native types
224
        explicit dfloat(signed char iv)           noexcept { clear(); *this = iv; }
225
        explicit dfloat(short iv)                 noexcept { clear(); *this = iv; }
226
        explicit dfloat(int iv)                   noexcept { clear(); *this = iv; }
89✔
227
        explicit dfloat(long iv)                  noexcept { clear(); *this = iv; }
228
        explicit dfloat(long long iv)             noexcept { clear(); *this = iv; }
229
        explicit dfloat(char iv)                  noexcept { clear(); *this = iv; }
230
        explicit dfloat(unsigned short iv)        noexcept { clear(); *this = iv; }
231
        explicit dfloat(unsigned int iv)          noexcept { clear(); *this = iv; }
6✔
232
        explicit dfloat(unsigned long iv)         noexcept { clear(); *this = iv; }
233
        explicit dfloat(unsigned long long iv)    noexcept { clear(); *this = iv; }
234
        explicit dfloat(float iv)                 noexcept { clear(); *this = iv; }
2✔
235
        explicit dfloat(double iv)                noexcept { clear(); *this = iv; }
416✔
236

237
        // assignment operators for native types
238
        // Signed integers go through convert_signed, unsigned integers through
        // convert_unsigned, and binary floating-point through convert_ieee754.
        dfloat& operator=(signed char rhs)        noexcept { return convert_signed(rhs); }
        dfloat& operator=(short rhs)              noexcept { return convert_signed(rhs); }
        dfloat& operator=(int rhs)                noexcept { return convert_signed(rhs); }
        dfloat& operator=(long rhs)               noexcept { return convert_signed(rhs); }
        dfloat& operator=(long long rhs)          noexcept { return convert_signed(rhs); }
        // Plain char is signed on many platforms; widening to int keeps the
        // numeric value whether char is signed or unsigned, so a negative char
        // is no longer reinterpreted as a large unsigned number.
        dfloat& operator=(char rhs)               noexcept { return convert_signed(static_cast<int>(rhs)); }
        dfloat& operator=(unsigned short rhs)     noexcept { return convert_unsigned(rhs); }
        dfloat& operator=(unsigned int rhs)       noexcept { return convert_unsigned(rhs); }
        dfloat& operator=(unsigned long rhs)      noexcept { return convert_unsigned(rhs); }
        dfloat& operator=(unsigned long long rhs) noexcept { return convert_unsigned(rhs); }
        dfloat& operator=(float rhs)              noexcept { return convert_ieee754(rhs); }
        dfloat& operator=(double rhs)             noexcept { return convert_ieee754(rhs); }
416✔
250

251
        // conversion operators
252
        explicit operator float()           const noexcept { return float(convert_to_double()); }
253
        explicit operator double()          const noexcept { return convert_to_double(); }
389✔
254

255
#if LONG_DOUBLE_SUPPORT
256
        explicit dfloat(long double iv)           noexcept { clear(); *this = iv; }
257
        dfloat& operator=(long double rhs)        noexcept { return convert_ieee754(double(rhs)); }
258
        explicit operator long double()     const noexcept { return (long double)convert_to_double(); }
259
#endif
260

261
        // prefix operators
262
        dfloat operator-() const {
                // Negation flips the sign bit of a copy; a zero is returned unchanged.
                dfloat result(*this);
                if (result.iszero()) return result;
                result.setsign(!result.sign());
                return result;
        }
269

270
        // arithmetic operators
271
        // In-place addition.
        //
        // Special values: any NaN operand gives a quiet NaN, inf + (-inf) gives a
        // quiet NaN, an infinity otherwise wins, and adding zero returns the other
        // operand unchanged.
        //
        // Finite operands are aligned to the smaller exponent and added or
        // subtracted as integers. The alignment is carried out in
        // wide_significand_t because scaling by 10^shift can produce close to
        // 2*ndigits digits, more than significand_t is sized for.
        dfloat& operator+=(const dfloat& rhs) {
                // unpack both operands
                bool lhs_sign, rhs_sign;
                int lhs_exp, rhs_exp;
                significand_t lhs_sig, rhs_sig;
                unpack(lhs_sign, lhs_exp, lhs_sig);
                rhs.unpack(rhs_sign, rhs_exp, rhs_sig);

                // handle special values
                if (isnan() || rhs.isnan()) { setnan(NAN_TYPE_QUIET); return *this; }
                if (isinf() && rhs.isinf()) {
                        if (lhs_sign != rhs_sign) setnan(NAN_TYPE_QUIET); // inf + (-inf) = NaN
                        return *this;                                      // same-sign inf is unchanged
                }
                if (isinf()) return *this;
                if (rhs.isinf()) { *this = rhs; return *this; }
                // A zero significand is a zero whatever its exponent field holds.
                if (rhs.iszero() || rhs_sig.iszero()) return *this;
                if (iszero() || lhs_sig.iszero()) { *this = rhs; return *this; }

                // Dominance test on magnitudes, not exponents: the significands are
                // not left-justified, so a large exponent gap alone does not mean the
                // other operand is negligible (1e7 + 9999999e0 must not return 1e7).
                // "top" is one past the most significant digit: |value| < 10^top.
                const int lhs_top = lhs_exp + static_cast<int>(count_digits_s(lhs_sig));
                const int rhs_top = rhs_exp + static_cast<int>(count_digits_s(rhs_sig));
                const int negligible_gap = static_cast<int>(ndigits) + 2;
                // With a gap of ndigits + 2 the smaller operand is below 1/10 of the
                // last kept digit even when a subtraction drops the result one decade.
                // NOTE(review): returning the larger operand unchanged matches
                // round-to-nearest; confirm against normalize_and_pack's rounding mode.
                if (lhs_top - rhs_top >= negligible_gap) return *this;
                if (rhs_top - lhs_top >= negligible_gap) { *this = rhs; return *this; }

                // Align both significands to the smaller exponent in the wide type.
                wide_significand_t ten_w(10);
                wide_significand_t wide_lhs, wide_rhs;
                wide_lhs.assign(lhs_sig);
                wide_rhs.assign(rhs_sig);
                int result_exp;
                if (lhs_exp >= rhs_exp) {
                        result_exp = rhs_exp;
                        for (int i = rhs_exp; i < lhs_exp; ++i) wide_lhs *= ten_w;
                }
                else {
                        result_exp = lhs_exp;
                        for (int i = lhs_exp; i < rhs_exp; ++i) wide_rhs *= ten_w;
                }

                // Add magnitudes for like signs; otherwise subtract the smaller from
                // the larger and take the sign of the larger.
                bool result_sign;
                wide_significand_t wide_sum;
                if (lhs_sign == rhs_sign) {
                        wide_sum = wide_lhs + wide_rhs;
                        result_sign = lhs_sign;
                }
                else if (wide_lhs >= wide_rhs) {
                        wide_sum = wide_lhs - wide_rhs;
                        result_sign = lhs_sign;
                }
                else {
                        wide_sum = wide_rhs - wide_lhs;
                        result_sign = rhs_sign;
                }

                // Bring the sum down to at most ndigits + 2 digits, which always fits
                // in significand_t, remembering whether any non-zero digit was dropped.
                unsigned sum_digits = 0;
                {
                        wide_significand_t tmp(wide_sum);
                        while (!tmp.iszero()) { tmp /= ten_w; ++sum_digits; }
                }
                bool dropped_nonzero = false;
                while (sum_digits > ndigits + 2u) {
                        if (!(wide_sum % ten_w).iszero()) dropped_nonzero = true;
                        wide_sum /= ten_w;
                        ++result_exp;
                        --sum_digits;
                }
                // Sticky digit: the lowest kept digit is a guard digit here, so turning
                // a 0 into a 1 records "more non-zero digits followed" without
                // changing any of the ndigits result digits.
                if (dropped_nonzero && (wide_sum % ten_w).iszero()) {
                        wide_sum = wide_sum + wide_significand_t(1);
                }

                significand_t abs_sig;
                abs_sig.assign(wide_sum);

                // normalize to ndigits precision
                normalize_and_pack(result_sign, result_exp, abs_sig);
                return *this;
        }
339
        dfloat& operator-=(const dfloat& rhs) {
                // Subtraction is addition of the negated operand; unary minus leaves
                // a zero untouched and flips the sign bit of everything else.
                return operator+=(-rhs);
        }
344
        dfloat& operator*=(const dfloat& rhs) {
                bool lhs_sign, rhs_sign;
                int lhs_exp, rhs_exp;
                significand_t lhs_sig, rhs_sig;
                unpack(lhs_sign, lhs_exp, lhs_sig);
                rhs.unpack(rhs_sign, rhs_exp, rhs_sig);

                // special values first
                if (isnan() || rhs.isnan()) { setnan(NAN_TYPE_QUIET); return *this; }
                if (isinf() || rhs.isinf()) {
                        if (iszero() || rhs.iszero()) {
                                setnan(NAN_TYPE_QUIET);          // 0 * inf = NaN
                        }
                        else {
                                setinf(lhs_sign != rhs_sign);
                        }
                        return *this;
                }
                if (iszero() || rhs.iszero()) { setzero(); return *this; }

                // sign is the XOR of the operand signs, exponents add
                const bool product_sign = (lhs_sign != rhs_sign);
                int product_exp = lhs_exp + rhs_exp;

                // full-width product of the two significands
                wide_significand_t product = urmul(lhs_sig, rhs_sig);
                wide_significand_t base(10);

                // count the decimal digits of the product (a zero counts as one digit)
                unsigned product_digits = 0;
                if (product.iszero()) {
                        product_digits = 1;
                }
                else {
                        for (wide_significand_t rest(product); !rest.iszero(); rest /= base) ++product_digits;
                }

                // drop low-order digits until at most ndigits remain, raising the exponent to match
                for (; product_digits > ndigits; --product_digits) {
                        product /= base;
                        ++product_exp;
                }

                // narrow to the working significand type and encode
                significand_t narrowed;
                narrowed.assign(product);

                normalize_and_pack(product_sign, product_exp, narrowed);
                return *this;
        }
388
        // In-place division.
        //
        // Special values: any NaN operand, inf/inf and 0/0 give a quiet NaN;
        // x/0 gives a signed infinity (or throws dfloat_divide_by_zero when
        // DFLOAT_THROW_ARITHMETIC_EXCEPTION is enabled); 0/x and x/inf give zero;
        // inf/x gives a signed infinity.
        //
        // Finite operands: the dividend significand is scaled by 10^ndigits and
        // divided as an integer in wide_significand_t. The scaled dividend and
        // the quotient can reach 2*ndigits digits, more than significand_t is
        // sized for.
        dfloat& operator/=(const dfloat& rhs) {
                bool lhs_sign, rhs_sign;
                int lhs_exp, rhs_exp;
                significand_t lhs_sig, rhs_sig;
                unpack(lhs_sign, lhs_exp, lhs_sig);
                rhs.unpack(rhs_sign, rhs_exp, rhs_sig);

                // handle special values
                if (isnan() || rhs.isnan()) { setnan(NAN_TYPE_QUIET); return *this; }
                if (isinf() && rhs.isinf()) { setnan(NAN_TYPE_QUIET); return *this; }

                const bool result_sign = (lhs_sign != rhs_sign);
                // A finite operand with a zero significand is a zero whatever its
                // exponent field holds; catching it here also keeps a zero divisor
                // away from the integer division below.
                const bool lhs_zero = iszero() || (!isinf() && lhs_sig.iszero());
                const bool rhs_zero = rhs.iszero() || (!rhs.isinf() && rhs_sig.iszero());

                if (rhs_zero) {
#if DFLOAT_THROW_ARITHMETIC_EXCEPTION
                        throw dfloat_divide_by_zero();
#else
                        if (lhs_zero) { setnan(NAN_TYPE_QUIET); return *this; } // 0/0
                        setinf(result_sign);
                        return *this;
#endif
                }
                if (lhs_zero) { setzero(); return *this; }
                if (isinf()) { setsign(result_sign); return *this; }
                if (rhs.isinf()) { setzero(); return *this; }  // finite / inf = 0

                // quotient = floor(lhs_sig * 10^ndigits / rhs_sig), computed in the wide type
                wide_significand_t ten_w(10);
                wide_significand_t dividend, divisor;
                dividend.assign(lhs_sig);
                divisor.assign(rhs_sig);
                for (unsigned i = 0; i < ndigits; ++i) dividend *= ten_w;

                wide_significand_t wide_quotient = dividend / divisor;
                bool inexact = !(dividend % divisor).iszero();
                int result_exp = lhs_exp - rhs_exp - static_cast<int>(ndigits);

                // Bring the quotient down to at most ndigits + 2 digits, which always
                // fits in significand_t, remembering whether anything non-zero was dropped.
                unsigned quotient_digits = 0;
                {
                        wide_significand_t tmp(wide_quotient);
                        while (!tmp.iszero()) { tmp /= ten_w; ++quotient_digits; }
                }
                bool trimmed = false;
                while (quotient_digits > ndigits + 2u) {
                        if (!(wide_quotient % ten_w).iszero()) inexact = true;
                        wide_quotient /= ten_w;
                        ++result_exp;
                        --quotient_digits;
                        trimmed = true;
                }
                // Sticky digit: only when digits were trimmed is the lowest kept digit
                // a guard digit, so only then may a 0 be turned into a 1 to record
                // that non-zero digits (or a division remainder) followed.
                if (trimmed && inexact && (wide_quotient % ten_w).iszero()) {
                        wide_quotient = wide_quotient + wide_significand_t(1);
                }

                significand_t quotient;
                quotient.assign(wide_quotient);

                normalize_and_pack(result_sign, result_exp, quotient);
                return *this;
        }
427

428
        // unary operators
429
        // prefix increment: add one and return the updated object
        dfloat& operator++() {
                return *this += dfloat(1);
        }
        // postfix increment: return the value held before the increment
        dfloat operator++(int) {
                dfloat before(*this);
                operator++();
                return before;
        }
438
        // prefix decrement: subtract one and return the updated object
        dfloat& operator--() {
                return *this -= dfloat(1);
        }
        // postfix decrement: return the value held before the decrement
        dfloat operator--(int) {
                dfloat before(*this);
                operator--();
                return before;
        }
447

448
        // modifiers
449
        void clear() noexcept {
1,546✔
450
                _encoding.clear();
1,546✔
451
        }
1,546✔
452
        // Set the value to zero; implemented as clear(), so the sign bit is reset as well.
        void setzero() noexcept { clear(); }
55✔
453

454
        void setinf(bool negative = true) noexcept {
28✔
455
                clear();
28✔
456
                // combination field = 11110 -> bits: sign | 11110 | 0...0
457
                // set sign
458
                setbit(nbits - 1, negative);
28✔
459
                // set combination field bits to 11110
460
                unsigned combStart = nbits - 2; // MSB of combination
28✔
461
                setbit(combStart,     true);   // a = 1
28✔
462
                setbit(combStart - 1, true);   // b = 1
28✔
463
                setbit(combStart - 2, true);   // c = 1
28✔
464
                setbit(combStart - 3, true);   // d = 1
28✔
465
                setbit(combStart - 4, false);  // e = 0
28✔
466
        }
28✔
467

468
        void setnan(int NaNType = NAN_TYPE_SIGNALLING) noexcept {
16✔
469
                clear();
16✔
470
                // combination field = 11111
471
                unsigned combStart = nbits - 2;
16✔
472
                setbit(combStart,     true);
16✔
473
                setbit(combStart - 1, true);
16✔
474
                setbit(combStart - 2, true);
16✔
475
                setbit(combStart - 3, true);
16✔
476
                setbit(combStart - 4, true);
16✔
477
                if (NaNType == NAN_TYPE_QUIET) {
16✔
478
                        // set MSB of trailing significand for quiet NaN
479
                        if (t > 0) setbit(t - 1, true);
15✔
480
                }
481
        }
16✔
482

483
        void setsign(bool negative = true) noexcept {
93✔
484
                setbit(nbits - 1, negative);
93✔
485
        }
93✔
486

487
        // use un-interpreted raw bits to set the value of the dfloat
488
        inline void setbits(uint64_t value) noexcept {
489
                _encoding.setbits(value);
490
        }
491

492
        // create specific number system values of interest
493
        // Largest positive value: significand 10^ndigits - 1 at exponent emax.
        dfloat& maxpos() noexcept {
                clear();
                pack(false, emax, pow10_s(ndigits) - significand_t(1));
                return *this;
        }
499
        // Smallest positive value: significand 1 at exponent emin.
        dfloat& minpos() noexcept {
                clear();
                const significand_t unit(1);
                pack(false, emin, unit);
                return *this;
        }
504
        // Zero is the all-clear encoding.
        dfloat& zero() noexcept { clear(); return *this; }
508
        // Negative value of smallest magnitude: significand 1 at exponent emin, sign set.
        dfloat& minneg() noexcept {
                clear();
                const significand_t unit(1);
                pack(true, emin, unit);
                return *this;
        }
513
        // Negative value of largest magnitude: significand 10^ndigits - 1 at exponent emax, sign set.
        dfloat& maxneg() noexcept {
                clear();
                pack(true, emax, pow10_s(ndigits) - significand_t(1));
                return *this;
        }
519

520
        // Parse decimal text into this value and return *this.
        //
        // Accepted forms (after optional leading whitespace and an optional sign):
        //   "inf" / "nan" prefixes, case-insensitive
        //   "123", "123.456", ".456", "123.", each optionally followed by
        //   e/E, an optional sign and exponent digits
        // Parsing stops at the first character that does not fit; empty or
        // whitespace-only input leaves the value cleared (zero).
        //
        // At most ndigits significant digits are kept. Leading zeros do not
        // count, and digits beyond the precision are dropped, not rounded.
        dfloat& assign(const std::string& txt) {
                clear();
                const size_t len = txt.size();

                // Skip leading whitespace
                size_t pos = 0;
                while (pos < len && std::isspace(static_cast<unsigned char>(txt[pos]))) ++pos;
                if (pos >= len) return *this;

                // Optional sign
                bool negative = false;
                if (txt[pos] == '-') { negative = true; ++pos; }
                else if (txt[pos] == '+') { ++pos; }

                // Special values (case-insensitive, matched on the first three characters)
                if (len - pos >= 3) {
                        const char c0 = static_cast<char>(std::tolower(static_cast<unsigned char>(txt[pos])));
                        const char c1 = static_cast<char>(std::tolower(static_cast<unsigned char>(txt[pos + 1])));
                        const char c2 = static_cast<char>(std::tolower(static_cast<unsigned char>(txt[pos + 2])));
                        if (c0 == 'i' && c1 == 'n' && c2 == 'f') { setinf(negative); return *this; }
                        if (c0 == 'n' && c1 == 'a' && c2 == 'n') { setnan(NAN_TYPE_QUIET); return *this; }
                }

                // Mantissa: accumulate digits into sig and track the power of ten so
                // that the parsed value is sig * 10^decimal_exponent.
                significand_t sig(0);
                significand_t ten(10);
                unsigned digit_count = 0;      // significant digits held in sig (leading zeros excluded)
                int decimal_exponent = 0;
                bool seen_dot = false;

                for (; pos < len; ++pos) {
                        const char ch = txt[pos];
                        if (ch == '.') {
                                if (seen_dot) break;   // second dot ends parsing
                                seen_dot = true;
                                continue;
                        }
                        if (ch < '0' || ch > '9') break;   // non-digit, non-dot ends the mantissa

                        const int digit = ch - '0';
                        if (digit_count < ndigits) {
                                sig = sig * ten + significand_t(static_cast<long long>(digit));
                                // Leading zeros leave sig at 0 and must not use up precision.
                                if (digit != 0 || digit_count > 0) ++digit_count;
                                // Every stored fractional digit moves the decimal point one place.
                                if (seen_dot) --decimal_exponent;
                        }
                        else if (!seen_dot) {
                                // Dropped integer digit: the kept digits are worth ten times more.
                                ++decimal_exponent;
                        }
                        // A dropped fractional digit changes neither sig nor the scale.
                }

                // Optional exponent: e/E followed by optional sign and digits
                if (pos < len && (txt[pos] == 'e' || txt[pos] == 'E')) {
                        ++pos;
                        bool exp_neg = false;
                        if (pos < len && (txt[pos] == '-' || txt[pos] == '+')) {
                                exp_neg = (txt[pos] == '-');
                                ++pos;
                        }

                        // Saturate rather than overflow int on absurdly long exponents.
                        const int exp_limit = 100000000;
                        int exp_val = 0;
                        for (; pos < len && txt[pos] >= '0' && txt[pos] <= '9'; ++pos) {
                                if (exp_val < exp_limit) exp_val = exp_val * 10 + (txt[pos] - '0');
                        }
                        decimal_exponent += exp_neg ? -exp_val : exp_val;
                }

                // Remove trailing zeros from the significand, raising the exponent to compensate
                while (!sig.iszero() && digit_count > 1) {
                        significand_t remainder = sig % ten;
                        if (!remainder.iszero()) break;
                        sig /= ten;
                        decimal_exponent++;
                        digit_count--;
                }

                if (sig.iszero()) {
                        setzero();
                        if (negative) setsign(true);
                        return *this;
                }

                normalize_and_pack(negative, decimal_exponent, sig);
                return *this;
        }
2✔
615

616
        // selectors
617
        bool sign() const noexcept {
1,438✔
618
                return getbit(nbits - 1);
1,438✔
619
        }
620

621
        bool iszero() const noexcept {
2,479✔
622
                // zero when all bits except sign are 0
623
                // Check all bits except the sign bit (nbits-1)
624
                for (unsigned i = 0; i < nbits - 1; ++i) {
7,240✔
625
                        if (_encoding.at(i)) return false;
7,154✔
626
                }
627
                return true;
86✔
628
        }
629

630
        bool isone() const noexcept {
2✔
631
                bool s; int e; significand_t sig;
632
                unpack(s, e, sig);
2✔
633
                return !s && (sig == significand_t(1)) && (e == 0);
2✔
634
        }
635

636
        // True when the sign bit is clear.
        bool ispos() const noexcept { return !sign(); }
637
        // True when the sign bit is set.
        bool isneg() const noexcept { return sign(); }
638

639
        bool isinf() const noexcept {
2,603✔
640
                // combination field == 11110
641
                unsigned combStart = nbits - 2;
2,603✔
642
                return getbit(combStart) && getbit(combStart - 1) &&
2,823✔
643
                       getbit(combStart - 2) && getbit(combStart - 3) &&
2,872✔
644
                       !getbit(combStart - 4);
2,652✔
645
        }
646

647
        bool isnan() const noexcept {
2,396✔
648
                // combination field == 11111
649
                unsigned combStart = nbits - 2;
2,396✔
650
                return getbit(combStart) && getbit(combStart - 1) &&
2,608✔
651
                       getbit(combStart - 2) && getbit(combStart - 3) &&
2,653✔
652
                       getbit(combStart - 4);
2,441✔
653
        }
654

655
        bool isnan(int NaNType) const noexcept {
656
                if (!isnan()) return false;
657
                if (NaNType == NAN_TYPE_QUIET) {
658
                        return (t > 0) ? getbit(t - 1) : true;
659
                }
660
                else {
661
                        return (t > 0) ? !getbit(t - 1) : true;
662
                }
663
        }
664

665
        int scale() const noexcept {
12✔
666
                if (iszero() || isinf() || isnan()) return 0;
12✔
667
                bool s; int e; significand_t sig;
668
                unpack(s, e, sig);
12✔
669
                // scale in powers of 10
670
                return e + static_cast<int>(count_digits_s(sig)) - 1;
12✔
671
        }
672

673
        // Format modes for str()
674
        // Notation selector for str(): 'automatic' lets str() pick between fixed and
        // scientific based on the decimal point position; 'fixed' and 'scientific'
        // force the respective notation.
        enum class FmtMode { automatic, fixed, scientific };
675

676
        // convert to string
677
        // precision: number of significant digits (0 = ndigits)
678
        // mode: automatic (default), fixed, or scientific
679
        std::string str(size_t precision = 0, FmtMode mode = FmtMode::automatic) const {
99✔
680
                if (isnan()) return std::string("nan");
103✔
681
                if (isinf()) return sign() ? std::string("-inf") : std::string("inf");
127✔
682
                if (iszero()) return sign() ? std::string("-0") : std::string("0");
88✔
683

684
                bool s; int e; significand_t sig;
685
                unpack(s, e, sig);
79✔
686

687
                // value = (-1)^s * sig * 10^e
688
                std::string digits = sig_to_string(sig);
79✔
689
                int num_digits = static_cast<int>(digits.size());
79✔
690
                int decimal_pos = num_digits + e; // position of decimal point from left
79✔
691

692
                // Determine effective precision (number of significant digits to show)
693
                size_t prec = (precision > 0) ? precision : static_cast<size_t>(ndigits);
79✔
694
                // Trim digits to requested precision
695
                if (digits.size() > prec) {
79✔
NEW
696
                        digits.resize(prec);
×
697
                }
698
                num_digits = static_cast<int>(digits.size());
79✔
699

700
                // Determine format mode
701
                // automatic: use scientific when the exponent would produce more than
702
                //            ndigits leading/trailing zeros, otherwise use fixed
703
                if (mode == FmtMode::automatic) {
79✔
704
                        if (decimal_pos > static_cast<int>(ndigits) || decimal_pos < -static_cast<int>(ndigits / 2)) {
79✔
705
                                mode = FmtMode::scientific;
35✔
706
                        }
707
                        else {
708
                                mode = FmtMode::fixed;
44✔
709
                        }
710
                }
711

712
                std::string result;
79✔
713
                if (s) result = "-";
79✔
714

715
                if (mode == FmtMode::scientific) {
79✔
716
                        // Scientific notation: d.ddd...e+/-NNN
717
                        result += digits[0];
35✔
718
                        if (num_digits > 1) {
35✔
719
                                result += '.';
18✔
720
                                result += digits.substr(1);
18✔
721
                        }
722
                        // exponent = decimal_pos - 1 (since we placed decimal after first digit)
723
                        int sci_exp = decimal_pos - 1;
35✔
724
                        result += 'e';
35✔
725
                        if (sci_exp >= 0) {
35✔
726
                                result += '+';
17✔
727
                        }
728
                        result += std::to_string(sci_exp);
35✔
729
                }
730
                else {
731
                        // Fixed notation
732
                        if (decimal_pos <= 0) {
44✔
733
                                // value < 1: 0.000...digits
734
                                result += "0.";
9✔
735
                                for (int i = 0; i < -decimal_pos; ++i) result += '0';
9✔
736
                                result += digits;
9✔
737
                        }
738
                        else if (decimal_pos >= num_digits) {
35✔
739
                                // integer value
740
                                result += digits;
27✔
741
                                for (int i = 0; i < decimal_pos - num_digits; ++i) result += '0';
44✔
742
                                result += ".0";
27✔
743
                        }
744
                        else {
745
                                // mixed: some digits before and after decimal
746
                                result += digits.substr(0, static_cast<size_t>(decimal_pos));
8✔
747
                                result += '.';
8✔
748
                                result += digits.substr(static_cast<size_t>(decimal_pos));
8✔
749
                        }
750
                }
751

752
                return result;
79✔
753
        }
79✔
754

755
        ///////////////////////////////////////////////////////////////////
756
        // Bit access (public for free functions like to_binary, color_print)
757
        bool getbit(unsigned pos) const noexcept {
60,231✔
758
                if (pos >= nbits) return false;
60,231✔
759
                return _encoding.at(pos);
60,231✔
760
        }
761

762
        ///////////////////////////////////////////////////////////////////
763
        // Unpacking / Packing helpers (public for testing)
764

765
        // Unpack the dfloat into sign, unbiased exponent, and significand integer
766
        void unpack(bool& s, int& exponent, significand_t& significand) const noexcept {
1,174✔
767
                s = sign();
1,174✔
768
                if (iszero()) { exponent = 0; significand = 0; return; }
1,174✔
769
                if (isinf() || isnan()) { exponent = 0; significand = 0; return; }
1,156✔
770

771
                // Extract combination field (5 bits)
772
                unsigned combStart = nbits - 2;
1,145✔
773
                bool a = getbit(combStart);
1,145✔
774
                bool b = getbit(combStart - 1);
1,145✔
775
                bool c = getbit(combStart - 2);
1,145✔
776
                bool d = getbit(combStart - 3);
1,145✔
777
                bool e_bit = getbit(combStart - 4);
1,145✔
778

779
                unsigned exp_msbs;
780
                unsigned msd; // most significant digit
781

782
                if (!(a && b)) {
1,145✔
783
                        // ab != 11: exp MSBs = ab, MSD = 0cde
784
                        exp_msbs = (a ? 2u : 0u) + (b ? 1u : 0u);
1,115✔
785
                        msd = (c ? 4u : 0u) + (d ? 2u : 0u) + (e_bit ? 1u : 0u);
1,115✔
786
                }
787
                else {
788
                        // ab == 11, c determines large digit vs special
789
                        // cd are exp MSBs, MSD = 100e (digit 8 or 9)
790
                        exp_msbs = (c ? 2u : 0u) + (d ? 1u : 0u);
30✔
791
                        msd = 8u + (e_bit ? 1u : 0u);
30✔
792
                }
793

794
                // Extract exponent continuation (es bits after combination field)
795
                unsigned exp_cont = 0;
1,145✔
796
                unsigned bitpos = nbits - 1 - 1 - combBits; // first bit of exponent continuation
1,145✔
797
                for (unsigned i = 0; i < es; ++i) {
8,849✔
798
                        if (getbit(bitpos - i)) {
7,704✔
799
                                exp_cont |= (1u << (es - 1 - i));
3,476✔
800
                        }
801
                }
802

803
                unsigned biased_exp = (exp_msbs << es) | exp_cont;
1,145✔
804
                exponent = static_cast<int>(biased_exp) - bias;
1,145✔
805

806
                // Extract trailing significand (t bits) using blockbinary
807
                if constexpr (encoding == DecimalEncoding::BID) {
808
                        // Read trailing bits directly from encoding into a significand_t
809
                        significand_t trailing(0);
1,048✔
810
                        for (unsigned i = 0; i < t; ++i) {
30,768✔
811
                                if (getbit(i)) trailing.setbit(i, true);
29,720✔
812
                        }
813
                        significand = significand_t(static_cast<long long>(msd)) * pow10_s(ndigits - 1) + trailing;
1,048✔
814
                }
815
                else {
816
                        // DPD: decode declets from trailing bits
817
                        significand = dpd_decode_trailing_wide(msd);
97✔
818
                }
819
        }
820

821
protected:
822
        // Storage for the encoded value; bits are read with at() and written with setbit().
        encoding_t _encoding;
823

824
        ///////////////////////////////////////////////////////////////////
825
        // Bit manipulation helpers
826
        void setbit(unsigned pos, bool value) noexcept {
36,548✔
827
                if (pos >= nbits) return;
36,548✔
828
                _encoding.setbit(pos, value);
36,548✔
829
        }
830

831
        ///////////////////////////////////////////////////////////////////
832
        // Pack sign, unbiased exponent, and significand into the dfloat encoding
833
        void pack(bool s, int exponent, const significand_t& significand) noexcept {
820✔
834
                clear();
820✔
835
                if (significand.iszero()) return; // zero
820✔
836

837
                // Determine MSD and trailing
838
                significand_t msd_val = significand / pow10_s(ndigits - 1);
820✔
839
                unsigned msd = static_cast<unsigned>(static_cast<long long>(msd_val));
820✔
840

841
                unsigned biased_exp = static_cast<unsigned>(exponent + bias);
820✔
842

843
                // Encode sign
844
                setbit(nbits - 1, s);
820✔
845

846
                // Encode combination field
847
                unsigned exp_msbs = (biased_exp >> es) & 0x3u;
820✔
848
                unsigned combStart = nbits - 2;
820✔
849

850
                if (msd < 8) {
820✔
851
                        setbit(combStart,     (exp_msbs >> 1) & 1);
795✔
852
                        setbit(combStart - 1, exp_msbs & 1);
795✔
853
                        setbit(combStart - 2, (msd >> 2) & 1);
795✔
854
                        setbit(combStart - 3, (msd >> 1) & 1);
795✔
855
                        setbit(combStart - 4, msd & 1);
795✔
856
                }
857
                else {
858
                        setbit(combStart,     true);
25✔
859
                        setbit(combStart - 1, true);
25✔
860
                        setbit(combStart - 2, (exp_msbs >> 1) & 1);
25✔
861
                        setbit(combStart - 3, exp_msbs & 1);
25✔
862
                        setbit(combStart - 4, msd & 1);
25✔
863
                }
864

865
                // Encode exponent continuation (es bits)
866
                unsigned exp_cont = biased_exp & ((1u << es) - 1u);
820✔
867
                unsigned bitpos = nbits - 1 - 1 - combBits;
820✔
868
                for (unsigned i = 0; i < es; ++i) {
6,362✔
869
                        setbit(bitpos - i, (exp_cont >> (es - 1 - i)) & 1);
5,542✔
870
                }
871

872
                // Encode trailing significand (t bits)
873
                if constexpr (encoding == DecimalEncoding::BID) {
874
                        significand_t trailing = significand % pow10_s(ndigits - 1);
747✔
875
                        // Extract bits from blockbinary significand_t and write into encoding
876
                        for (unsigned i = 0; i < t; ++i) {
22,227✔
877
                                setbit(i, trailing.at(i));
21,480✔
878
                        }
879
                }
880
                else {
881
                        // DPD encoding: encode and write declets directly into bits
882
                        dpd_encode_trailing_wide(significand);
73✔
883
                }
884
        }
885

886
        ///////////////////////////////////////////////////////////////////
887
        // Normalize significand to ndigits and pack
888
        void normalize_and_pack(bool s, int exponent, significand_t significand) noexcept {
802✔
889
                if (significand.iszero()) { setzero(); if (s) setsign(true); return; }
802✔
890

891
                // Normalize: ensure significand has exactly ndigits digits
892
                significand_t ten(10);
783✔
893
                unsigned digits = count_digits_s(significand);
783✔
894
                while (digits > ndigits) {
799✔
895
                        significand /= ten;
16✔
896
                        exponent++;
16✔
897
                        digits--;
16✔
898
                }
899
                // No need to scale up - smaller significands are valid
900

901
                // Check for overflow/underflow
902
                if (exponent > emax) {
783✔
903
                        setinf(s);
×
904
                        return;
×
905
                }
906
                if (exponent < emin) {
783✔
907
                        // underflow to zero
908
                        setzero();
×
909
                        if (s) setsign(true);
×
910
                        return;
×
911
                }
912

913
                pack(s, exponent, significand);
783✔
914
        }
915

916
        ///////////////////////////////////////////////////////////////////
917
        // DPD encode/decode helpers (unified for all widths)
918

919
        // DPD decode: read declets directly from encoding bits
920
        significand_t dpd_decode_trailing_wide(unsigned msd) const noexcept {
                significand_t trailing(0);
                significand_t weight(1);      // 1000^k for the k-th declet
                significand_t thousand(1000);

                // each declet occupies 10 bits and carries three decimal digits;
                // declets are read starting at bit 0
                unsigned bit_offset = 0;
                for (unsigned remaining = ndigits - 1; remaining >= 3; remaining -= 3) {
                        uint16_t declet = 0;
                        for (unsigned b = 0; b < 10; ++b) {
                                if (getbit(bit_offset + b)) declet |= static_cast<uint16_t>(1u << b);
                        }
                        const unsigned group = dpd_decode(declet);
                        trailing += significand_t(static_cast<long long>(group)) * weight;
                        weight *= thousand;
                        bit_offset += 10;
                }

                // put the leading digit in front of the decoded trailing digits
                return significand_t(static_cast<long long>(msd)) * pow10_s(ndigits - 1) + trailing;
        }
942

943
        // DPD encode: write declets directly into encoding bits
944
        void dpd_encode_trailing_wide(const significand_t& significand) noexcept {
73✔
945
                significand_t msd_factor = pow10_s(ndigits - 1);
73✔
946
                significand_t trailing_val = significand % msd_factor;
73✔
947
                significand_t thousand(1000);
73✔
948
                unsigned remaining = ndigits - 1;
73✔
949
                unsigned bit_offset = 0;
73✔
950

951
                while (remaining >= 3) {
498✔
952
                        significand_t group_bb = trailing_val % thousand;
425✔
953
                        unsigned group = static_cast<unsigned>(static_cast<long long>(group_bb));
425✔
954
                        trailing_val /= thousand;
425✔
955
                        uint16_t declet = dpd_encode(group);
425✔
956
                        for (unsigned b = 0; b < 10; ++b) {
4,675✔
957
                                setbit(bit_offset + b, (declet >> b) & 1);
4,250✔
958
                        }
959
                        bit_offset += 10;
425✔
960
                        remaining -= 3;
425✔
961
                }
962
        }
73✔
963

964
        ///////////////////////////////////////////////////////////////////
965
        // Conversion helpers
966

967
        // Convert native IEEE-754 double to dfloat
968
        dfloat& convert_ieee754(double rhs) noexcept {
                // special values map directly onto their encodings
                if (std::isnan(rhs)) {
                        setnan(NAN_TYPE_QUIET);
                        return *this;
                }
                if (std::isinf(rhs)) {
                        setinf(rhs < 0);
                        return *this;
                }
                if (rhs == 0.0) {
                        setzero();
                        if (std::signbit(rhs)) setsign(true);
                        return *this;
                }

                bool negative = (rhs < 0);
                double abs_val = std::fabs(rhs);

                // Convert to decimal significand and exponent.
                // A double carries at most 17 significant decimal digits, so the
                // significand extracted here always fits in uint64_t.
                // abs_val is nonzero and finite at this point, so log10 is well defined.
                int dec_exp = static_cast<int>(std::floor(std::log10(abs_val)));

                // Scale to get min(ndigits, 17) significant digits (double precision limit)
                unsigned effective_digits = (ndigits < 17) ? ndigits : 17;
                int target_exp = dec_exp - static_cast<int>(effective_digits) + 1;

                // scaled = abs_val / 10^target_exp.
                // For very small inputs 10^target_exp is itself subnormal (imprecise) or
                // underflows to 0, which would make the quotient inaccurate or infinite
                // and the integer cast below undefined. Split the scaling in two steps
                // that both stay inside the normal double range.
                double scaled;
                if (target_exp < -307) {
                        scaled = (abs_val * 1e300) / std::pow(10.0, static_cast<double>(target_exp + 300));
                }
                else {
                        scaled = abs_val / std::pow(10.0, static_cast<double>(target_exp));
                }
                uint64_t sig_narrow = static_cast<uint64_t>(std::round(scaled));

                // Adjust if rounding pushed us over effective_digits digits
                uint64_t limit = pow10_64(effective_digits);
                if (sig_narrow >= limit) {
                        sig_narrow /= 10;
                        target_exp++;
                }
                // Remove trailing zeros so the shortest significand is stored
                while (sig_narrow > 0 && (sig_narrow % 10) == 0) {
                        sig_narrow /= 10;
                        target_exp++;
                }

                normalize_and_pack(negative, target_exp, significand_t(static_cast<long long>(sig_narrow)));
                return *this;
        }
1015

1016
        // Convert dfloat to native IEEE-754 double
1017
        double convert_to_double() const noexcept {
389✔
1018
                if (isnan()) return std::numeric_limits<double>::quiet_NaN();
389✔
1019
                if (isinf()) return sign() ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity();
389✔
1020
                if (iszero()) return sign() ? -0.0 : 0.0;
389✔
1021

1022
                bool s; int e; significand_t sig;
1023
                unpack(s, e, sig);
356✔
1024

1025
                // value = (-1)^s * sig * 10^e
1026
                // For ndigits <= 19, sig fits in uint64_t; for wider, use string conversion
1027
                double sig_d;
1028
                if constexpr (sig_bits <= 64) {
1029
                        sig_d = static_cast<double>(static_cast<unsigned long long>(sig));
282✔
1030
                }
1031
                else {
1032
                        // Use the string representation for wide significands
1033
                        std::string sig_str = to_decimal(sig);
74✔
1034
                        sig_d = std::strtod(sig_str.c_str(), nullptr);
74✔
1035
                }
74✔
1036
                double result = sig_d * std::pow(10.0, static_cast<double>(e));
356✔
1037
                return s ? -result : result;
356✔
1038
        }
1039

1040
        // Convert a signed 64-bit integer to dfloat.
        dfloat& convert_signed(int64_t v) noexcept {
                if (0 == v) {
                        setzero();
                        return *this;
                }
                bool negative = (v < 0);
                // Negate in unsigned arithmetic: -v is undefined for INT64_MIN,
                // whereas 0 - uint64_t(v) yields the magnitude 2^63 as intended.
                uint64_t abs_v = negative ? (uint64_t{0} - static_cast<uint64_t>(v)) : static_cast<uint64_t>(v);

                // Remove trailing zeros
                int exponent = 0;
                while (abs_v > 0 && (abs_v % 10) == 0) {
                        abs_v /= 10;
                        exponent++;
                }

                // significand_t is built from long long; a magnitude above INT64_MAX
                // (only 2^63 here) would wrap negative in that cast, so assemble it
                // from its leading digits and its last digit instead.
                significand_t sig;
                if (abs_v > static_cast<uint64_t>(INT64_MAX)) {
                        sig = significand_t(static_cast<long long>(abs_v / 10)) * significand_t(10)
                            + significand_t(static_cast<long long>(abs_v % 10));
                }
                else {
                        sig = significand_t(static_cast<long long>(abs_v));
                }

                normalize_and_pack(negative, exponent, sig);
                return *this;
        }
1058

1059
        // Convert an unsigned 64-bit integer to dfloat.
        dfloat& convert_unsigned(uint64_t v) noexcept {
                if (0 == v) {
                        setzero();
                        return *this;
                }

                // Remove trailing zeros
                int exponent = 0;
                while (v > 0 && (v % 10) == 0) {
                        v /= 10;
                        exponent++;
                }

                // significand_t is built from long long; values above INT64_MAX would
                // wrap negative in that cast, so assemble them from their leading
                // digits and their last digit instead.
                significand_t sig;
                if (v > static_cast<uint64_t>(INT64_MAX)) {
                        sig = significand_t(static_cast<long long>(v / 10)) * significand_t(10)
                            + significand_t(static_cast<long long>(v % 10));
                }
                else {
                        sig = significand_t(static_cast<long long>(v));
                }

                normalize_and_pack(false, exponent, sig);
                return *this;
        }
1074

1075
private:
1076

1077
        // dfloat - dfloat logic comparisons
1078
        template<unsigned N, unsigned E, DecimalEncoding Enc, typename B>
1079
        friend bool operator==(const dfloat<N, E, Enc, B>& lhs, const dfloat<N, E, Enc, B>& rhs);
1080

1081
        // dfloat - literal logic comparisons
1082
        template<unsigned N, unsigned E, DecimalEncoding Enc, typename B>
1083
        friend bool operator==(const dfloat<N, E, Enc, B>& lhs, const double rhs);
1084

1085
        // literal - dfloat logic comparisons
1086
        template<unsigned N, unsigned E, DecimalEncoding Enc, typename B>
1087
        friend bool operator==(const double lhs, const dfloat<N, E, Enc, B>& rhs);
1088
};
1089

1090

1091
////////////////////////    helper functions   /////////////////////////////////
1092

1093
// divide dfloat a and b and return result argument
1094
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1095
void divide(const dfloat<ndigits, es, Encoding, BlockType>& a, const dfloat<ndigits, es, Encoding, BlockType>& b, dfloat<ndigits, es, Encoding, BlockType>& quotient) {
1096
        quotient = a;
1097
        quotient /= b;
1098
}
1099

1100
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1101
inline std::string to_binary(const dfloat<ndigits, es, Encoding, BlockType>& number, bool nibbleMarker = false) {
83✔
1102
        using Dfloat = dfloat<ndigits, es, Encoding, BlockType>;
1103
        std::stringstream s;
83✔
1104

1105
        // sign bit
1106
        s << (number.sign() ? '1' : '0') << '.';
83✔
1107

1108
        // combination field (5 bits)
1109
        unsigned combStart = Dfloat::nbits - 2;
83✔
1110
        for (unsigned i = 0; i < Dfloat::combBits; ++i) {
498✔
1111
                s << (number.getbit(combStart - i) ? '1' : '0');
415✔
1112
        }
1113
        s << '.';
83✔
1114

1115
        // exponent continuation (es bits)
1116
        unsigned expStart = Dfloat::nbits - 1 - 1 - Dfloat::combBits;
83✔
1117
        for (unsigned i = 0; i < es; ++i) {
685✔
1118
                s << (number.getbit(expStart - i) ? '1' : '0');
602✔
1119
        }
1120
        s << '.';
83✔
1121

1122
        // trailing significand (t bits, MSB first)
1123
        for (int i = static_cast<int>(Dfloat::t) - 1; i >= 0; --i) {
3,303✔
1124
                s << (number.getbit(static_cast<unsigned>(i)) ? '1' : '0');
3,220✔
1125
                if (nibbleMarker && i > 0 && (i % 4 == 0)) s << '\'';
3,220✔
1126
        }
1127

1128
        return s.str();
166✔
1129
}
83✔
1130

1131
////////////////////////    DFLOAT functions   /////////////////////////////////
1132

1133
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1134
inline dfloat<ndigits, es, Encoding, BlockType> abs(const dfloat<ndigits, es, Encoding, BlockType>& a) {
1135
        dfloat<ndigits, es, Encoding, BlockType> result(a);
1136
        result.setsign(false);
1137
        return result;
1138
}
1139

1140
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1141
inline dfloat<ndigits, es, Encoding, BlockType> fabs(dfloat<ndigits, es, Encoding, BlockType> a) {
1142
        a.setsign(false);
1143
        return a;
1144
}
1145

1146

1147
////////////////////////  stream operators   /////////////////////////////////
1148

1149
// generate a dfloat format ASCII format
1150
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1151
inline std::ostream& operator<<(std::ostream& ostr, const dfloat<ndigits, es, Encoding, BlockType>& i) {
99✔
1152
        using Dfloat = dfloat<ndigits, es, Encoding, BlockType>;
1153
        using FmtMode = typename Dfloat::FmtMode;
1154

1155
        std::streamsize prec = ostr.precision();
99✔
1156
        std::streamsize width = ostr.width();
99✔
1157
        std::ios_base::fmtflags ff = ostr.flags();
99✔
1158

1159
        // Map iostream format flags to dfloat FmtMode
1160
        FmtMode mode = FmtMode::automatic;
99✔
1161
        bool scientific = (ff & std::ios_base::scientific) == std::ios_base::scientific;
99✔
1162
        bool fixed      = (ff & std::ios_base::fixed) == std::ios_base::fixed;
99✔
1163
        if (scientific && !fixed) mode = FmtMode::scientific;
99✔
1164
        else if (fixed && !scientific) mode = FmtMode::fixed;
99✔
1165

1166
        // Default to ndigits precision so all stored digits are shown.
1167
        // The iostream default precision is 6, which would silently truncate
1168
        // exact decimal digits. Only use the stream precision when the user
1169
        // has explicitly set scientific or fixed mode.
1170
        size_t effective_prec = (scientific || fixed)
99✔
1171
                ? static_cast<size_t>(prec)
198✔
1172
                : 0;  // 0 tells str() to use ndigits
1173

1174
        std::string representation = i.str(effective_prec, mode);
99✔
1175

1176
        // Handle setw and alignment
1177
        std::streamsize repWidth = static_cast<std::streamsize>(representation.size());
99✔
1178
        if (width > repWidth) {
99✔
1179
                std::streamsize diff = width - repWidth;
16✔
1180
                char fill = ostr.fill();
16✔
1181
                if ((ff & std::ios_base::left) == std::ios_base::left) {
16✔
NEW
1182
                        representation.append(static_cast<size_t>(diff), fill);
×
1183
                }
1184
                else {
1185
                        representation.insert(0, static_cast<size_t>(diff), fill);
16✔
1186
                }
1187
        }
1188

1189
        return ostr << representation;
198✔
1190
}
99✔
1191

1192
// read an ASCII dfloat format
1193
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1194
inline std::istream& operator>>(std::istream& istr, dfloat<ndigits, es, Encoding, BlockType>& p) {
1195
        std::string txt;
1196
        istr >> txt;
1197
        if (!parse(txt, p)) {
1198
                std::cerr << "unable to parse -" << txt << "- into a dfloat value\n";
1199
        }
1200
        return istr;
1201
}
1202

1203
////////////////// string operators
1204

1205
// read a dfloat ASCII format and make a dfloat out of it
1206
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1207
bool parse(const std::string& number, dfloat<ndigits, es, Encoding, BlockType>& value) {
1208
        if (number.empty()) return false;
1209
        value.assign(number);
1210
        return true;
1211
}
1212

1213

1214
//////////////////////////////////////////////////////////////////////////////////////////////////////
1215
// dfloat - dfloat binary logic operators
1216

1217
// equal: precondition is that the storage is properly nulled in all arithmetic paths
1218
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1219
inline bool operator==(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
11✔
1220
        using Dfloat = dfloat<ndigits, es, Encoding, BlockType>;
1221
        // NaN != anything (including itself)
1222
        if (lhs.isnan() || rhs.isnan()) return false;
11✔
1223
        // both zero (ignoring sign)
1224
        if (lhs.iszero() && rhs.iszero()) return true;
10✔
1225
        // compare unpacked values
1226
        bool ls, rs; int le, re;
1227
        typename Dfloat::significand_t lsig, rsig;
1228
        lhs.unpack(ls, le, lsig);
9✔
1229
        rhs.unpack(rs, re, rsig);
9✔
1230
        return (ls == rs) && (le == re) && (lsig == rsig);
9✔
1231
}
1232

1233
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1234
inline bool operator!=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
3✔
1235
        return !operator==(lhs, rhs);
3✔
1236
}
1237

1238
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1239
inline bool operator< (const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
19✔
1240
        using Dfloat = dfloat<ndigits, es, Encoding, BlockType>;
1241
        // NaN is unordered
1242
        if (lhs.isnan() || rhs.isnan()) return false;
19✔
1243
        // handle infinities
1244
        if (lhs.isinf() && rhs.isinf()) {
19✔
1245
                return lhs.sign() && !rhs.sign(); // -inf < +inf
×
1246
        }
1247
        if (lhs.isinf()) return lhs.sign();  // -inf < anything
19✔
1248
        if (rhs.isinf()) return !rhs.sign(); // anything < +inf
19✔
1249

1250
        // handle zeros
1251
        if (lhs.iszero() && rhs.iszero()) return false;
19✔
1252
        if (lhs.iszero()) return !rhs.sign(); // 0 < positive
19✔
1253
        if (rhs.iszero()) return lhs.sign();  // negative < 0
18✔
1254

1255
        // both nonzero, non-special
1256
        bool ls = lhs.sign(), rs = rhs.sign();
17✔
1257
        if (ls != rs) return ls; // negative < positive
17✔
1258

1259
        // same sign: compare magnitudes
1260
        bool ls_ign; int le; typename Dfloat::significand_t lsig;
1261
        bool rs_ign; int re; typename Dfloat::significand_t rsig;
1262
        lhs.unpack(ls_ign, le, lsig);
15✔
1263
        rhs.unpack(rs_ign, re, rsig);
15✔
1264

1265
        // normalize to same scale for comparison
1266
        int l_scale = le + static_cast<int>(Dfloat::count_digits_s(lsig)) - 1;
15✔
1267
        int r_scale = re + static_cast<int>(Dfloat::count_digits_s(rsig)) - 1;
15✔
1268

1269
        if (l_scale != r_scale) {
15✔
1270
                // higher scale means larger magnitude
1271
                return ls ? (l_scale > r_scale) : (l_scale < r_scale);
5✔
1272
        }
1273

1274
        // same overall scale: compare significands at same exponent
1275
        // Align to same exponent by adjusting significands
1276
        typename Dfloat::significand_t ten(10);
10✔
1277
        if (le < re) {
10✔
1278
                int diff = re - le;
×
1279
                if (diff < static_cast<int>(ndigits)) {
×
NEW
1280
                        for (int i = 0; i < diff; ++i) rsig *= ten;
×
1281
                }
1282
        }
1283
        else if (re < le) {
10✔
1284
                int diff = le - re;
×
1285
                if (diff < static_cast<int>(ndigits)) {
×
NEW
1286
                        for (int i = 0; i < diff; ++i) lsig *= ten;
×
1287
                }
1288
        }
1289

1290
        return ls ? (lsig > rsig) : (lsig < rsig);
10✔
1291
}
1292

1293
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1294
inline bool operator> (const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
3✔
1295
        return operator< (rhs, lhs);
3✔
1296
}
1297

1298
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1299
inline bool operator<=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
3✔
1300
        return operator< (lhs, rhs) || operator==(lhs, rhs);
3✔
1301
}
1302

1303
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1304
inline bool operator>=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
3✔
1305
        return !operator< (lhs, rhs);
3✔
1306
}
1307

1308
//////////////////////////////////////////////////////////////////////////////////////////////////////
1309
// dfloat - literal binary logic operators
1310
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1311
inline bool operator==(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1312
        return operator==(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1313
}
1314

1315
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1316
inline bool operator!=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1317
        return !operator==(lhs, rhs);
1318
}
1319

1320
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1321
inline bool operator< (const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1322
        return operator<(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1323
}
1324

1325
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1326
inline bool operator> (const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1327
        return operator< (dfloat<ndigits, es, Encoding, BlockType>(rhs), lhs);
1328
}
1329

1330
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1331
inline bool operator<=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1332
        return operator< (lhs, rhs) || operator==(lhs, rhs);
1333
}
1334

1335
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1336
inline bool operator>=(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1337
        return !operator< (lhs, rhs);
1338
}
1339

1340
//////////////////////////////////////////////////////////////////////////////////////////////////////
1341
// literal - dfloat binary logic operators
1342
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1343
inline bool operator==(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1344
        return operator==(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1345
}
1346

1347
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1348
inline bool operator!=(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1349
        return !operator==(lhs, rhs);
1350
}
1351

1352
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1353
inline bool operator< (const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1354
        return operator<(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1355
}
1356

1357
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1358
inline bool operator> (const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1359
        return operator< (rhs, lhs);
1360
}
1361

1362
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1363
inline bool operator<=(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1364
        return operator< (lhs, rhs) || operator==(lhs, rhs);
1365
}
1366

1367
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1368
inline bool operator>=(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1369
        return !operator< (lhs, rhs);
1370
}
1371

1372
//////////////////////////////////////////////////////////////////////////////////////////////////////
1373
// dfloat - dfloat binary arithmetic operators
1374
// BINARY ADDITION
1375
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1376
inline dfloat<ndigits, es, Encoding, BlockType> operator+(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
125✔
1377
        dfloat<ndigits, es, Encoding, BlockType> sum(lhs);
125✔
1378
        sum += rhs;
125✔
1379
        return sum;
125✔
1380
}
1381
// BINARY SUBTRACTION
1382
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1383
inline dfloat<ndigits, es, Encoding, BlockType> operator-(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
63✔
1384
        dfloat<ndigits, es, Encoding, BlockType> diff(lhs);
63✔
1385
        diff -= rhs;
63✔
1386
        return diff;
63✔
1387
}
1388
// BINARY MULTIPLICATION
1389
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1390
inline dfloat<ndigits, es, Encoding, BlockType> operator*(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
99✔
1391
        dfloat<ndigits, es, Encoding, BlockType> mul(lhs);
99✔
1392
        mul *= rhs;
99✔
1393
        return mul;
99✔
1394
}
1395
// BINARY DIVISION
1396
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1397
inline dfloat<ndigits, es, Encoding, BlockType> operator/(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
25✔
1398
        dfloat<ndigits, es, Encoding, BlockType> ratio(lhs);
25✔
1399
        ratio /= rhs;
25✔
1400
        return ratio;
25✔
1401
}
1402

1403
//////////////////////////////////////////////////////////////////////////////////////////////////////
1404
// dfloat - literal binary arithmetic operators
1405
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1406
inline dfloat<ndigits, es, Encoding, BlockType> operator+(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1407
        return operator+(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1408
}
1409
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1410
inline dfloat<ndigits, es, Encoding, BlockType> operator-(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1411
        return operator-(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1412
}
1413
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1414
inline dfloat<ndigits, es, Encoding, BlockType> operator*(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1415
        return operator*(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1416
}
1417
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1418
inline dfloat<ndigits, es, Encoding, BlockType> operator/(const dfloat<ndigits, es, Encoding, BlockType>& lhs, const double rhs) {
1419
        return operator/(lhs, dfloat<ndigits, es, Encoding, BlockType>(rhs));
1420
}
1421

1422
//////////////////////////////////////////////////////////////////////////////////////////////////////
1423
// literal - dfloat binary arithmetic operators
1424
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1425
inline dfloat<ndigits, es, Encoding, BlockType> operator+(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1426
        return operator+(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1427
}
1428
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1429
inline dfloat<ndigits, es, Encoding, BlockType> operator-(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1430
        return operator-(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1431
}
1432
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1433
inline dfloat<ndigits, es, Encoding, BlockType> operator*(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1434
        return operator*(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1435
}
1436
template<unsigned ndigits, unsigned es, DecimalEncoding Encoding, typename BlockType>
1437
inline dfloat<ndigits, es, Encoding, BlockType> operator/(const double lhs, const dfloat<ndigits, es, Encoding, BlockType>& rhs) {
1438
        return operator/(dfloat<ndigits, es, Encoding, BlockType>(lhs), rhs);
1439
}
1440

1441
}} // namespace sw::universal
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc