• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

stillwater-sc / universal / 25280163148

03 May 2026 01:14PM UTC coverage: 84.172% (-0.02%) from 84.189%
25280163148

push

github

web-flow
feat(dd_cascade): full constexpr support across all operators (#797)

* feat(dd_cascade): full constexpr support across all operators

Promotes dd_cascade (double-double via floatcascade<2>) to fully
constexpr across construction, arithmetic (+, -, *, /, +=, -=, *=, /=),
comparison (==, !=, <, >, <=, >=), unary negation, and conversion-out
(operator int/long/long long/unsigned*/float/double).  Joins integer
(#720), posit (#718), cfloat (#719), bfloat16 (#725), fixpnt (#721),
lns (#776), areal (#792), dbns (#795), and dd (#796) as a fully
constexpr-compliant Universal type.

Per Epic #723.  This issue's structural challenge was that dd_cascade
arithmetic routes through floatcascade<2>'s expansion_ops layer, whose
generic add_cascades<N> and multiply_cascades<N> templates use
std::sort and std::vector (std::sort is not constexpr in C++20 -- it
becomes constexpr in C++26).  The plan: keep the generic templates
intact (they're still consumed by td_cascade and qd_cascade) and add
non-template constexpr overloads for floatcascade<2>.

floatcascade.hpp changes (expansion_ops namespace):
  - two_sum, fast_two_sum: is_constant_evaluated() dispatch.  Runtime
    branch retains the volatile intermediates (load-bearing for
    aggressive-optimization correctness per
    floatcascade_volatile_hardening.md).  Constexpr branch drops
    volatile (constexpr arithmetic is exact under IEEE-754 already).
  - two_prod: same dispatch.  Constexpr branch uses split-based product
    (FMA-free, since std::fma is not constexpr in C++20); runtime
    keeps std::fma.
  - three_sum, three_sum2: marked constexpr (now-constexpr two_sum).
  - compress_4to2: marked constexpr (pure fast_two_sum chain).
  - renormalize<2> specialization: marked constexpr; std::isinf
    replaced with sw::universal::is_inf_cx (constexpr from #796).
  - **Added constexpr non-template overloads** for floatcascade<2>:
      add_cascades(floatcascade<2>, floatcascade<2>) -> floatcascade<4>
      multiply_c... (continued)

122 of 146 new or added lines in 2 files covered. (83.56%)

15 existing lines in 3 files now uncovered.

45809 of 54423 relevant lines covered (84.17%)

6494008.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.77
/include/sw/universal/internal/floatcascade/floatcascade.hpp
1
#pragma once
2
// floatcascade.hpp: implementation of a multi-component floating-point number system
3
//
4
// Copyright (C) 2017 Stillwater Supercomputing, Inc.
5
// SPDX-License-Identifier: MIT
6
//
7
// This file is part of the universal numbers project, which is released under an MIT Open Source license.
8
#include <algorithm>
9
#include <array>
10
#include <bit>
11
#include <cmath>
12
#include <cstdint>
13
#include <iostream>
14
#include <type_traits>
15
#include <vector>
16

17
// supporting types and functions
18
#include <universal/utility/bit_cast.hpp>
19
#include <universal/native/ieee754.hpp>
20
#include <universal/numerics/error_free_ops.hpp>
21
#include <universal/number/shared/nan_encoding.hpp>
22
#include <universal/number/shared/infinite_encoding.hpp>
23
#include <universal/number/shared/specific_value_encoding.hpp>
24

25
namespace sw::universal {
26

27
// floatcascade: The fundamental building block for multi-component Real approximations
28
template<size_t N>
29
class floatcascade {
30
private:
31
    // Components stored in DECREASING order of magnitude: e[0] >= e[1] >= ... >= e[N-1]  
32
    // Most significant component at e[0], least significant at e[N-1]
33
    // Actual value = e[0] + e[1] + ... + e[N-1]
34
    // evaluation order = e[N-1] + e[N-2] + ... + e[0] to capture any non-trivial tail components
35
    std::array<double, N> e;
36

37
public:
38
    // Constructors
39
    constexpr floatcascade() : e{} {
724,276✔
40
        for (size_t i = 0; i < N; ++i) e[i] = 0.0;
2,931,650✔
41
    }
724,276✔
42
    
43
    explicit constexpr floatcascade(double x) : e{} {
50,510✔
44
        e[0] = x;
50,510✔
45
        for (size_t i = 1; i < N; ++i) e[i] = 0.0;
137,088✔
46
    }
50,510✔
47

48
    // Constructor from array of components
49
    explicit constexpr floatcascade(const std::array<double, N>& components) : e(components) {}
1,809✔
50

51
    // Constructor from smaller cascade (zero-extend)
52
    template<size_t M>
53
    explicit floatcascade(const floatcascade<M>& other) : e{} {
54
        static_assert(M <= N, "Cannot construct larger cascade from smaller one automatically");
55
        for (size_t i = 0; i < M; ++i) e[i] = other[i];
56
        for (size_t i = M; i < N; ++i) e[i] = 0.0;
57
    }
58

59
    // Assignment from smaller cascade
60
    template<size_t M>
61
    floatcascade& operator=(const floatcascade<M>& other) {
62
        static_assert(M <= N, "Cannot assign larger cascade to smaller one automatically");
63
        for (size_t i = 0; i < M; ++i) e[i] = other[i];
64
        for (size_t i = M; i < N; ++i) e[i] = 0.0;
65
        return *this;
66
    }
67

68
    // Component access
69
    constexpr double operator[](size_t i) const noexcept { return e[i]; }
33,780,874✔
70
    constexpr double& operator[](size_t i) { return e[i]; }
50,970,858✔
71
    
72
    // modifiers
73
    constexpr void clear() {
8✔
74
        for (size_t i = 0; i < N; ++i) e[i] = 0.0;
32✔
75
    }
8✔
76
    constexpr void set(double x) {
76✔
77
        e[0] = x;
76✔
78
        for (size_t i = 1; i < N; ++i) e[i] = 0.0;
224✔
79
    }
76✔
80
    constexpr void set(const std::array<double, N>& components) {
81
        e = components;
82
        }
83

84
    // selectors
85
    constexpr size_t size() const noexcept { return N; }
2✔
86
    const std::array<double, N>& data() const noexcept { return e; }
87
    std::array<double, N>& data() { return e; }
1✔
88

89
    constexpr bool iszero()   const noexcept { return testFirstComponent(0.0); }
59,220✔
90
    constexpr bool isone()    const noexcept { return testFirstComponent(1.0); }
37,843✔
91
    // std::signbit is not constexpr in C++20; use bit_cast at compile time
92
    // to preserve the IEEE-754 sign bit (so -0.0 and negative NaNs are
93
    // correctly classified).
94
        BIT_CAST_CONSTEXPR bool ispos()    const noexcept {
95
        if (std::is_constant_evaluated()) {
96
            return (std::bit_cast<std::uint64_t>(e[0]) >> 63) == 0;
97
        }
98
        return !std::signbit(e[0]);
99
    }
100
    BIT_CAST_CONSTEXPR bool isneg()    const noexcept {
148,408✔
101
        if (std::is_constant_evaluated()) {
148,408✔
NEW
102
            return (std::bit_cast<std::uint64_t>(e[0]) >> 63) != 0;
×
103
        }
104
        return std::signbit(e[0]);
148,408✔
105
    }
106
    constexpr bool isnan(int NaNType = NAN_TYPE_EITHER)  const noexcept {
3,411✔
107
        bool negative = isneg();
3,411✔
108
        int nan_type;
109
        bool isNaN = checkNaN(e[0], nan_type);
3,411✔
110
        bool isNegNaN = isNaN && negative;
3,411✔
111
        bool isPosNaN = isNaN && !negative;
3,411✔
112
        return (NaNType == NAN_TYPE_EITHER ? (isNegNaN || isPosNaN) :
3,415✔
113
            (NaNType == NAN_TYPE_SIGNALLING ? isNegNaN :
6✔
114
                (NaNType == NAN_TYPE_QUIET ? isPosNaN : false)));
3,415✔
115
    }
4✔
116
    constexpr bool isinf(int InfType = INF_TYPE_EITHER)  const noexcept {
2,116✔
117
        bool negative = isneg();
2,116✔
118
        int inf_type;
119
        bool isInf = checkInf(e[0], inf_type);
2,116✔
120
        bool isNegInf = isInf && negative;
2,116✔
121
        bool isPosInf = isInf && !negative;
2,116✔
122
        return (InfType == INF_TYPE_EITHER ? (isNegInf || isPosInf) :
2,120✔
123
            (InfType == INF_TYPE_NEGATIVE ? isNegInf :
6✔
124
                (InfType == INF_TYPE_POSITIVE ? isPosInf : false)));
2,120✔
125
        }  
4✔
126

127

128

129
    // Conversion to double (estimate)
130
    constexpr double to_double() const {
160,245✔
131
        double sum{ 0 };
160,245✔
132
        if constexpr (1 == N) {
133
            sum = e[0];
134
        }
135
        else if constexpr (2 == N) {
136
            sum = e[0] + e[1];
7✔
137
        }
138
        else if constexpr (3 == N) {
139
            sum  = e[2] + e[1];
63,732✔
140
            sum += e[0];
63,732✔
141
        }
142
        else if constexpr (4 == N) {
143
            double l = e[3] + e[2];
96,506✔
144
            double r = e[1] + e[0];
96,506✔
145
                        sum = l + r;
96,506✔
146
        }
147
        else if constexpr (5 == N) {
148
            double l = e[4] + e[3] + e[2];
149
            double r = e[1] + e[0];
150
            sum = l + r;
151
        }
152
        else if constexpr (6 == N) {
153
            double p1 = e[5] + e[4];
154
            double p2 = e[3] + e[2];
155
                        p2 += p1;
156
            p1 = e[1] + e[0];
157
            sum = p1 + p2;
158
        }
159
        else {
160
            // general case
161
                        sum = e[N - 1] + e[N - 2];
162
            for (size_t i = 2; i < N; ++i) {
163
                sum += e[N - 1 - i];
164
            }
165
        }
166
        return sum;
160,245✔
167
    }
168

169
    // Basic properties
170

171
    constexpr int sign() const noexcept {
9,396✔
172
                return (e[0] > 0.0) ? 1 : ((e[0] < 0.0) ? -1 : 0);
9,396✔
173
    }
174

175
    constexpr int scale() const noexcept {
176
        return sw::universal::scale(e[0]); 
177
    }
178

179
    // Decimal conversion methods
180
    // Declaration only - implementations after expansion_ops namespace
181
    void to_digits(std::vector<char>& s, int& exponent, int precision) const;
182

183
    std::string to_string(
184
        std::streamsize precision = 7,
185
        std::streamsize width = 15,
186
        bool fixed = false,
187
        bool scientific = true,
188
        bool internal = false,
189
        bool left = false,
190
        bool showpos = false,
191
        bool uppercase = false,
192
        char fill = ' '
193
    ) const;
194

195
    // Parse a decimal string into this floatcascade
196
    // Returns true on success, false on parse error
197
    bool parse(const std::string& str);
198

199
    // Arithmetic operators (implementations after expansion_ops namespace)
200
    floatcascade& operator+=(const floatcascade& rhs) noexcept;
201
    floatcascade& operator-=(const floatcascade& rhs) noexcept;
202
    floatcascade& operator*=(const floatcascade& rhs) noexcept;
203
    floatcascade& operator/=(const floatcascade& rhs) noexcept;
204

205
    // Arithmetic operators with double
206
    floatcascade& operator+=(double rhs) noexcept { return *this += floatcascade(rhs); }
13,161✔
207
    floatcascade& operator-=(double rhs) noexcept { return *this -= floatcascade(rhs); }
208
    floatcascade& operator*=(double rhs) noexcept { return *this *= floatcascade(rhs); }
13,161✔
209
    floatcascade& operator/=(double rhs) noexcept { return *this /= floatcascade(rhs); }
210

211
    // Debug output
212
        template<size_t M>
213
    friend std::string to_tuple(const floatcascade<M>& fc);
214

215
    friend std::ostream& operator<<(std::ostream& os, const floatcascade& fc) {
13✔
216
        os << "floatcascade<" << N << ">[";
13✔
217
        for (size_t i = 0; i < N; ++i) {
65✔
218
            if (i > 0) os << ", ";
52✔
219
            os << fc.e[i];
52✔
220
        }
221
        os << "] ~ " << fc.to_double();
13✔
222
        return os;
13✔
223
    }
224

225
private:
226

227
    // Helper methods for to_digits
228

229
    void round_string(std::vector<char>& s, int precision, int* decimalPoint) const {
×
230
        int nrDigits = precision;
×
231
        // round decimal string and propagate carry
232
        int lastDigit = nrDigits - 1;
×
233
        if (s[static_cast<unsigned>(lastDigit)] >= '5') {
×
234
            int i = nrDigits - 2;
×
235
            s[static_cast<unsigned>(i)]++;
×
236
            while (i > 0 && s[static_cast<unsigned>(i)] > '9') {
×
237
                s[static_cast<unsigned>(i)] -= 10;
×
238
                s[static_cast<unsigned>(--i)]++;
×
239
            }
240
        }
241

242
        // if first digit is 10, shift everything
243
        if (s[0] > '9') {
×
244
            for (int i = precision; i >= 2; --i)
×
245
                s[static_cast<unsigned>(i)] = s[static_cast<unsigned>(i - 1)];
×
246
            s[0u] = '1';
×
247
            s[1u] = '0';
×
248

249
            (*decimalPoint)++;  // increment decimal point
×
250
            ++precision;
×
251
        }
252
    }
×
253

254
    void append_exponent(std::string& str, int exp) const {
1,051✔
255
        str += (exp < 0 ? '-' : '+');
1,051✔
256
        exp = std::abs(exp);
1,051✔
257
        int k;
258
        if (exp >= 100) {
1,051✔
259
            k = (exp / 100);
65✔
260
            str += static_cast<char>('0' + k);
65✔
261
            exp -= 100 * k;
65✔
262
        }
263

264
        k = (exp / 10);
1,051✔
265
        str += static_cast<char>('0' + k);
1,051✔
266
        exp -= 10 * k;
1,051✔
267

268
        str += static_cast<char>('0' + exp);
1,051✔
269
    }
1,051✔
270

271
    constexpr bool testFirstComponent(double v) const noexcept {
97,063✔
272
        if constexpr (2 == N) {
273
            return e[0] == v && e[1] == 0.0;
30,947✔
274
        }
275
        else if constexpr (3 == N) {
276
            return e[0] == v && e[1] == 0.0 && e[2] == 0.0;
30,522✔
277
        }
278
        else if constexpr (4 == N) {
279
            return e[0] == v && e[1] == 0.0 && e[2] == 0.0 && e[3] == 0.0;
35,593✔
280
        }
281
        else if constexpr (5 == N) {
282
            return e[0] == v && e[1] == 0.0 && e[2] == 0.0 && e[3] == 0.0 && e[4] == 0.0;
283
        }
284
        else if constexpr (6 == N) {
285
            return e[0] == v && e[1] == 0.0 && e[2] == 0.0 && e[3] == 0.0 && e[4] == 0.0 && e[5] == 0.0;
286
        }
287
        else {
288
            // general case
289
            if (e[0] != v) return false;
1✔
290
            for (size_t i = 1; i < N; ++i) {
×
291
                if (e[i] != 0.0) return false;
×
292
            }
293
            return true;
×
294
        }
295
    }
296

297
};
298

299
template<size_t N>
300
std::string to_tuple(const floatcascade<N>& fc) {
5✔
301
    std::stringstream ss;
5✔
302
        ss << std::scientific;
5✔
303
        //ss.setprecision(17); // max precision of double
304
    ss << "{ ";
5✔
305
    for (size_t i = 0; i < N; ++i) {
19✔
306
        if (i > 0) ss << ", ";
14✔
307
        ss << fc.e[i];
14✔
308
    }
309
    ss << '}';
5✔
310
    return ss.str();
10✔
311
}
5✔
312

313
template<size_t N>
314
std::string to_scientific(const floatcascade<N>& fc,
2✔
315
    int precision = N * 17,
316
    bool showpos = false,
317
    bool uppercase = false,
318
    bool trailing_zeros = true) {
319
    // precondition: fc is a normalized floatcascade
320

321
    // Handle special cases early
322
    if (fc.isnan(NAN_TYPE_QUIET)) return std::string("qNaN");
2✔
323
    if (fc.isnan(NAN_TYPE_SIGNALLING)) return std::string("sNaN");
2✔
324
    if (fc.isinf(INF_TYPE_POSITIVE)) return std::string("Inf");
2✔
325
    if (fc.isinf(INF_TYPE_NEGATIVE)) return std::string("-Inf");
2✔
326
    if (fc.iszero()) return std::string(showpos ? "+0.0e+0" : "0.0e+0");
2✔
327

328
    // Step 1: Estimate exponent from the most significant non-zero component
329
    double hi = fc[0];
2✔
330
    int exp10 = static_cast<int>(std::floor(std::log10(std::abs(hi))));
2✔
331
    if (!std::isfinite(exp10)) exp10 = 0; // handle log10(0) = -inf case
2✔
332
    double scale = std::pow(10.0, -exp10);
2✔
333

334
        // Step 2: Scale all components so we are in the range [0.0, 10.0)
335
    double scaled[N];
336
    for (size_t i = 0; i < N; ++i) {
6✔
337
        scaled[i] = fc[i] * scale;
4✔
338
    }
339
    double acc = 0.0;
2✔
340
    for (size_t i = 0; (i < N) && (i < 3); ++i) acc += scaled[i];
6✔
341
        bool negative = std::signbit(acc);
2✔
342
        acc = std::abs(acc);
2✔
343

344
    // Step 3: Generate digits iteratively
345
    std::string digits;
2✔
346
        digits.reserve(static_cast<size_t>(precision) + 2); // leading digit + precision digits
2✔
347

348
    for (int i = 0; i <= precision; ++i) {
72✔
349
        int digit = static_cast<int>(acc);
70✔
350
                if (digit > 9) digit = 9; // defensive clamp
70✔
351
        digits.push_back(static_cast<char>('0' + digit));
70✔
352
        acc = (acc - digit) * 10.0;
70✔
353
    }
354

355
        // Step 4: Round last digit and propagate carry (with exponent adjustment)
356
    if (acc >= 5.0) {
2✔
357
                int i = precision;
2✔
358
        for (; i >= 0 && digits[i] == '9'; --i) {
2✔
359
                        digits[i] = '0';
×
360
        }
361
        if (i >= 0) {
2✔
362
            digits[i] += 1; // increment this digit
2✔
363
        }
364
        else {
365
                        // overflowed the leading digit: 9.99... 9 -> 10.0...0
366
            digits.insert(digits.begin(), '1');
×
367
            exp10 += 1;
×
368
                }
369
    }
370

371
    // Step 5: Format string
372
    std::string result;
2✔
373
    if (negative) result += '-';
2✔
374
    else if (showpos) result += '+';
2✔
375

376
    result += digits[0]; // leading digit
2✔
377
    result += '.';
2✔
378
    if (precision > 0) {
2✔
379
        result.append(digits.begin() + 1, digits.begin() + 1 + precision);
2✔
380
        if (trailing_zeros) {
2✔
381
            // digits already has length precision + 1
382
        }
383
        else {
384
                        // trim trailing zeros after decimal point
385
            while (!result.empty() && result.back() == '0') {
×
386
                result.pop_back();
×
387
                        }
388
            if (!result.empty() && result.back() == '.') {
×
389
                result.pop_back(); // remove decimal point if no digits follow
×
390
                        }
391
        }
392
    }
393
    else {
394
                result += '0'; // no precision requested, just add a zero
×
395
    }
396

397
    result += uppercase ? "E" : "e";
2✔
398
    result += (exp10 >= 0 ? "+" : "-");
2✔
399
    result += std::to_string(std::abs(exp10));
2✔
400

401
    return result;
2✔
402
}
2✔
403

404

405
// Core expansion operations - the "engine" for all cascade operations
406
namespace expansion_ops {
407

408
    // Knuth's TWO-SUM: computes a + b = x + y exactly
409
    // Volatile intermediates prevent compiler reordering at runtime; the
410
    // constexpr branch (selected during constant evaluation) drops volatile
411
    // since constexpr arithmetic is exact under IEEE-754 rules.
412
    constexpr inline void two_sum(double a, double b, double& x, double& y) {
41,781,518✔
413
        if (std::is_constant_evaluated()) {
41,781,518✔
NEW
414
            double vx = a + b;
×
NEW
415
            x = vx;
×
NEW
416
            double b_virtual = vx - a;
×
NEW
417
            double a_virtual = vx - b_virtual;
×
NEW
418
            double b_roundoff = b - b_virtual;
×
NEW
419
            double a_roundoff = a - a_virtual;
×
NEW
420
            double vy = a_roundoff + b_roundoff;
×
NEW
421
            y = vy;
×
422
        }
423
        else {
424
            volatile double vx = a + b;
41,781,518✔
425
            x = vx;
41,781,518✔
426
            volatile double b_virtual = vx - a;
41,781,518✔
427
            volatile double a_virtual = vx - b_virtual;
41,781,518✔
428
            volatile double b_roundoff = b - b_virtual;
41,781,518✔
429
            volatile double a_roundoff = a - a_virtual;
41,781,518✔
430
            volatile double vy = a_roundoff + b_roundoff;
41,781,518✔
431
            y = vy;
41,781,518✔
432
        }
433
    }
41,781,518✔
434

435
    // Dekker's FAST-TWO-SUM: assumes |a| >= |b|
436
    constexpr inline void fast_two_sum(double a, double b, double& x, double& y) {
6,866,620✔
437
        if (std::is_constant_evaluated()) {
6,866,620✔
NEW
438
            double vx = a + b;
×
NEW
439
            x = vx;
×
NEW
440
            double vy = b - (vx - a);
×
NEW
441
            y = vy;
×
442
        }
443
        else {
444
            volatile double vx = a + b;
6,866,620✔
445
            x = vx;
6,866,620✔
446
            volatile double vy = b - (vx - a);
6,866,620✔
447
            y = vy;
6,866,620✔
448
        }
449
    }
6,866,620✔
450

451
    // Add single double to N-component cascade, result in (N+1)-component cascade
452
    // Volatiles are handled inside two_sum, so locals don't need to be volatile
453
    template<size_t N>
454
    floatcascade<N+1> grow_expansion(const floatcascade<N>& e, double b) {
1✔
455
        floatcascade<N+1> result;
1✔
456
        double q = b;
1✔
457
        double h;
458

459
        // Process from least significant (end) to most significant (beginning)
460
        for (size_t i = N; i-- > 0; ) {
3✔
461
            two_sum(q, e[i], q, h);
2✔
462
            result[i + 1] = h;  // shift components right
2✔
463
        }
464
        result[0] = q;  // most significant component at [0]
1✔
465

466
        return result;
1✔
467
    }
468

469
    // Add two cascades of same size, result in double-size cascade
470
    template<size_t N>
471
    floatcascade<2 * N> add_cascades(const floatcascade<N>& a, const floatcascade<N>& b) {
599,351✔
472
        // Merge the two N-component cascades
473
        std::array<double, 2 * N> merged;
474
        std::array<double, 2 * N> magnitudes;
475

476
        // Collect all components and their magnitudes
477
        for (size_t i = 0; i < N; ++i) {
2,753,316✔
478
            merged[2 * i] = a[i];
2,153,965✔
479
            merged[2 * i + 1] = b[i];
2,153,965✔
480
            magnitudes[2 * i] = std::abs(a[i]);
2,153,965✔
481
            magnitudes[2 * i + 1] = std::abs(b[i]);
2,153,965✔
482
        }
483

484
        // Sort by magnitude - LARGEST FIRST for decreasing order
485
        for (size_t i = 0; i < 2 * N - 1; ++i) {
4,307,930✔
486
            for (size_t j = 0; j < 2 * N - 1 - i; ++j) {
17,325,700✔
487
                if (magnitudes[j] < magnitudes[j + 1]) {  // Changed: < instead of >
13,617,121✔
488
                    std::swap(merged[j], merged[j + 1]);
850,249✔
489
                    std::swap(magnitudes[j], magnitudes[j + 1]);
850,249✔
490
                }
491
            }
492
        }
493

494
        // Accumulate from smallest to largest (reverse order of sorted array)
495
        // Volatiles are handled inside two_sum, so locals don't need to be volatile
496
        floatcascade<2 * N> result;
599,351✔
497
        double sum = 0.0;
599,351✔
498
        std::vector<double> corrections;
599,351✔
499

500
        // Process from end (smallest) to beginning (largest)
501
        for (int i = 2 * N - 1; i >= 0; --i) {  // Changed: reverse iteration
4,907,281✔
502
            double new_sum, error;
503
            two_sum(sum, merged[static_cast<size_t>(i)], new_sum, error);
4,307,930✔
504

505
            if (error != 0.0) {
4,307,930✔
506
                corrections.push_back(error);
3,263,881✔
507
            }
508
            sum = new_sum;
4,307,930✔
509
        }
510

511
        // Store most significant component first
512
        result[0] = sum;
599,351✔
513

514
        // Store corrections in decreasing order of significance
515
        for (size_t i = 0; i < std::min(corrections.size(), size_t(2 * N - 1)); ++i) {
3,863,232✔
516
            result[i + 1] = corrections[corrections.size() - 1 - i];  // Reverse order
3,263,881✔
517
        }
518

519
        return result;
1,198,702✔
520
    }
599,351✔
521

522
    // Compress cascade to remove unnecessary precision
523
    template<size_t N>
524
    floatcascade<N> compress(const floatcascade<N>& e) {
525
        // Simple compression - could be much more sophisticated
526
        floatcascade<N> result = e;
527

528
        // Remove tiny components that don't contribute significantly
529
        double threshold = std::abs(result.to_double()) * 1e-16;
530
        for (size_t i = 0; i < N; ++i) {
531
            if (std::abs(result[i]) < threshold) {
532
                result[i] = 0.0;
533
            }
534
        }
535

536
        return result;
537
    }
538

539
    // Priest's TWO-PROD: computes a * b = x + y exactly
540
    // Runtime path uses std::fma (hardware FMA on supported targets).
541
    // Constexpr branch falls back to a split-based product (FMA-free) since
542
    // std::fma is not constexpr in C++20.  sw::universal::split is constexpr
543
    // (from error_free_ops.hpp) and gives the exact a_hi/a_lo/b_hi/b_lo
544
    // decomposition that lets us recover the rounding error via
545
    //   y = ((a_hi*b_hi - p) + a_hi*b_lo + a_lo*b_hi) + a_lo*b_lo.
546
    constexpr inline void two_prod(double a, double b, double& x, double& y) {
7,489,477✔
547
        if (std::is_constant_evaluated()) {
7,489,477✔
NEW
548
            double p = a * b;
×
NEW
549
            x = p;
×
NEW
550
            if (sw::universal::is_finite_cx(p)) {
×
NEW
551
                double a_hi = 0.0, a_lo = 0.0, b_hi = 0.0, b_lo = 0.0;
×
NEW
552
                sw::universal::split(a, a_hi, a_lo);
×
NEW
553
                sw::universal::split(b, b_hi, b_lo);
×
NEW
554
                y = ((a_hi * b_hi - p) + a_hi * b_lo + a_lo * b_hi) + a_lo * b_lo;
×
555
            }
556
            else {
NEW
557
                y = 0.0;
×
558
            }
559
        }
560
        else {
561
            volatile double vx = a * b;
7,489,477✔
562
            x = vx;
7,489,477✔
563
            // Use FMA if available for exact error
564
            volatile double vy = std::fma(a, b, -vx);
7,489,477✔
565
            y = vy;
7,489,477✔
566
        }
567
    }
7,489,477✔
568

569
    // THREE-SUM: sum three doubles, accumulate errors
570
    // Volatiles are handled inside two_sum, so temporaries don't need to be volatile
571
    constexpr inline void three_sum(double& a, double& b, double& c) {
325,638✔
572
        double t1{}, t2{}, t3{};
325,638✔
573
        two_sum(a, b, t1, t2);
325,638✔
574
        two_sum(t1, c, a, t3);
325,638✔
575
        two_sum(t2, t3, b, c);
325,638✔
576
    }
325,638✔
577

578
    // THREE-SUM2: variant that doesn't reorder inputs
579
    // Volatiles are handled inside two_sum, so temporaries don't need to be volatile
580
    constexpr inline void three_sum2(double a, double b, double c, double& x, double& y, double& z) {
581
        double t1{}, t2{}, t3{};
582
        two_sum(a, b, t1, t2);
583
        two_sum(t1, c, x, t3);
584
        two_sum(t2, t3, y, z);
585
    }
586

587
    // Renormalize N components to maintain non-overlapping property
588
    // Improved two-phase algorithm based on Hida-Li-Bailey QD library
589
    // Volatiles are handled inside quick_two_sum, so locals don't need to be volatile
590
    //
591
    // Phase 1: Compression - bottom-up accumulation using quick_two_sum
592
    // Phase 2: Conditional refinement - carry propagation with zero detection
593
    //
594
    // This ensures the non-overlapping property: |component[i+1]| ≤ ulp(component[i])/2
595
    // Fixes precision loss in iterative algorithms (e.g., pow() improved from 77-92 bits to 200+ bits)
596

597
    // Generic version for arbitrary N
598
    template<size_t N>
599
    floatcascade<N> renormalize(const floatcascade<N>& e) {
600
        floatcascade<N> result = e;
601
        if (std::isinf(result[0])) return result;
602

603
        // Phase 1: Compression
604
        std::array<double, N> s;
605
        volatile double sum = result[N-1];
606
        for (int i = static_cast<int>(N) - 2; i >= 0; --i) {
607
            sum = quick_two_sum(result[i], sum, s[i+1]);
608
        }
609
        s[0] = sum;
610

611
        // Phase 2: Simple linear propagation for arbitrary N
612
        for (size_t i = 0; i < N-1; ++i) {
613
            result[i] = quick_two_sum(s[i], s[i+1], result[i+1]);
614
        }
615
        result[N-1] = s[N-1];
616

617
        return result;
618
    }
619

620
    // Specialization for N=2 (double-double)
621
    // Replaces std::isinf with sw::universal::is_inf_cx (constexpr in C++20).
622
    template<>
623
    constexpr inline floatcascade<2> renormalize<2>(const floatcascade<2>& e) {
4,600✔
624
        floatcascade<2> result = e;
4,600✔
625
        if (sw::universal::is_inf_cx(result[0])) return result;
4,600✔
626

627
        result[0] = quick_two_sum(result[0], result[1], result[1]);
4,599✔
628
        return result;
4,599✔
629
    }
630

631
    // Specialization for N=3 (triple-double)
632
    template<>
633
    inline floatcascade<3> renormalize<3>(const floatcascade<3>& e) {
300,968✔
634
        floatcascade<3> result = e;
300,968✔
635
        if (std::isinf(result[0])) return result;
300,968✔
636

637
        double s0, s1, s2 = 0.0;
300,968✔
638

639
        // Phase 1: Compression
640
        s0 = quick_two_sum(result[1], result[2], result[2]);
300,968✔
641
        result[0] = quick_two_sum(result[0], s0, result[1]);
300,968✔
642

643
        // Phase 2: Conditional refinement
644
        s0 = result[0];
300,968✔
645
        s1 = result[1];
300,968✔
646

647
        if (s1 != 0.0) {
300,968✔
648
            s1 = quick_two_sum(s1, result[2], s2);
296,091✔
649
        } else {
650
            s0 = quick_two_sum(s0, result[2], s1);
4,877✔
651
        }
652

653
        result[0] = s0;
300,968✔
654
        result[1] = s1;
300,968✔
655
        result[2] = s2;
300,968✔
656
        return result;
300,968✔
657
    }
658

659
    // Specialization for N=4 (quad-double) - matches QD library exactly
660
    template<>
661
    inline floatcascade<4> renormalize<4>(const floatcascade<4>& e) {
252,495✔
662
        floatcascade<4> result = e;
252,495✔
663
        if (std::isinf(result[0])) return result;
252,495✔
664

665
        double s0, s1, s2 = 0.0, s3 = 0.0;
252,495✔
666

667
        // Phase 1: Compression
668
        s0 = quick_two_sum(result[2], result[3], result[3]);
252,495✔
669
        s0 = quick_two_sum(result[1], s0, result[2]);
252,495✔
670
        result[0] = quick_two_sum(result[0], s0, result[1]);
252,495✔
671

672
        // Phase 2: Conditional refinement (matches QD library algorithm exactly)
673
        s0 = result[0];
252,495✔
674
        s1 = result[1];
252,495✔
675

676
        if (s1 != 0.0) {
252,495✔
677
            s1 = quick_two_sum(s1, result[2], s2);
247,797✔
678
            if (s2 != 0.0)
247,797✔
679
                s2 = quick_two_sum(s2, result[3], s3);
241,043✔
680
            else
681
                s1 = quick_two_sum(s1, result[3], s2);
6,754✔
682
        } else {
683
            s0 = quick_two_sum(s0, result[2], s1);
4,698✔
684
            if (s1 != 0.0)
4,698✔
685
                s1 = quick_two_sum(s1, result[3], s2);
×
686
            else
687
                s0 = quick_two_sum(s0, result[3], s1);
4,698✔
688
        }
689

690
        result[0] = s0;
252,495✔
691
        result[1] = s1;
252,495✔
692
        result[2] = s2;
252,495✔
693
        result[3] = s3;
252,495✔
694
        return result;
252,495✔
695
    }
696

697
    /*
698
     * EXPANSION COMPRESSION FUNCTIONS
699
     * ================================
700
     *
701
     * Why Naive Compression Fails
702
     * ---------------------------
703
     * When adding two N-component cascades, add_cascades() produces a 2N-component expansion
704
     * that must be compressed back to N components. The naive approach:
705
     *
706
     *   compressed[last] = result[N-1] + result[N] + result[N+1] + ... + result[2N-1]
707
     *
708
     * is FUNDAMENTALLY BROKEN because it uses floating-point addition without capturing
709
     * rounding errors. Each '+' operation loses precision in the tail bits.
710
     *
711
     * Testing Insights
712
     * ----------------
713
     * The bug manifests most severely in the identity test: (a+b)-a = b
714
     *
715
     * For qd_cascade (8→4 compression):
716
     *   - Naive sum: result[3] + result[4] + result[5] + result[6] + result[7]
717
     *   - Loses cumulative rounding errors across 4 additions
718
     *   - Test FAILED: recovered b had wrong sign and magnitude in component[3]
719
     *   - Error: -1.5e-51 instead of 5e-52 (212-bit precision destroyed)
720
     *
721
     * For td_cascade (6→3 compression):
722
     *   - Naive sum: result[2] + result[3] + result[4] + result[5]
723
     *   - Loses cumulative rounding errors across 3 additions
724
     *   - Test PASSES but only because renormalize() afterward salvages precision
725
     *   - Still incorrect in principle - relies on post-processing to fix errors
726
     *
727
     * For dd_cascade (4→2 compression):
728
     *   - Naive sum: result[1] + result[2] + result[3]
729
     *   - Loses cumulative rounding errors across 2 additions
730
     *   - Test PASSES but same caveat as td_cascade
731
     *
732
     * The Proven Solution: Error-Free Compression
733
     * --------------------------------------------
734
     * Based on Hida, Li, Bailey "Library for Double-Double and Quad-Double Arithmetic"
735
     *
736
     * Two-phase algorithm:
737
     * 1. Bottom-up accumulation: Use fast_two_sum to capture all rounding errors
738
     * 2. Conditional extraction: Dynamically extract non-overlapping components
739
     *
740
     * Key insight: fast_two_sum(a, b, sum, error) guarantees a + b = sum + error EXACTLY
741
     * By chaining these operations and carefully managing where errors flow, we maintain
742
     * the non-overlapping property required for multi-component arithmetic.
743
     *
744
     * Why conditional extraction? Components may cancel to zero during accumulation.
745
     * The algorithm dynamically shifts remaining precision into available slots, ensuring
746
     * we extract the N most significant non-overlapping components.
747
     */
748

749
    // Compress 4-component cascade to 2 components following proven QD algorithm
750
    // Used by dd_cascade for (2+2)→2 compression after addition/subtraction
751
    constexpr inline floatcascade<2> compress_4to2(const floatcascade<4>& e) {
200,815✔
752
        double r0 = e[0];
200,815✔
753
        double r1 = e[1];
200,815✔
754
        double r2 = e[2];
200,815✔
755
        double r3 = e[3];
200,815✔
756

757
        double s0{}, s1{}, s2{};
200,815✔
758

759
        // Phase 1: Bottom-up accumulation using fast_two_sum
760
        fast_two_sum(r2, r3, s0, r3);  // s0 = r2+r3, error->r3
200,815✔
761
        fast_two_sum(r1, s0, s0, r2);  // s0 = r1+s0, error->r2
200,815✔
762
        fast_two_sum(r0, s0, r0, r1);  // r0 = r0+s0, error->r1
200,815✔
763

764
        // Phase 2: Extract 2 non-overlapping components with conditional logic
765
        fast_two_sum(r0, r1, s0, s1);
200,815✔
766
        if (s1 != 0.0) {
200,815✔
767
            fast_two_sum(s1, r2, s1, s2);
184,164✔
768
            if (s2 != 0.0)
184,164✔
769
                s2 += r3;  // Final residual absorbed
148,082✔
770
            else
771
                s1 += r3;
36,082✔
772
        } else {
773
            fast_two_sum(s0, r2, s0, s1);
16,651✔
774
            if (s1 != 0.0)
16,651✔
775
                s1 += r3;
229✔
776
            else
777
                s0 += r3;
16,422✔
778
        }
779

780
        floatcascade<2> result;
200,815✔
781
        result[0] = s0;
200,815✔
782
        result[1] = s1;
200,815✔
783
        return result;
200,815✔
784
    }
785

786
    // Compress 6-component cascade to 3 components following proven QD algorithm
787
    // Used by td_cascade for (3+3)→3 compression after addition/subtraction
788
    inline floatcascade<3> compress_6to3(const floatcascade<6>& e) {
243,439✔
789
        double r0 = e[0];
243,439✔
790
        double r1 = e[1];
243,439✔
791
        double r2 = e[2];
243,439✔
792
        double r3 = e[3];
243,439✔
793
        double r4 = e[4];
243,439✔
794
        double r5 = e[5];
243,439✔
795

796
        double s0, s1, s2 = 0.0, s3;
243,439✔
797

798
        // Phase 1: Bottom-up accumulation using fast_two_sum
799
        fast_two_sum(r4, r5, s0, r5);  // s0 = r4+r5, error->r5
243,439✔
800
        fast_two_sum(r3, s0, s0, r4);  // s0 = r3+s0, error->r4
243,439✔
801
        fast_two_sum(r2, s0, s0, r3);  // s0 = r2+s0, error->r3
243,439✔
802
        fast_two_sum(r1, s0, s0, r2);  // s0 = r1+s0, error->r2
243,439✔
803
        fast_two_sum(r0, s0, r0, r1);  // r0 = r0+s0, error->r1
243,439✔
804

805
        // Phase 2: Extract 3 non-overlapping components with conditional logic
806
        fast_two_sum(r0, r1, s0, s1);
243,439✔
807
        if (s1 != 0.0) {
243,439✔
808
            fast_two_sum(s1, r2, s1, s2);
228,394✔
809
            if (s2 != 0.0) {
228,394✔
810
                fast_two_sum(s2, r3, s2, s3);
220,366✔
811
                if (s3 != 0.0)
220,366✔
812
                    s3 += r4 + r5;  // Final residual absorbed
89,846✔
813
                else
814
                    s2 += r4 + r5;
130,520✔
815
            } else {
816
                fast_two_sum(s1, r3, s1, s2);
8,028✔
817
                if (s2 != 0.0)
8,028✔
818
                    s2 += r4 + r5;
4,553✔
819
                else
820
                    s1 += r4 + r5;
3,475✔
821
            }
822
        } else {
823
            fast_two_sum(s0, r2, s0, s1);
15,045✔
824
            if (s1 != 0.0) {
15,045✔
825
                fast_two_sum(s1, r3, s1, s2);
7,689✔
826
                if (s2 != 0.0)
7,689✔
827
                    s2 += r4 + r5;
799✔
828
                else
829
                    s1 += r4 + r5;
6,890✔
830
            } else {
831
                fast_two_sum(s0, r3, s0, s1);
7,356✔
832
                if (s1 != 0.0)
7,356✔
833
                    s1 += r4 + r5;
54✔
834
                else
835
                    s0 += r4 + r5;
7,302✔
836
            }
837
        }
838

839
        floatcascade<3> result;
243,439✔
840
        result[0] = s0;
243,439✔
841
        result[1] = s1;
243,439✔
842
        result[2] = s2;
243,439✔
843
        return result;
243,439✔
844
    }
845

846
    // Compress 8-component cascade to 4 components following proven QD algorithm
847
    // Used by qd_cascade for (4+4)→4 compression after addition/subtraction
848
    // This implements the same algorithm as sw::universal::renorm(a0,a1,a2,a3,a4,...,a7)
849
    // Based on: Hida, Li, Bailey "Library for Double-Double and Quad-Double Arithmetic"
850
    inline floatcascade<4> compress_8to4(const floatcascade<8>& e) {
355,912✔
851
        // Note: Volatiles are used inside two_sum/fast_two_sum, so we don't need them here
852
        double r0 = e[0];
355,912✔
853
        double r1 = e[1];
355,912✔
854
        double r2 = e[2];
355,912✔
855
        double r3 = e[3];
355,912✔
856
        double r4 = e[4];
355,912✔
857
        double r5 = e[5];
355,912✔
858
        double r6 = e[6];
355,912✔
859
        double r7 = e[7];
355,912✔
860

861
        double s0, s1, s2 = 0.0, s3 = 0.0, s4;
355,912✔
862

863
        // Phase 1: Bottom-up accumulation using fast_two_sum
864
        // Following proven QD algorithm: accumulate from least to most significant
865
        // Pattern: s0 = sum(a[i], s0), error goes back to a[i]
866
        fast_two_sum(r6, r7, s0, r7);  // s0 = r6+r7, error->r7
355,912✔
867
        fast_two_sum(r5, s0, s0, r6);  // s0 = r5+s0, error->r6
355,912✔
868
        fast_two_sum(r4, s0, s0, r5);  // s0 = r4+s0, error->r5
355,912✔
869
        fast_two_sum(r3, s0, s0, r4);  // s0 = r3+s0, error->r4
355,912✔
870
        fast_two_sum(r2, s0, s0, r3);  // s0 = r2+s0, error->r3
355,912✔
871
        fast_two_sum(r1, s0, s0, r2);  // s0 = r1+s0, error->r2
355,912✔
872
        fast_two_sum(r0, s0, r0, r1);  // r0 = r0+s0, error->r1
355,912✔
873

874
        // Now we have redistributed: r0 (most sig), r1, r2, r3, r4, r5, r6
875

876
        // Phase 2: Extract 4 non-overlapping components with conditional logic
877
        // This is the proven algorithm from QD library
878
        s0 = r0;
355,912✔
879
        s1 = r1;
355,912✔
880

881
        fast_two_sum(r0, r1, s0, s1);
355,912✔
882
        if (s1 != 0.0) {
355,912✔
883
            fast_two_sum(s1, r2, s1, s2);
334,029✔
884
            if (s2 != 0.0) {
334,029✔
885
                fast_two_sum(s2, r3, s2, s3);
323,058✔
886
                if (s3 != 0.0) {
323,058✔
887
                    fast_two_sum(s3, r4, s3, s4);
153,766✔
888
                    if (s4 != 0.0)
153,766✔
889
                        s4 += r5 + r6;  // Final residual absorbed (unavoidable precision loss)
80,750✔
890
                    else
891
                        s3 += r5 + r6;
73,016✔
892
                } else {
893
                    fast_two_sum(s2, r4, s2, s3);
169,292✔
894
                    if (s3 != 0.0)
169,292✔
895
                        s3 += r5 + r6;
165,483✔
896
                    else
897
                        s2 += r5 + r6;
3,809✔
898
                }
899
            } else {
900
                fast_two_sum(s1, r3, s1, s2);
10,971✔
901
                if (s2 != 0.0) {
10,971✔
902
                    fast_two_sum(s2, r4, s2, s3);
7,115✔
903
                    if (s3 != 0.0)
7,115✔
904
                        s3 += r5 + r6;
4,971✔
905
                    else
906
                        s2 += r5 + r6;
2,144✔
907
                } else {
908
                    fast_two_sum(s1, r4, s1, s2);
3,856✔
909
                    if (s2 != 0.0)
3,856✔
910
                        s2 += r5 + r6;
252✔
911
                    else
912
                        s1 += r5 + r6;
3,604✔
913
                }
914
            }
915
        } else {
916
            fast_two_sum(s0, r2, s0, s1);
21,883✔
917
            if (s1 != 0.0) {
21,883✔
918
                fast_two_sum(s1, r3, s1, s2);
12,188✔
919
                if (s2 != 0.0) {
12,188✔
920
                    fast_two_sum(s2, r4, s2, s3);
10,567✔
921
                    if (s3 != 0.0)
10,567✔
922
                        s3 += r5 + r6;
1,067✔
923
                    else
924
                        s2 += r5 + r6;
9,500✔
925
                } else {
926
                    fast_two_sum(s1, r4, s1, s2);
1,621✔
927
                    if (s2 != 0.0)
1,621✔
928
                        s2 += r5 + r6;
1,095✔
929
                    else
930
                        s1 += r5 + r6;
526✔
931
                }
932
            } else {
933
                fast_two_sum(s0, r3, s0, s1);
9,695✔
934
                if (s1 != 0.0) {
9,695✔
935
                    fast_two_sum(s1, r4, s1, s2);
125✔
936
                    if (s2 != 0.0)
125✔
937
                        s2 += r5 + r6;
30✔
938
                    else
939
                        s1 += r5 + r6;
95✔
940
                } else {
941
                    fast_two_sum(s0, r4, s0, s1);
9,570✔
942
                    if (s1 != 0.0)
9,570✔
943
                        s1 += r5 + r6;
×
944
                    else
945
                        s0 += r5 + r6;
9,570✔
946
                }
947
            }
948
        }
949

950
        floatcascade<4> result;
355,912✔
951
        result[0] = s0;
355,912✔
952
        result[1] = s1;
355,912✔
953
        result[2] = s2;
355,912✔
954
        result[3] = s3;
355,912✔
955

956
        return result;
355,912✔
957
    }
958

959
    // Multiply two N-component cascades using diagonal partitioning.
960
    //
961
    // PRECISION NOTE (2026-03-16): This generic multiply computes all N*N
962
    // partial products, sorts them by magnitude, and accumulates with
963
    // error-free transformations. For N=4 this is 16 products plus their
964
    // 16 error terms, which introduces more intermediate rounding than the
965
    // hand-tuned qd::operator*= (which uses only 10 products with explicit
966
    // accumulation chains). In algorithms that chain many multiplications
967
    // (e.g., the 16 squarings in exp()), this causes floatcascade<4> to
968
    // lose ~10 decimal digits compared to qd. The sqr() specialization
969
    // below partially addresses this for squaring, but a full fix would
970
    // require a specialized operator*= for floatcascade<4> mirroring qd's
971
    // accurate_multiplication() algorithm (qd_impl.hpp lines 619-675).
972
    template<size_t N>
973
    floatcascade<N> multiply_cascades(const floatcascade<N>& a, const floatcascade<N>& b) {
543,228✔
974
        // Generate N×N products (partial products matrix)
975
        std::array<double, N * N> products;
976
        std::array<double, N * N> errors;
977

978
        // Compute all products with their errors using two_prod
979
        for (size_t i = 0; i < N; ++i) {
2,419,983✔
980
            for (size_t j = 0; j < N; ++j) {
8,495,304✔
981
                two_prod(a[i], b[j], products[i * N + j], errors[i * N + j]);
6,618,549✔
982
            }
983
        }
984

985
        // Diagonal partitioning: diagonal k contains all products[i*N+j] where i+j == k
986
        // Diagonals represent significance levels: smaller k = more significant
987
        // We have 2*N-1 diagonals total (k = 0 to 2*N-2)
988

989
        // Accumulate each diagonal separately using stable two_sum chains
990
        // Each diagonal produces a sum and carries errors to the next diagonal
991
        std::array<double, 2 * N - 1> diagonal_sums;
992
        std::array<double, 2 * N - 1> diagonal_errors;
993

994
        // Initialize all to zero
995
        for (size_t k = 0; k < 2 * N - 1; ++k) {
3,753,510✔
996
            diagonal_sums[k] = 0.0;
3,210,282✔
997
            diagonal_errors[k] = 0.0;
3,210,282✔
998
        }
999

1000
        // Process each diagonal: accumulate products and errors from previous diagonal
1001
        for (size_t diag = 0; diag < 2 * N - 1; ++diag) {
3,753,510✔
1002
            // Collect all terms for this diagonal
1003
            std::vector<double> terms;
3,210,282✔
1004

1005
            // Add products where i+j == diag
1006
            for (size_t i = 0; i <= diag && i < N; ++i) {
12,199,728✔
1007
                size_t j = diag - i;
8,989,446✔
1008
                if (j < N) {
8,989,446✔
1009
                    terms.push_back(products[i * N + j]);
6,618,549✔
1010
                }
1011
            }
1012

1013
            // Add errors from previous diagonal (errors[i*N+j] where i+j == diag-1)
1014
            if (diag > 0) {
3,210,282✔
1015
                for (size_t i = 0; i <= diag - 1 && i < N; ++i) {
9,779,745✔
1016
                    size_t j = diag - 1 - i;
7,112,691✔
1017
                    if (j < N) {
7,112,691✔
1018
                        terms.push_back(errors[i * N + j]);
6,075,321✔
1019
                    }
1020
                }
1021
            }
1022

1023
            // Accumulate all terms using stable two_sum chain
1024
            if (!terms.empty()) {
3,210,282✔
1025
                double sum = terms[0];
3,210,282✔
1026
                double accumulated_error = 0.0;
3,210,282✔
1027

1028
                for (size_t t = 1; t < terms.size(); ++t) {
12,693,870✔
1029
                    double new_sum, err;
1030
                    two_sum(sum, terms[t], new_sum, err);
9,483,588✔
1031
                    sum = new_sum;
9,483,588✔
1032
                    // Accumulate errors for propagation to next diagonal
1033
                    double err_sum, err_err;
1034
                    two_sum(accumulated_error, err, err_sum, err_err);
9,483,588✔
1035
                    accumulated_error = err_sum;
9,483,588✔
1036
                    // Higher-order errors go to next diagonal
1037
                    if (diag + 1 < 2 * N - 1) {
9,483,588✔
1038
                        diagonal_errors[diag + 1] += err_err;
8,397,132✔
1039
                    }
1040
                }
1041

1042
                diagonal_sums[diag] = sum;
3,210,282✔
1043
                diagonal_errors[diag] = accumulated_error;
3,210,282✔
1044
            }
1045
        }
1046

1047
        // Extract top N components by merging diagonals from most to least significant
1048
        // Build complete expansion including both diagonal sums and errors
1049
        std::vector<double> expansion;
543,228✔
1050
        expansion.reserve(2 * (2 * N - 1));
543,228✔
1051

1052
        for (size_t k = 0; k < 2 * N - 1; ++k) {
3,753,510✔
1053
            if (std::abs(diagonal_sums[k]) > 0.0) {
3,210,282✔
1054
                expansion.push_back(diagonal_sums[k]);
2,980,309✔
1055
            }
1056
            if (std::abs(diagonal_errors[k]) > 0.0) {
3,210,282✔
1057
                expansion.push_back(diagonal_errors[k]);
1,501,643✔
1058
            }
1059
        }
1060

1061
        // Sort by decreasing absolute magnitude to get most significant terms first
1062
        std::sort(expansion.begin(), expansion.end(),
543,228✔
1063
                  [](double a, double b) { return std::abs(a) > std::abs(b); });
8,152,048✔
1064

1065
        // Now accumulate into result using proper two_sum chains to maintain precision
1066
        floatcascade<N> result;
543,228✔
1067

1068
        // Initialize all components to zero explicitly
1069
        for (size_t i = 0; i < N; ++i) {
2,419,983✔
1070
            result[i] = 0.0;
1,876,755✔
1071
        }
1072

1073
        // Accumulate expansion terms into result components
1074
        if (!expansion.empty()) {
543,228✔
1075
            result[0] = expansion[0];  // Most significant term
539,766✔
1076

1077
            // Accumulate remaining terms using two_sum cascade
1078
            for (size_t i = 1; i < expansion.size(); ++i) {
4,481,952✔
1079
                double carry = expansion[i];
3,942,186✔
1080

1081
                // Try to add carry into result components, propagating errors down
1082
                for (size_t j = 0; j < N && std::abs(carry) > 0.0; ++j) {
16,669,809✔
1083
                    double sum, err;
1084
                    two_sum(result[j], carry, sum, err);
12,727,623✔
1085
                    result[j] = sum;
12,727,623✔
1086
                    carry = err;  // Error propagates to next component
12,727,623✔
1087
                }
1088

1089
                // CRITICAL: If carry is still non-zero after exhausting all N components,
1090
                // fold it into the least significant component to avoid silent data loss
1091
                if (std::abs(carry) > 0.0) {
3,942,186✔
1092
                    double sum, err;
1093
                    two_sum(result[N-1], carry, sum, err);
2,086,428✔
1094
                    result[N-1] = sum;
2,086,428✔
1095
                    // Remaining err is truly sub-ULP for N components and can be safely discarded
1096
                    // (it represents precision beyond what N floats can represent)
1097
                }
1098
            }
1099
        }
1100

1101
        // Renormalize to maintain non-overlapping property
1102
        return renormalize(result);
1,086,456✔
1103
    }
543,228✔
1104

1105
    // ---------------------------------------------------------------------
1106
    // Constexpr-friendly overloads for floatcascade<2> (used by dd_cascade)
1107
    // ---------------------------------------------------------------------
1108
    //
1109
    // The generic add_cascades<N> and multiply_cascades<N> templates use
1110
    // std::sort and std::vector, neither of which is constexpr in C++20
1111
    // (std::sort becomes constexpr in C++26).  These non-template overloads
1112
    // pick up before the templates via overload resolution when called with
1113
    // floatcascade<2> arguments, providing a constexpr path for dd_cascade.
1114
    //
1115
    // The algorithms mirror the proven qd_add / qd_mul implementations used
1116
    // by classic dd (see error_free_ops.hpp); they produce a normalized
1117
    // double-double result without going through the generic merge-sort-
1118
    // accumulate-and-renormalize chain.
1119

1120
    // Add two floatcascade<2> -> floatcascade<4>.  Performs a magnitude-sorted
1121
    // merge of the four limbs, then accumulates smallest-to-largest with
1122
    // two_sum.  Bubble sort over 4 elements (3 passes) is constexpr-clean
1123
    // once std::abs is replaced by a ternary.
1124
    constexpr inline floatcascade<4> add_cascades(const floatcascade<2>& a, const floatcascade<2>& b) {
200,815✔
1125
        // Merge components.
1126
        double m0 = a[0];
200,815✔
1127
        double m1 = b[0];
200,815✔
1128
        double m2 = a[1];
200,815✔
1129
        double m3 = b[1];
200,815✔
1130

1131
        // Constexpr-safe absolute-value (std::abs(double) is not constexpr in C++20).
1132
        auto absv = [](double x) constexpr -> double { return x < 0.0 ? -x : x; };
2,409,780✔
1133

1134
        // Bubble sort 4 elements by descending magnitude (3 passes).
1135
        if (absv(m0) < absv(m1)) { double t = m0; m0 = m1; m1 = t; }
200,815✔
1136
        if (absv(m1) < absv(m2)) { double t = m1; m1 = m2; m2 = t; }
200,815✔
1137
        if (absv(m2) < absv(m3)) { double t = m2; m2 = m3; m3 = t; }
200,815✔
1138
        if (absv(m0) < absv(m1)) { double t = m0; m0 = m1; m1 = t; }
200,815✔
1139
        if (absv(m1) < absv(m2)) { double t = m1; m1 = m2; m2 = t; }
200,815✔
1140
        if (absv(m0) < absv(m1)) { double t = m0; m0 = m1; m1 = t; }
200,815✔
1141

1142
        // Accumulate from smallest (m3) to largest (m0) with two_sum.
1143
        double sum = 0.0;
200,815✔
1144
        double corrections[3] = { 0.0, 0.0, 0.0 };
200,815✔
1145
        int nc = 0;
200,815✔
1146

1147
        double new_sum = 0.0, error = 0.0;
200,815✔
1148
        two_sum(sum, m3, new_sum, error);
200,815✔
1149
        if (error != 0.0 && nc < 3) corrections[nc++] = error;
200,815✔
1150
        sum = new_sum;
200,815✔
1151

1152
        two_sum(sum, m2, new_sum, error);
200,815✔
1153
        if (error != 0.0 && nc < 3) corrections[nc++] = error;
200,815✔
1154
        sum = new_sum;
200,815✔
1155

1156
        two_sum(sum, m1, new_sum, error);
200,815✔
1157
        if (error != 0.0 && nc < 3) corrections[nc++] = error;
200,815✔
1158
        sum = new_sum;
200,815✔
1159

1160
        two_sum(sum, m0, new_sum, error);
200,815✔
1161
        if (error != 0.0 && nc < 3) corrections[nc++] = error;
200,815✔
1162
        sum = new_sum;
200,815✔
1163

1164
        floatcascade<4> result;
200,815✔
1165
        result[0] = sum;
200,815✔
1166
        // Store corrections in decreasing order of significance: latest captured
1167
        // (from largest input) is the most significant correction.
1168
        for (int i = 0; i < nc; ++i) {
689,818✔
1169
            result[i + 1] = corrections[nc - 1 - i];
489,003✔
1170
        }
1171
        // result[nc+1..3] already zero (default-constructed).
1172

1173
        return result;
200,815✔
1174
    }
1175

1176
    // Multiply two floatcascade<2> -> floatcascade<2>.  Mirrors classic dd's
1177
    // operator*= (see error_free_ops.hpp / dd_impl.hpp): 4 products + 3
1178
    // residuals, three_sum to fold them, take top 2 components.
1179
    constexpr inline floatcascade<2> multiply_cascades(const floatcascade<2>& a, const floatcascade<2>& b) {
162,824✔
1180
        double p0 = 0.0, p1 = 0.0, p2 = 0.0, p3 = 0.0, p4 = 0.0, p5 = 0.0, p6 = 0.0;
162,824✔
1181
        two_prod(a[0], b[0], p0, p1);
162,824✔
1182

1183
        bool p0_finite;
1184
        if (std::is_constant_evaluated()) {
162,824✔
NEW
1185
            p0_finite = sw::universal::is_finite_cx(p0);
×
1186
        }
1187
        else {
1188
            p0_finite = std::isfinite(p0);
162,824✔
1189
        }
1190

1191
        if (p0_finite) {
162,824✔
1192
            two_prod(a[0], b[1], p2, p4);
162,819✔
1193
            two_prod(a[1], b[0], p3, p5);
162,819✔
1194
            p6 = a[1] * b[1];
162,819✔
1195
            three_sum(p1, p2, p3);
162,819✔
1196
            p2 += p4 + p5 + p6;
162,819✔
1197
            three_sum(p0, p1, p2);
162,819✔
1198
        }
1199
        else {
1200
            p1 = 0.0;
5✔
1201
        }
1202

1203
        floatcascade<2> result;
162,824✔
1204
        result[0] = p0;
162,824✔
1205
        result[1] = p1;
162,824✔
1206
        return result;
162,824✔
1207
    }
1208

1209
} // namespace expansion_ops
1210

1211
///////////////////////////////////////////////////////////////////////////////
1212
// Decimal conversion implementation (defined after expansion_ops)
1213

1214
template<size_t N>
1215
void floatcascade<N>::to_digits(std::vector<char>& s, int& exponent, int precision) const {
1,008✔
1216
    constexpr floatcascade<N> _one(1.0), _ten(10.0);
1,008✔
1217
    constexpr double _log2(0.301029995663981);
1,008✔
1218

1219
    if (iszero()) {
1,008✔
1220
        exponent = 0;
×
1221
        for (int i = 0; i < precision; ++i)
×
1222
            s[static_cast<unsigned>(i)] = '0';
×
1223
        return;
4✔
1224
    }
1225

1226
    // First determine the (approximate) exponent
1227
    int exp;
1228
    (void)std::frexp(e[0], &exp);  // Only need exponent, not mantissa
1,008✔
1229
    --exp;  // adjust exp as frexp gives a binary exp that is 1 too big
1,008✔
1230
    exp = static_cast<int>(_log2 * exp);  // estimate the power of ten exponent
1,008✔
1231

1232
    floatcascade<N> r = abs(*this);
1,008✔
1233
    if (exp < 0) {
1,008✔
1234
        if (exp < -300) {
297✔
1235
            // Scale up to avoid underflow
1236
            floatcascade<N> scaled;
13✔
1237
            for (size_t i = 0; i < N; ++i) {
53✔
1238
                scaled[i] = std::ldexp(r[i], 53);
40✔
1239
            }
1240
            r = scaled;
13✔
1241
            r *= pown(_ten, -exp);
13✔
1242
            // Scale back down
1243
            for (size_t i = 0; i < N; ++i) {
53✔
1244
                r[i] = std::ldexp(r[i], -53);
40✔
1245
            }
1246
        }
1247
        else {
1248
            r *= pown(_ten, -exp);
284✔
1249
        }
1250
    }
1251
    else {
1252
        if (exp > 0) {
711✔
1253
            if (exp > 300) {
172✔
1254
                // Scale down to avoid overflow
1255
                floatcascade<N> scaled;
21✔
1256
                for (size_t i = 0; i < N; ++i) {
84✔
1257
                    scaled[i] = std::ldexp(r[i], -53);
63✔
1258
                }
1259
                r = scaled;
21✔
1260
                r /= pown(_ten, exp);
21✔
1261
                // Scale back up
1262
                for (size_t i = 0; i < N; ++i) {
84✔
1263
                    r[i] = std::ldexp(r[i], 53);
63✔
1264
                }
1265
            }
1266
            else {
1267
                r /= pown(_ten, exp);
151✔
1268
            }
1269
        }
1270
    }
1271

1272
    // Fix exponent if we have gone too far
1273
    if (r >= _ten) {
1,008✔
1274
        r /= _ten;
67✔
1275
        ++exp;
67✔
1276
    }
1277
    else {
1278
        if (r < _one) {
941✔
1279
            r *= _ten;
432✔
1280
            --exp;
432✔
1281
        }
1282
    }
1283

1284
    // Use full floatcascade comparison (not just r[0]) to match dd behavior
1285
    if ((r >= _ten) || (r < _one)) {
1,008✔
1286
        std::cerr << "to_digits() failed to compute exponent\n";
4✔
1287
        return;
4✔
1288
    }
1289

1290
    // at this point the value is normalized to a decimal value between [1.0, 10.0)
1291
    // generate the digits
1292
    int nrDigits = precision + 1;
1,004✔
1293
    for (int i = 0; i < nrDigits; ++i) {
23,019✔
1294
        int mostSignificantDigit = static_cast<int>(r[0]);
22,015✔
1295

1296
        // Subtract the digit
1297
        floatcascade<N> digit_value(static_cast<double>(mostSignificantDigit));
22,015✔
1298

1299
        // Negate and add using expansion_ops (since we don't have operator-= yet)
1300
        floatcascade<N> neg_digit;
22,015✔
1301
        for (size_t j = 0; j < N; ++j) {
85,529✔
1302
            neg_digit[j] = -digit_value[j];
63,514✔
1303
        }
1304
        floatcascade<2*N> temp_expanded = expansion_ops::add_cascades(r, neg_digit);
22,015✔
1305

1306
        // Compress back to N components
1307
        if constexpr (N == 2) {
1308
            r = expansion_ops::compress_4to2(temp_expanded);
9,799✔
1309
        }
1310
        else if constexpr (N == 3) {
1311
            r = expansion_ops::compress_6to3(temp_expanded);
4,948✔
1312
        }
1313
        else if constexpr (N == 4) {
1314
            r = expansion_ops::compress_8to4(temp_expanded);
7,268✔
1315
        }
1316

1317
        // Multiply by 10
1318
        r *= _ten;
22,015✔
1319

1320
        s[static_cast<unsigned>(i)] = static_cast<char>(mostSignificantDigit + '0');
22,015✔
1321
    }
1322

1323
    // Fix out of range digits
1324
    for (int i = nrDigits - 1; i > 0; --i) {
22,015✔
1325
        if (s[static_cast<unsigned>(i)] < '0') {
21,011✔
1326
            s[static_cast<unsigned>(i - 1)]--;
423✔
1327
            s[static_cast<unsigned>(i)] += 10;
423✔
1328
        }
1329
        else {
1330
            if (s[static_cast<unsigned>(i)] > '9') {
20,588✔
1331
                s[static_cast<unsigned>(i - 1)]++;
×
1332
                s[static_cast<unsigned>(i)] -= 10;
×
1333
            }
1334
        }
1335
    }
1336

1337
    if (s[0] <= '0') {
1,004✔
1338
        std::cerr << "to_digits() non-positive leading digit\n";
×
1339
        return;
×
1340
    }
1341

1342
    // Round and propagate carry
1343
    int lastDigit = nrDigits - 1;
1,004✔
1344
    if (s[static_cast<unsigned>(lastDigit)] >= '5') {
1,004✔
1345
        int i = nrDigits - 2;
350✔
1346
        s[static_cast<unsigned>(i)]++;
350✔
1347
        while (i > 0 && s[static_cast<unsigned>(i)] > '9') {
478✔
1348
            s[static_cast<unsigned>(i)] -= 10;
128✔
1349
            s[static_cast<unsigned>(--i)]++;
128✔
1350
        }
1351
    }
1352

1353
    // If first digit is 10, shift left and increment exponent
1354
    if (s[0] > '9') {
1,004✔
1355
        ++exp;
5✔
1356
        for (int i = precision; i >= 2; --i) {
76✔
1357
            s[static_cast<unsigned>(i)] = s[static_cast<unsigned>(i - 1)];
71✔
1358
        }
1359
        s[0] = '1';
5✔
1360
        s[1] = '0';
5✔
1361
    }
1362

1363
    s[static_cast<unsigned>(precision)] = 0;  // termination null
1,004✔
1364
    exponent = exp;
1,004✔
1365
}
1366

1367
template<size_t N>
1368
std::string floatcascade<N>::to_string(
1,072✔
1369
    std::streamsize precision,
1370
    std::streamsize width,
1371
    bool fixed,
1372
    bool scientific,
1373
    bool internal,
1374
    bool left,
1375
    bool showpos,
1376
    bool uppercase,
1377
    char fill
1378
) const {
1379
    std::string s;
1,072✔
1380
    bool negative = (e[0] < 0.0);
1,072✔
1381
    int exponent_value = 0;
1,072✔
1382

1383
    if (fixed && scientific)
1,072✔
1384
        fixed = false;  // scientific format takes precedence
×
1385

1386
    if (isnan()) {
1,072✔
1387
        s = uppercase ? "NAN" : "nan";
16✔
1388
        negative = false;
16✔
1389
    }
1390
    else {
1391
        if (negative) {
1,056✔
1392
            s += '-';
163✔
1393
        }
1394
        else {
1395
            if (showpos)
893✔
1396
                s += '+';
×
1397
        }
1398

1399
        if (isinf()) {
1,056✔
1400
            s += uppercase ? "INF" : "inf";
5✔
1401
        }
1402
        else if (iszero()) {
1,051✔
1403
            s += '0';
43✔
1404
            if (precision > 0) {
43✔
1405
                s += '.';
43✔
1406
                s.append(static_cast<unsigned int>(precision), '0');
43✔
1407
            }
1408
        }
1409
        else {
1410
            int powerOfTenScale = static_cast<int>(std::log10(std::fabs(e[0])));
1,008✔
1411
            int integerDigits = (fixed ? (powerOfTenScale + 1) : 1);
1,008✔
1412
            int nrDigits = integerDigits + static_cast<int>(precision);
1,008✔
1413

1414
            int nrDigitsForFixedFormat = nrDigits;
1,008✔
1415
            if (fixed)
1,008✔
1416
                nrDigitsForFixedFormat = std::max(60, nrDigits);
×
1417

1418
            // a number in the range of [0.5, 1.0) to be printed with zero precision
1419
            // must be rounded up to 1 to print correctly
1420
            if (fixed && (precision == 0) && (std::abs(e[0]) < 1.0)) {
1,008✔
1421
                s += (std::abs(e[0]) >= 0.5) ? '1' : '0';
×
1422
                return s;
×
1423
            }
1424

1425
            if (fixed && nrDigits <= 0) {
1,008✔
1426
                // process values that are near zero
1427
                s += '0';
×
1428
                if (precision > 0) {
×
1429
                    s += '.';
×
1430
                    s.append(static_cast<unsigned int>(precision), '0');
×
1431
                }
1432
            }
1433
            else {
1434
                std::vector<char> t;
1,008✔
1435

1436
                if (fixed) {
1,008✔
1437
                    t.resize(static_cast<size_t>(nrDigitsForFixedFormat + 1));
×
1438
                    to_digits(t, exponent_value, nrDigitsForFixedFormat);
×
1439
                }
1440
                else {
1441
                    t.resize(static_cast<size_t>(nrDigits + 1));
1,008✔
1442
                    to_digits(t, exponent_value, nrDigits);
1,008✔
1443
                }
1444

1445
                if (fixed) {
1,008✔
1446
                    // round the decimal string
1447
                    round_string(t, nrDigits + 1, &integerDigits);
×
1448

1449
                    if (integerDigits > 0) {
×
1450
                        int i;
1451
                        for (i = 0; i < integerDigits; ++i)
×
1452
                            s += t[static_cast<unsigned>(i)];
×
1453
                        if (precision > 0) {
×
1454
                            s += '.';
×
1455
                            for (int j = 0; j < precision; ++j, ++i)
×
1456
                                s += t[static_cast<unsigned>(i)];
×
1457
                        }
1458
                    }
1459
                    else {
1460
                        s += "0.";
×
1461
                        if (integerDigits < 0)
×
1462
                            s.append(static_cast<size_t>(-integerDigits), '0');
×
1463
                        for (int i = 0; i < nrDigits; ++i)
×
1464
                            s += t[static_cast<unsigned>(i)];
×
1465
                    }
1466
                }
1467
                else {
1468
                    s += t[0ull];
1,008✔
1469
                    if (precision > 0)
1,008✔
1470
                        s += '.';
1,008✔
1471

1472
                    for (int i = 1; i <= precision; ++i)
21,083✔
1473
                        s += t[static_cast<unsigned>(i)];
20,075✔
1474
                }
1475
            }
1,008✔
1476
        }
1477

1478
        // TBD: this is seriously broken and needs a redesign
1479
        //
1480
        // fix for improper offset with large values and small values
1481
        // without this trap, output of values of the for 10^j - 1 fail for j > 28
1482
        // and are output with the point in the wrong place, leading to a significant error
1483
        if (fixed && (precision > 0)) {
1,056✔
1484
            // make sure that the value isn't dramatically larger
1485
            double from_string = atof(s.c_str());
×
1486

1487
            // if this ratio is large, then we've got problems
1488
            if (std::fabs(from_string / e[0]) > 3.0) {
×
1489
                // loop on the string, find the point, move it up one
1490
                // don't act on the first character
1491
                for (std::string::size_type i = 1; i < s.length(); ++i) {
×
1492
                    if (s[i] == '.') {
×
1493
                        s[i] = s[i - 1];
×
1494
                        s[i - 1] = '.';  // this will destroy the leading 0 when s[i==1] == '.';
×
1495
                        break;
×
1496
                    }
1497
                }
1498
                // BUG: the loop above, in particular s[i-1] = '.', destroys the leading 0
1499
                // in the fixed point representation if the point is located at i = 1;
1500
                // it also breaks the precision request as it adds a new digit to the fixed representation
1501

1502
                from_string = atof(s.c_str());
×
1503
                // if this ratio is large, then the string has not been fixed
1504
                if (std::fabs(from_string / e[0]) > 3.0) {
×
1505
                    std::cerr << "re-rounding unsuccessful in fixed point fix\n";
×
1506
                }
1507
            }
1508
        }
1509

1510
        if (!fixed && !isinf()) {
1,056✔
1511
            // construct the exponent
1512
            s += uppercase ? 'E' : 'e';
1,051✔
1513
            append_exponent(s, exponent_value);
1,051✔
1514
        }
1515
    }
1516

1517
    // process any fill
1518
    size_t strLength = s.length();
1,072✔
1519
    if (strLength < static_cast<size_t>(width)) {
1,072✔
1520
        size_t nrCharsToFill = (width - strLength);
×
1521
        if (internal) {
×
1522
            if (negative)
×
1523
                s.insert(static_cast<std::string::size_type>(1), nrCharsToFill, fill);
×
1524
            else
1525
                s.insert(static_cast<std::string::size_type>(0), nrCharsToFill, fill);
×
1526
        }
1527
        else if (left) {
×
1528
            s.append(nrCharsToFill, fill);
×
1529
        }
1530
        else {
1531
            s.insert(static_cast<std::string::size_type>(0), nrCharsToFill, fill);
×
1532
        }
1533
    }
1534

1535
    return s;
1,072✔
1536
}
×
1537

1538
template<size_t N>
1539
bool floatcascade<N>::parse(const std::string& number) {
475✔
1540
    const char* p = number.c_str();
475✔
1541

1542
    // Skip any leading spaces
1543
    while (std::isspace(*p)) ++p;
475✔
1544

1545
    floatcascade<N> r;  // result accumulator
475✔
1546
    for (size_t i = 0; i < N; ++i) r[i] = 0.0;
1,621✔
1547

1548
    int nrDigits = 0;
475✔
1549
    int decimalPoint = -1;
475✔
1550
    int sign = 0, eSign = 1;
475✔
1551
    int exp = 0;
475✔
1552
    bool done = false, parsingMantissa = true;
475✔
1553
    char ch;
1554

1555
    while (!done && (ch = *p) != '\0') {
14,941✔
1556
        if (std::isdigit(ch)) {
14,466✔
1557
            if (parsingMantissa) {
13,560✔
1558
                int digit = ch - '0';
13,161✔
1559
                r *= 10.0;
13,161✔
1560
                r += static_cast<double>(digit);
13,161✔
1561
                ++nrDigits;
13,161✔
1562
            }
1563
            else {
1564
                // parsing exponent section
1565
                int digit = ch - '0';
399✔
1566
                exp *= 10;
399✔
1567
                exp += digit;
399✔
1568
            }
1569
        }
1570
        else {
1571
            switch (ch) {
906✔
1572
            case '.':
472✔
1573
                if (decimalPoint >= 0) return false;  // multiple decimal points
472✔
1574
                decimalPoint = nrDigits;
472✔
1575
                break;
472✔
1576

1577
            case '-':
222✔
1578
            case '+':
1579
                if (parsingMantissa) {
222✔
1580
                    if (sign != 0 || nrDigits > 0) return false;  // sign in wrong place
26✔
1581
                    sign = (ch == '-' ? -1 : 1);
26✔
1582
                }
1583
                else {
1584
                    eSign = (ch == '-' ? -1 : 1);
196✔
1585
                }
1586
                break;
222✔
1587

1588
            case 'E':
212✔
1589
            case 'e':
1590
                parsingMantissa = false;
212✔
1591
                break;
212✔
1592

1593
            default:
×
1594
                return false;  // invalid character
×
1595
            }
1596
        }
1597

1598
        ++p;
14,466✔
1599
    }
1600

1601
    exp *= eSign;
475✔
1602

1603
    // Adjust exponent based on decimal point position
1604
    if (decimalPoint >= 0) exp -= (nrDigits - decimalPoint);
475✔
1605

1606
    // Apply exponent using power of 10
1607
    floatcascade<N> ten(10.0);
475✔
1608
    if (exp > 0) {
475✔
1609
        r *= pown(ten, exp);
14✔
1610
    }
1611
    else if (exp < 0) {
461✔
1612
        r /= pown(ten, -exp);
458✔
1613
    }
1614

1615
    // Apply sign
1616
    if (sign == -1) {
475✔
1617
        for (size_t i = 0; i < N; ++i) {
96✔
1618
            r[i] = -r[i];
70✔
1619
        }
1620
    }
1621

1622
    // Copy result to this
1623
    *this = r;
475✔
1624
    return true;
475✔
1625
}
1626

1627
///////////////////////////////////////////////////////////////////////////////
1628
// Arithmetic operator implementations
1629

1630
// Addition operator
1631
template<size_t N>
1632
floatcascade<N>& floatcascade<N>::operator+=(const floatcascade<N>& rhs) noexcept {
13,334✔
1633
    floatcascade<2*N> temp = expansion_ops::add_cascades(*this, rhs);
13,334✔
1634

1635
    // Compress back to N components
1636
    if constexpr (N == 2) {
1637
        *this = expansion_ops::compress_4to2(temp);
9,582✔
1638
    }
1639
    else if constexpr (N == 3) {
1640
        *this = expansion_ops::compress_6to3(temp);
1,039✔
1641
    }
1642
    else if constexpr (N == 4) {
1643
        *this = expansion_ops::compress_8to4(temp);
2,713✔
1644
    }
1645
    else {
1646
        *this = expansion_ops::renormalize(temp);
1647
    }
1648

1649
    return *this;
13,334✔
1650
}
1651

1652
// Subtraction operator
1653
template<size_t N>
1654
floatcascade<N>& floatcascade<N>::operator-=(const floatcascade<N>& rhs) noexcept {
173✔
1655
    // Negate rhs and add
1656
    floatcascade<N> neg_rhs;
173✔
1657
    for (size_t i = 0; i < N; ++i) {
865✔
1658
        neg_rhs[i] = -rhs[i];
692✔
1659
    }
1660
    return *this += neg_rhs;
173✔
1661
}
1662

1663
// Multiplication operator
1664
template<size_t N>
1665
floatcascade<N>& floatcascade<N>::operator*=(const floatcascade<N>& rhs) noexcept {
37,529✔
1666
    *this = expansion_ops::multiply_cascades(*this, rhs);
37,529✔
1667
    return *this;
37,529✔
1668
}
1669

1670
// Division operator (Newton-Raphson method, generalized from dd_cascade)
1671
template<size_t N>
1672
floatcascade<N>& floatcascade<N>::operator/=(const floatcascade<N>& rhs) noexcept {
697✔
1673
    if (isnan())
697✔
1674
        return *this;
×
1675
    if (rhs.isnan())
697✔
1676
        return *this = rhs;
×
1677
    if (rhs.iszero()) {
697✔
1678
        if (iszero()) {
×
1679
            // Set to NaN
1680
            e[0] = std::numeric_limits<double>::quiet_NaN();
×
1681
            for (size_t i = 1; i < N; ++i) e[i] = 0.0;
×
1682
        } else {
1683
            // Set to infinity with appropriate sign
1684
            double inf_val = (sign() == rhs.sign()) ? INFINITY : -INFINITY;
×
1685
            e[0] = inf_val;
×
1686
            for (size_t i = 1; i < N; ++i) e[i] = 0.0;
×
1687
        }
1688
        return *this;
×
1689
    }
1690

1691
    // Newton-Raphson division: compute reciprocal then multiply
1692
    // Initial approximation q0 = a/b using highest component
1693
    double q0 = e[0] / rhs.e[0];
697✔
1694

1695
    // Compute residual: *this - q0 * rhs
1696
    floatcascade<N> q0_fc;
697✔
1697
    q0_fc[0] = q0;
697✔
1698
    for (size_t i = 1; i < N; ++i) q0_fc[i] = 0.0;
1,805✔
1699

1700
    floatcascade<N> q0_times_rhs = expansion_ops::multiply_cascades(q0_fc, rhs);
697✔
1701

1702
    // Use add_cascades for subtraction (add negative)
1703
    floatcascade<N> neg_q0_times_rhs;
697✔
1704
    for (size_t i = 0; i < N; ++i) {
2,502✔
1705
        neg_q0_times_rhs[i] = -q0_times_rhs[i];
1,805✔
1706
    }
1707
    floatcascade<2*N> residual_expanded = expansion_ops::add_cascades(*this, neg_q0_times_rhs);
697✔
1708

1709
    // Compress residual back to N components
1710
    floatcascade<N> residual;
697✔
1711
    if constexpr (N == 2) {
1712
        residual = expansion_ops::compress_4to2(residual_expanded);
431✔
1713
    }
1714
    else if constexpr (N == 3) {
1715
        residual = expansion_ops::compress_6to3(residual_expanded);
121✔
1716
    }
1717
    else if constexpr (N == 4) {
1718
        residual = expansion_ops::compress_8to4(residual_expanded);
145✔
1719
    }
1720

1721
    // Refine: q1 = q0 + residual/rhs
1722
    double q1 = residual.e[0] / rhs.e[0];
697✔
1723

1724
    // Combine quotients using two_sum for error-free addition
1725
    floatcascade<N> result_cascade;
697✔
1726
    result_cascade[0] = q0;
697✔
1727
    result_cascade[1] = q1;
697✔
1728
    for (size_t i = 2; i < N; ++i) result_cascade[i] = 0.0;
1,108✔
1729

1730
    *this = expansion_ops::renormalize(result_cascade);
697✔
1731
    return *this;
697✔
1732
}
1733

1734
///////////////////////////////////////////////////////////////////////////////
1735
// Helper functions for decimal conversion and mathematical operations
1736

1737
// absolute value
1738
template<size_t N>
1739
inline floatcascade<N> abs(const floatcascade<N>& a) {
1,008✔
1740
    floatcascade<N> result(a);
1,008✔
1741
    if (a[0] < 0.0) {
1,008✔
1742
        // Negate all components
1743
        for (size_t i = 0; i < N; ++i) {
632✔
1744
            result[i] = -result[i];
469✔
1745
        }
1746
    }
1747
    return result;
1,008✔
1748
}
1749

1750
// square: x^2 (generic version)
1751
template<size_t N>
1752
inline floatcascade<N> sqr(const floatcascade<N>& a) {
133,928✔
1753
    return expansion_ops::multiply_cascades(a, a);
133,928✔
1754
}
1755

1756
// square: x^2 (specialized for 4-component cascade)
1757
// This mirrors the qd sqr() algorithm from Hida/Li/Bailey which
1758
// exploits the symmetry of a*a to use only 6 partial products
1759
// instead of 16, achieving higher precision through fewer
1760
// intermediate rounding steps.
1761
template<>
1762
inline floatcascade<4> sqr(const floatcascade<4>& a) {
191,216✔
1763
    using namespace expansion_ops;
1764

1765
    // local two_sqr: p = a*a exactly, error in r (uses FMA)
1766
    auto two_sqr_local = [](double a, double& r) -> double {
382,432✔
1767
        volatile double p = a * a;
382,432✔
1768
        r = std::fma(a, a, -static_cast<double>(p));
382,432✔
1769
        return static_cast<double>(p);
382,432✔
1770
    };
1771
    // local quick_two_sum: assumes |a| >= |b|
1772
    auto qts = [](double a, double b, double& r) -> double {
1,912,160✔
1773
        volatile double s = a + b;
1,912,160✔
1774
        r = (std::isfinite(static_cast<double>(s)) ? b - (static_cast<double>(s) - a) : 0.0);
1,912,160✔
1775
        return static_cast<double>(s);
1,912,160✔
1776
    };
1777

1778
    double q0, q1, q2, q3;
1779
    double p0 = two_sqr_local(a[0], q0);
191,216✔
1780
    double p1, p2, p3;
1781
    two_prod(2.0 * a[0], a[1], p1, q1);
191,216✔
1782
    two_prod(2.0 * a[0], a[2], p2, q2);
191,216✔
1783
    p3 = two_sqr_local(a[1], q3);
191,216✔
1784

1785
    double t0, t1;
1786
    two_sum(q0, p1, p1, q0);
191,216✔
1787

1788
    two_sum(q0, q1, q0, q1);
191,216✔
1789
    two_sum(p2, p3, p2, p3);
191,216✔
1790

1791
    double s0, s1;
1792
    two_sum(q0, p2, s0, t0);
191,216✔
1793
    two_sum(q1, p3, s1, t1);
191,216✔
1794

1795
    two_sum(s1, t0, s1, t0);
191,216✔
1796
    t0 += t1;
191,216✔
1797

1798
    s1 = qts(s1, t0, t0);
191,216✔
1799
    p2 = qts(s0, s1, t1);
191,216✔
1800
    p3 = qts(t1, t0, q0);
191,216✔
1801

1802
    double p4 = 2.0 * a[0] * a[3];
191,216✔
1803
    double p5 = 2.0 * a[1] * a[2];
191,216✔
1804

1805
    two_sum(p4, p5, p4, p5);
191,216✔
1806
    two_sum(q2, q3, q2, q3);
191,216✔
1807

1808
    two_sum(p4, q2, t0, t1);
191,216✔
1809
    t1 = t1 + p5 + q3;
191,216✔
1810

1811
    two_sum(p3, t0, p3, p4);
191,216✔
1812
    p4 = p4 + q0 + t1;
191,216✔
1813

1814
    // renormalize: two passes of quick_two_sum
1815
    double s;
1816
    s = qts(p0, p1, p1); p0 = s;
191,216✔
1817
    s = qts(p1, p2, p2); p1 = s;
191,216✔
1818
    s = qts(p2, p3, p3); p2 = s;
191,216✔
1819
    s = qts(p3, p4, p4); p3 = s;
191,216✔
1820

1821
    s = qts(p0, p1, p1); p0 = s;
191,216✔
1822
    s = qts(p1, p2, p2); p1 = s;
191,216✔
1823
    s = qts(p2, p3, p3); p2 = s;
191,216✔
1824

1825
    floatcascade<4> result;
191,216✔
1826
    result[0] = p0;
191,216✔
1827
    result[1] = p1;
191,216✔
1828
    result[2] = p2;
191,216✔
1829
    result[3] = p3;
191,216✔
1830
    return result;
191,216✔
1831
}
1832

1833
// reciprocal: 1/x
1834
template<size_t N>
1835
inline floatcascade<N> reciprocal(const floatcascade<N>& a) {
×
1836
    if (a.iszero()) {
×
1837
        floatcascade<N> result;
×
1838
        result[0] = (a[0] < 0.0) ? -INFINITY : INFINITY;
×
1839
        for (size_t i = 1; i < N; ++i) result[i] = 0.0;
×
1840
        return result;
×
1841
    }
1842

1843
    if (a.isinf()) {
×
1844
        floatcascade<N> result;
×
1845
        for (size_t i = 0; i < N; ++i) result[i] = 0.0;
×
1846
        return result;
×
1847
    }
1848

1849
    // Use division operator: 1.0 / a
1850
    floatcascade<N> one(1.0);
×
1851
    floatcascade<N> result = one;
×
1852
    result /= a;
×
1853
    return result;
×
1854
}
1855

1856
// power to integer exponent using binary exponentiation
1857
template<size_t N>
1858
inline floatcascade<N> pown(const floatcascade<N>& a, int n) {
941✔
1859
    if (a.isnan()) return a;
941✔
1860

1861
    int abs_n = (n < 0) ? -n : n;
941✔
1862
    floatcascade<N> result;
941✔
1863

1864
    // Handle special cases
1865
    if (abs_n == 0) {
941✔
1866
        if (a.iszero()) {
×
1867
            std::cerr << "pown: 0^0 is undefined\n";
×
1868
            result[0] = std::numeric_limits<double>::quiet_NaN();
×
1869
            for (size_t i = 1; i < N; ++i) result[i] = 0.0;
×
1870
            return result;
×
1871
        }
1872
        // x^0 = 1
1873
        result[0] = 1.0;
×
1874
        for (size_t i = 1; i < N; ++i) result[i] = 0.0;
×
1875
        return result;
×
1876
    }
1877

1878
    if (abs_n == 1) {
941✔
1879
        result = a;
106✔
1880
    }
1881
    else if (abs_n == 2) {
835✔
1882
        result = sqr(a);
29✔
1883
    }
1884
    else {
1885
        // Binary exponentiation
1886
        floatcascade<N> base = a;
806✔
1887
        result[0] = 1.0;
806✔
1888
        for (size_t i = 1; i < N; ++i) result[i] = 0.0;
2,124✔
1889

1890
        int exp = abs_n;
806✔
1891
        while (exp > 0) {
5,122✔
1892
            if (exp % 2 == 1) {
4,316✔
1893
                result = expansion_ops::multiply_cascades(result, base);
2,171✔
1894
            }
1895
            exp /= 2;
4,316✔
1896
            if (exp > 0) {
4,316✔
1897
                base = sqr(base);
3,510✔
1898
            }
1899
        }
1900
    }
1901

1902
    // If exponent was negative, return reciprocal
1903
    if (n < 0) {
941✔
1904
        result = reciprocal(result);
×
1905
    }
1906

1907
    return result;
941✔
1908
}
1909

1910
///////////////////////////////////////////////////////////////////////////////
1911
// Comparison operators
1912

1913
// Equality: compare all components
1914
template<size_t N>
1915
inline bool operator==(const floatcascade<N>& lhs, const floatcascade<N>& rhs) {
773✔
1916
    for (size_t i = 0; i < N; ++i) {
3,000✔
1917
        if (lhs[i] != rhs[i]) return false;
2,573✔
1918
    }
1919
    return true;
427✔
1920
}
1921

1922
template<size_t N>
1923
inline bool operator!=(const floatcascade<N>& lhs, const floatcascade<N>& rhs) {
1924
    return !(lhs == rhs);
1925
}
1926

1927
// Less than: lexicographic comparison (compare high to low components)
1928
template<size_t N>
1929
inline bool operator<(const floatcascade<N>& lhs, const floatcascade<N>& rhs) {
3,964✔
1930
    for (size_t i = 0; i < N; ++i) {
4,531✔
1931
        if (lhs[i] < rhs[i]) return true;
4,379✔
1932
        if (lhs[i] > rhs[i]) return false;
1,996✔
1933
    }
1934
    return false; // equal
152✔
1935
}
1936

1937
template<size_t N>
1938
inline bool operator>(const floatcascade<N>& lhs, const floatcascade<N>& rhs) {
1939
    return rhs < lhs;
1940
}
1941

1942
template<size_t N>
1943
inline bool operator<=(const floatcascade<N>& lhs, const floatcascade<N>& rhs) {
1944
    return !(rhs < lhs);
1945
}
1946

1947
template<size_t N>
1948
inline bool operator>=(const floatcascade<N>& lhs, const floatcascade<N>& rhs) {
2,016✔
1949
    return !(lhs < rhs);
2,016✔
1950
}
1951

1952
// Comparison with double (convert double to floatcascade<N>)
1953
template<size_t N>
1954
inline bool operator==(const floatcascade<N>& lhs, double rhs) {
1955
    return lhs == floatcascade<N>(rhs);
1956
}
1957

1958
template<size_t N>
1959
inline bool operator!=(const floatcascade<N>& lhs, double rhs) {
1960
    return lhs != floatcascade<N>(rhs);
1961
}
1962

1963
template<size_t N>
1964
inline bool operator<(const floatcascade<N>& lhs, double rhs) {
1965
    return lhs < floatcascade<N>(rhs);
1966
}
1967

1968
template<size_t N>
1969
inline bool operator>(const floatcascade<N>& lhs, double rhs) {
1970
    return lhs > floatcascade<N>(rhs);
1971
}
1972

1973
template<size_t N>
1974
inline bool operator<=(const floatcascade<N>& lhs, double rhs) {
1975
    return lhs <= floatcascade<N>(rhs);
1976
}
1977

1978
template<size_t N>
1979
inline bool operator>=(const floatcascade<N>& lhs, double rhs) {
1980
    return lhs >= floatcascade<N>(rhs);
1981
}
1982

1983
template<size_t N>
1984
inline bool operator==(double lhs, const floatcascade<N>& rhs) {
1985
    return floatcascade<N>(lhs) == rhs;
1986
}
1987

1988
template<size_t N>
1989
inline bool operator!=(double lhs, const floatcascade<N>& rhs) {
1990
    return floatcascade<N>(lhs) != rhs;
1991
}
1992

1993
template<size_t N>
1994
inline bool operator<(double lhs, const floatcascade<N>& rhs) {
1995
    return floatcascade<N>(lhs) < rhs;
1996
}
1997

1998
template<size_t N>
1999
inline bool operator>(double lhs, const floatcascade<N>& rhs) {
2000
    return floatcascade<N>(lhs) > rhs;
2001
}
2002

2003
template<size_t N>
2004
inline bool operator<=(double lhs, const floatcascade<N>& rhs) {
2005
    return floatcascade<N>(lhs) <= rhs;
2006
}
2007

2008
template<size_t N>
2009
inline bool operator>=(double lhs, const floatcascade<N>& rhs) {
2010
    return floatcascade<N>(lhs) >= rhs;
2011
}
2012

2013
} // namespace sw::universal
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc