• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

stillwater-sc / universal / 26381119807

25 May 2026 03:08AM UTC coverage: 84.02% (+0.01%) from 84.008%
26381119807

Pull #993

github

web-flow
Merge b7fbe7947 into 4648aaee3
Pull Request #993: fix(erational): correct ieee754 conversion (both directions) + add tests (#986)

76 of 79 new or added lines in 1 file covered. (96.2%)

32 existing lines in 4 files now uncovered.

47426 of 56446 relevant lines covered (84.02%)

8745134.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.64
/include/sw/universal/number/cfloat/cfloat_impl.hpp
1
#pragma once
2
// cfloat_impl.hpp: implementation of an arbitrary configuration fixed-size 'classic' floating-point representation
3
// cfloat<> can emulate IEEE-754 floats and the new Deep Learning types, such as 
4
// IEEE-754 half-precision floats
5
// Google bfloat16
6
// NVIDIA TensorFloat 
7
// AMD FP16 and FP32
8
// Microsoft FP8 and FP9
9
// Tesla CFP8, CFP16
10
// 
11
// cfloat<> can also emulate more precise configurations, such as
12
// 80bit IEEE-754 extended precision floats
13
// true 128bit quad precision floats
14
//
15
// Copyright (C) 2017 Stillwater Supercomputing, Inc.
16
// SPDX-License-Identifier: MIT
17
//
18
// This file is part of the universal numbers project, which is released under an MIT Open Source license.
19

20
// compiler specific environment has been delegated to be handled by the
21
// number system include file <universal/number/cfloat/cfloat.hpp>
22
// 
23
// supporting types and functions
24
#include <cctype>
25
#include <limits>
26
#include <regex>
27
#include <sstream>
28
#include <string_view>
29
#include <type_traits>
30
#include <universal/native/ieee754.hpp>
31
#include <universal/native/subnormal.hpp>
32
#include <universal/utility/find_msb.hpp>
33
#include <universal/utility/decimal_to_binary.hpp>
34
#include <universal/number/shared/nan_encoding.hpp>
35
#include <universal/number/shared/infinite_encoding.hpp>
36
#include <universal/number/shared/specific_value_encoding.hpp>
37
// arithmetic tracing options
38
#include <universal/number/algorithm/trace_constants.hpp>
39
// cfloat exception structure
40
#include <universal/number/cfloat/exceptions.hpp>
41
// composition types used by cfloat
42
#include <universal/internal/blockbinary/blockbinary.hpp>
43
#include <universal/internal/blocktriple/blocktriple.hpp>
44
#include <universal/number/support/decimal.hpp>
45

46
#ifndef CFLOAT_THROW_ARITHMETIC_EXCEPTION
47
#define CFLOAT_THROW_ARITHMETIC_EXCEPTION 0
48
#endif
49

50
#ifndef TRACE_CONVERSION
51
#define TRACE_CONVERSION 0
52
#endif
53

54
namespace sw { namespace universal {
55

56
/*
57
 * classic floating-point cfloat offers subnormals, max-exponent values, and
58
 * saturation. The default configuration turns off subnormals, max-exponent
59
 * values, and projects values outside of their dynamic range to +-inf
60
 *
61
 * Behavior flags
62
 *   hasSubnormals   : gradual underflow: use all fraction encodings when exponent is all 0's
63
 *   hasMaxExpValues  : reclaim max-exponent encodings as numeric values instead of
64
 *                      reserving the entire all-1s exponent binade for inf/NaN;
65
 *                      only 4 encodings are reserved for +-inf and quiet/signalling NaN
66
 *   isSaturating     : saturate to maxneg or maxpos when value is out of dynamic range
67
 */
68

69
/// <summary>
70
/// decode an cfloat value into its constituent parts
71
/// </summary>
72
/// <typeparam name="bt">block type</typeparam>
73
/// <param name="v">cfloat value to decode (input: const ref)</param>
74
/// <param name="s">sign (output: bool ref)</param>
75
/// <param name="e">exponent (output: blockbinary ref)</param>
76
/// <param name="f">fraction (output: blockbinary ref)</param>
77
template<unsigned nbits, unsigned es, unsigned fbitsPlus1, typename bt,
78
        bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
79
void decode(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& v, bool& s, blockbinary<es, bt>& e, blockbinary<fbitsPlus1, bt>& f) {
804✔
80
        v.sign(s);
804✔
81
        v.exponent(e);
804✔
82
        v.fraction(f);
804✔
83
}
804✔
84

85
/// <summary>
86
/// return the binary scale of the given number
87
/// </summary>
88
/// <typeparam name="bt">Block type used for storage: derived through ADL</typeparam>
89
/// <param name="v">the cfloat number for which we seek to know the binary scale</param>
90
/// <returns>binary scale, i.e. 2^scale, of the value of the cfloat</returns>
91
template<unsigned nbits, unsigned es, typename bt,
92
        bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
93
int scale(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& v) {
329✔
94
        return v.scale();
329✔
95
}
96

97
/// <summary>
98
/// convert a blocktriple to a cfloat. blocktriples come out of the arithmetic
99
/// engine in the form ii.ff...ff and a scale. The conversion must take this
100
/// denormalized form into account during conversion.
101
/// 
102
/// The blocktriple must be in this form to round correctly, as all the bits
103
/// after an arithmetic operation must be taken into account.
104
/// 
105
/// Transformation:
106
///    ii.ff...ff  transform to    s.eee.fffff
107
/// All number systems that depend on blocktriple will need to have
108
/// the rounding decision answered, so that functionality can be
109
/// reused if we locate it inside blocktriple.
110
/// 
111
/// if (srcbits > fbits) // we need to round
112
///     if (ii.00..00 > 1) 
113
///         mask is at srcbits - fbits + 1
114
///     else 
115
///                    mask is at srcbits - fbits
116
/// }
117
/// else {               // no need to round
118
/// }
119
/// </summary>
120
/// <typeparam name="bt">type of the block used for cfloat storage</typeparam>
121
/// <param name="src">the blocktriple to be converted</param>
122
/// <param name="tgt">the resulting cfloat</param>
123
template<unsigned srcbits, BlockTripleOperator op, unsigned nbits, unsigned es, typename bt,
124
        bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
125
constexpr inline void convert(const blocktriple<srcbits, op, bt>& src, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& tgt) {
78,757,253✔
126
        using btType = blocktriple<srcbits, op, bt>;
127
        using cfloatType = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
128
        // test special cases
129
        if (src.isnan()) {
78,757,253✔
130
                tgt.setnan(src.sign() ? NAN_TYPE_SIGNALLING : NAN_TYPE_QUIET);
560✔
131
        }
132
        else        if (src.isinf()) {
78,756,693✔
133
                tgt.setinf(src.sign());
560✔
134
        }
135
        else         if (src.iszero()) {
78,756,133✔
136
                tgt.setzero();
47,536✔
137
                tgt.setsign(src.sign()); // preserve sign
47,536✔
138
        }
139
        else {
140
                int significandScale = src.significandscale();
78,708,597✔
141
                int exponent = src.scale() + significandScale;
78,708,597✔
142
                // blocktriple keeps arithmetic results intentionally unnormalized. scale() tracks the source binade,
143
                // while significandScale() reports whether the exact result spilled into the integer headroom above
144
                // the radix. cfloat conversion has to combine both before it can classify the value as normal,
145
                // subnormal, or overflowing.
146
                // special case of underflow
147
                if constexpr (hasSubnormals) {
148
//                        std::cout << "exponent = " << exponent << " bias = " << cfloatType::EXP_BIAS << " exp subnormal = " << cfloatType::MIN_EXP_SUBNORMAL << '\n';
149
                        // why must exponent be less than (minExpSubnormal - 1) to be rounded to zero? 
150
                        // because the half-way value that would round up to minpos is at exp = (minExpSubnormal - 1)
151
                        if (exponent < cfloatType::MIN_EXP_SUBNORMAL) {
55,356,819✔
152
                                tgt.setzero();
183,211✔
153
                                if (exponent == (cfloatType::MIN_EXP_SUBNORMAL - 1)) {
183,211✔
154
                                        // -exponent because we are right shifting and exponent in this range is negative
155
                                        int adjustment = -(exponent + subnormal_reciprocal_shift[es]);
39,885✔
156
                                        std::pair<bool, unsigned> alignment = src.roundingDecision(adjustment);
39,885✔
157
                                        if (alignment.first) ++tgt; // we are minpos
39,885✔
158
                                }
159
                                tgt.setsign(src.sign());
183,211✔
160
                                return;
3,108,659✔
161
                        }
162
                }
163
                else {
164
                        if (exponent + cfloatType::EXP_BIAS <= 0) {  // value is in the subnormal range, which maps to 0
23,351,778✔
165
                                tgt.setzero();
363,539✔
166
                                tgt.setsign(src.sign());
363,539✔
167
                                return;
1,241,234✔
168
                        }
169
                }
170
                // special case of overflow
171
                if constexpr (hasMaxExpValues) {
172
                        if constexpr (isSaturating) {
173
                                if (exponent > cfloatType::MAX_EXP) {
21,098,841✔
174
                                        if (src.sign()) tgt.maxneg(); else tgt.maxpos();
934,372✔
175
                                        return;
934,372✔
176
                                }
177
                        }
178
                        else {
179
                                if (exponent > cfloatType::MAX_EXP) {
30,556,136✔
180
                                        tgt.setinf(src.sign());
2,673,524✔
181
                                        return;
2,673,524✔
182
                                }
183
                        }
184
                }
185
                else {  // no max-exponent values: will saturate at a different encoding: TODO can we hide it all in maxpos?
186
                        if constexpr (isSaturating) {
187
                                if (exponent > cfloatType::MAX_EXP) {
3,701✔
188
                                        if (src.sign()) tgt.maxneg(); else tgt.maxpos();
7✔
189
                                        return;
7✔
190
                                }
191
                        }
192
                        else {
193
                                if (exponent > cfloatType::MAX_EXP) {
26,503,169✔
194
                                        tgt.setinf(src.sign());
195,240✔
195
                                        return;
195,240✔
196
                                }
197
                        }
198
                }
199

200
                // our value needs to go through rounding to be correctly interpreted
201
                // 
202
                // tgt.clear();  // no need as all bits are going to be set by the code below
203

204
                // exponent construction
205
                int adjustment{ 0 };
74,358,704✔
206
                // construct exponent
207
                uint64_t biasedExponent = static_cast<uint64_t>(static_cast<long long>(exponent) + static_cast<long long>(cfloatType::EXP_BIAS)); // this is guaranteed to be positive if exponent in encoding range
74,358,704✔
208
//                        std::cout << "exponent         " << to_binary(biasedExponent) << '\n';        
209
                if constexpr (hasSubnormals) {
210
                        //if (exponent >= cfloatType::MIN_EXP_SUBNORMAL && exponent < cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>::MIN_EXP_NORMAL) {
211
                        if (exponent < cfloatType::MIN_EXP_NORMAL) {
52,248,160✔
212
                                // the value is in the subnormal range of the cfloat
213
                                biasedExponent = 0;
35,480,422✔
214
                                // -exponent because we are right shifting and exponent in this range is negative
215
                                adjustment = -(exponent + subnormal_reciprocal_shift[es]);
35,480,422✔
216
                                // this is the right shift adjustment required for subnormal representation due 
217
                                // to the scale of the input number, i.e. the exponent of 2^-adjustment
218
                        }
219
                        else {
220
                                // the value is in the normal range of the cfloat
221
                                biasedExponent = static_cast<uint64_t>(static_cast<long long>(exponent) + static_cast<long long>(cfloatType::EXP_BIAS)); // this is guaranteed to be positive
16,767,738✔
222
                        }
223
                }
224
                else {
225
                        if (exponent < cfloatType::MIN_EXP_NORMAL) biasedExponent = 1ull; // fixup biasedExponent if we are in the subnormal region
22,110,544✔
226
                }
227

228

229
                // get the rounding direction and the LSB right shift: 
230
                std::pair<bool, unsigned> alignment = src.roundingDecision(adjustment);
74,358,704✔
231
                unsigned rightShift = alignment.second;  // this is the shift to get the LSB of the src to the LSB of the tgt
74,358,704✔
232
                // rightShift is also the normalization step: it lines the blocktriple hidden bit up with the cfloat
233
                // hidden-bit position, while the bool tells us whether the discarded tail rounds the retained payload up.
234
                //std::cout << "rightShift       " << rightShift << '\n';
235

236
                if constexpr (btType::bfbits < 65) {
237
                        bool roundup = alignment.first;
73,826,816✔
238
                        //std::cout << "round-up?        " << (roundup ? "yes" : "no") << '\n';
239
                        // we can use a uint64_t to construct the cfloat
240
                        uint64_t raw = (src.sign() ? 1ull : 0ull); // process sign
73,826,816✔
241
                        //std::cout << "raw bits (sign)  " << to_binary(raw) << '\n';
242
                        // construct the fraction bits
243
                        uint64_t fracbits = src.significand_ull(); // get all the bits, including the integer bits
73,826,816✔
244
                        //std::cout << "fracbits         " << to_binary(fracbits) << '\n';
245
                        fracbits >>= rightShift;
73,826,816✔
246
                        //std::cout << "fracbits shifted " << to_binary(fracbits) << '\n';
247
                        fracbits &= cfloatType::ALL_ONES_FR; // remove the hidden bit
73,826,816✔
248
                        //std::cout << "fracbits masked  " << to_binary(fracbits) << '\n';
249
                        if (roundup) ++fracbits;
73,826,816✔
250
                        // Rounding can turn 1.111... into 10.000..., so the carry has to be reinterpreted as an exponent bump.
251
                        // This is the last place where the unrounded blocktriple state can still change the target binade.
252
                        if (fracbits == (1ull << cfloatType::fbits)) { // check for overflow
73,826,816✔
253
                                if (biasedExponent == cfloatType::ALL_ONES_ES) {
456,331✔
254
                                        fracbits = cfloatType::INF_ENCODING; // project to INF
6,424✔
255
                                }
256
                                else {
257
                                        ++biasedExponent;
449,907✔
258
                                        fracbits = 0;
449,907✔
259
                                }
260
                        }
261

262
                        raw <<= es; // shift sign to make room for the exponent bits
73,826,816✔
263
                        raw |= biasedExponent;
73,826,816✔
264
                        //std::cout << "raw bits (exp)   " << to_binary(raw) << '\n';
265
                        raw <<= cfloatType::fbits; // make room for the fraction bits
73,826,816✔
266
                        //std::cout << "raw bits (s+exp) " << to_binary(raw) << '\n';
267
                        raw |= fracbits;
73,826,816✔
268
                        //std::cout << "raw bits (final) " << to_binary(raw) << '\n';
269
                        tgt.setbits(raw);
73,826,816✔
270
//                        std::cout << "raw bits (all)   " << to_binary(raw) << '\n';
271
                        if constexpr (isSaturating) {
272
                                if (tgt.isnan()) {
20,168,155✔
273
                                        if (src.sign()) {
722✔
274
                                                tgt.maxneg();        // map back to maxneg
361✔
275
                                        }
276
                                        else {
277
                                                tgt.maxpos();        // map back to maxpos
361✔
278
                                        }
279
                                }
280
                        }
281
                        else {
282
                                // when you get too far, map it back to +-inf: 
283
                                // TBD: this doesn't appear to be the right algorithm to catch all overflow patterns
284
                                if (tgt.isnan()) tgt.setinf(src.sign());        // map back to +-inf
53,658,661✔
285
                        }
286
                }
287
                else {
288
                        // compose the segments
289
                        bool roundup = alignment.first;
531,888✔
290
                        auto fracbits = src.significand();
531,888✔
291
                        fracbits >>= static_cast<int>(rightShift);
531,888✔
292
                        fracbits.setbit(cfloatType::fbits, false); // remove the hidden bit (matches narrow path: fracbits &= ALL_ONES_FR)
531,888✔
293

294
                        // apply rounding (matches the bfbits < 65 path above)
295
                        if (roundup) fracbits.increment();
531,888✔
296

297
                        // check for rounding overflow: if the fraction carried into
298
                        // the hidden-bit position, clear the fraction and bump the exponent
299
                        if (fracbits.test(cfloatType::fbits)) {
531,888✔
300
                                fracbits.clear();
33✔
301
                                if (biasedExponent == cfloatType::ALL_ONES_ES) {
33✔
302
                                        // rounding overflowed beyond the maximum binade:
303
                                        // handle explicitly because the isnan() remap below
304
                                        // does not catch all configurations (e.g. hasMaxExpValues
305
                                        // with zero fraction is a valid finite encoding, not NaN)
306
                                        if constexpr (isSaturating) {
307
                                                if (src.sign()) tgt.maxneg(); else tgt.maxpos();
×
308
                                        }
309
                                        else {
310
                                                tgt.setinf(src.sign());
×
311
                                        }
312
                                        return;
×
313
                                }
314
                                else {
315
                                        ++biasedExponent;
33✔
316
                                        ++exponent;
33✔
317
                                }
318
                        }
319

320
                        // copy the blocks that contain fraction bits
321
                        tgt.clear();
531,888✔
322
                        for (unsigned b = 0; b < btType::nrBlocks; ++b) {
1,074,298✔
323
                                tgt.setblock(b, fracbits.block(b));
542,410✔
324
                        }
325
                        tgt.setsign(src.sign());
531,888✔
326
                        if (!tgt.setexponent(exponent)) {
531,888✔
327
                                // std::cerr is not constexpr-callable; gate the diagnostic on
328
                                // runtime context only. The constant-evaluator silently drops
329
                                // the diagnostic but the enclosing branch is rare in practice
330
                                // (only triggered when blocktriple bfbits >= 65 and the
331
                                // computed exponent doesn't fit the cfloat config).
332
                                if (!std::is_constant_evaluated()) {
×
333
                                        std::cerr << "exponent value is out of range: " << exponent << '\n';
×
334
                                }
335
                        }
336

337
                        // saturation / overflow-to-inf handling (matches the bfbits < 65 path)
338
                        if constexpr (isSaturating) {
339
                                if (tgt.isnan()) {
8✔
340
                                        if (src.sign()) {
×
341
                                                tgt.maxneg();
×
342
                                        }
343
                                        else {
344
                                                tgt.maxpos();
×
345
                                        }
346
                                }
347
                        }
348
                        else {
349
                                if (tgt.isnan()) tgt.setinf(src.sign());
531,880✔
350
                        }
351
                }
352
        }
353
}
354

355

356
/// <summary>
357
/// An arbitrary, fixed-size floating-point number with configurable gradual under/overflow and saturation/non-saturation arithmetic.
358
/// Default configuration offers normal encoding and non-saturating arithmetic.
359
/// /// </summary>
360
/// <typeparam name="nbits">number of bits in the encoding</typeparam>
361
/// <typeparam name="es">number of exponent bits in the encoding</typeparam>
362
/// <typeparam name="bt">the type to use as storage class: one of [uint8_t|uint16_t|uint32_t]</typeparam>
363
/// <typeparam name="hasSubnormals">configure gradual underflow (==subnormals)</typeparam>
364
/// <typeparam name="hasMaxExpValues">reclaim max-exponent encodings as numeric values</typeparam>
365
/// <typeparam name="isSaturating">configure saturation arithmetic</typeparam>
366
template<unsigned _nbits, unsigned _es, typename bt = uint8_t,
367
        bool _hasSubnormals = false, bool _hasMaxExpValues = false, bool _isSaturating = false>
368
class cfloat {
369
public:
370
        static_assert(_nbits > _es + 1ull, "nbits is too small to accomodate the requested number of exponent bits");
371
        static_assert(_es < 21ull, "my God that is a big number, are you trying to break the Interweb?");
372
        static_assert(_es > 0, "number of exponent bits must be bigger than 0 to be a classic floating point number");
373
        // how do you assert on the condition that if es == 1 then subnormals and max-exponent values must be true?
374
        static constexpr bool     subsuper = (_hasSubnormals && _hasMaxExpValues);
375
        static constexpr bool     special = (subsuper ? true : (_es > 1));
376
        static_assert(special, "when es == 1, cfloat must have both subnormals and max-exponent values");
377
        static constexpr unsigned bitsInByte = 8u;
378
        static constexpr unsigned bitsInBlock = sizeof(bt) * bitsInByte;
379
        static_assert(bitsInBlock <= 64, "storage unit for block arithmetic needs to be <= uint64_t"); // TODO: carry propagation on uint64_t requires assembly code
380

381
        static constexpr unsigned nbits = _nbits;
382
        static constexpr unsigned es = _es;
383
        static constexpr unsigned fbits  = nbits - 1u - es;    // number of fraction bits excluding the hidden bit
384
        static constexpr unsigned fhbits = nbits - es;           // number of fraction bits including the hidden bit
385

386
        static constexpr uint64_t  storageMask = (0xFFFFFFFFFFFFFFFFull >> (64ull - bitsInBlock));
387
        static constexpr bt       BLOCK_MASK = bt(~0);
388
        static constexpr bt       ALL_ONES = bt(~0); // block type specific all 1's value
389
        static constexpr uint32_t ALL_ONES_ES = (0xFFFF'FFFFul >> (32 - es));
390
        static constexpr uint64_t topfbits = fbits % 64;
391
        static constexpr uint64_t FR_SHIFT = (topfbits > 0 ? (64 - topfbits) : 0);
392
        static constexpr uint64_t ALL_ONES_FR = (topfbits > 0 ? (0xFFFF'FFFF'FFFF'FFFFull >> FR_SHIFT) : 0ull); // special case for nbits <= 64
393
        static constexpr uint64_t INF_ENCODING = (ALL_ONES_FR & ~1ull);
394

395
        static constexpr unsigned nrBlocks = 1u + ((nbits - 1ull) / bitsInBlock);
396
        static constexpr unsigned MSU = nrBlocks - 1u; // MSU == Most Significant Unit, as MSB is already taken
397
        static constexpr bt       MSU_MASK = (ALL_ONES >> (nrBlocks * bitsInBlock - nbits));
398
        static constexpr unsigned bitsInMSU = bitsInBlock - (nrBlocks * bitsInBlock - nbits);
399
        static constexpr unsigned fBlocks = 1ull + ((fbits - 1ull) / bitsInBlock); // nr of blocks with fraction bits
400
        static constexpr unsigned FSU = fBlocks - 1u;  // FSU = Fraction Significant Unit: the index of the block that contains the most significant fraction bits
401
        static constexpr bt       FSU_MASK = (ALL_ONES >> (fBlocks * bitsInBlock - fbits));
402
        static constexpr unsigned bitsInFSU = bitsInBlock - (fBlocks * bitsInBlock - fbits);
403

404
        static constexpr bt       SIGN_BIT_MASK = bt(bt(1ull) << ((nbits - 1ull) % bitsInBlock));
405
        static constexpr bt       LSB_BIT_MASK = bt(1ull);
406
        static constexpr bool     MSU_CAPTURES_EXP = (1ull + es) <= bitsInMSU;
407
        static constexpr unsigned EXP_SHIFT = (MSU_CAPTURES_EXP ? (1 == nrBlocks ? (nbits - 1ull - es) : (bitsInMSU - 1ull - es)) : 0);
408
        static constexpr bt       MSU_EXP_MASK = ((ALL_ONES << EXP_SHIFT) & ~SIGN_BIT_MASK) & MSU_MASK;
409
        static constexpr int      EXP_BIAS = ((1 << (es - 1u)) - 1l);
410
        static constexpr int      MAX_EXP = (es == 1) ? 1 : ((1 << es) - EXP_BIAS - 1);
411
        static constexpr int      MIN_EXP_NORMAL = 1 - EXP_BIAS;
412
        static constexpr int      MIN_EXP_SUBNORMAL = 1 - EXP_BIAS - int(fbits); // the scale of smallest ULP
413

414
        static constexpr bool     hasSubnormals   = _hasSubnormals;
415
        static constexpr bool     hasMaxExpValues = _hasMaxExpValues;
416
        static constexpr bool     isSaturating    = _isSaturating;
417
        typedef bt BlockType;
418

419
        // constructors
420
        cfloat() = default;
421
        cfloat(const cfloat&) = default;
422
        cfloat& operator=(const cfloat&) = default;
423

424
        // construct a cfloat from another
425
        template<unsigned nnbits, unsigned ees, typename bbt, bool ssub, bool ssup, bool ssat>
426
        cfloat(const cfloat<nnbits, ees, bbt, ssub, ssup, ssat>& rhs) noexcept : _block{} {
10,995✔
427
                if (rhs.isnan()) {
10,995✔
428
                        setnan(rhs.sign() ? NAN_TYPE_SIGNALLING : NAN_TYPE_QUIET);
461✔
429
                }
430
                else if (rhs.isinf()) {
10,534✔
431
                        setinf(rhs.sign());
23✔
432
                }
433
                else if (rhs.iszero()) {
10,511✔
434
                        setzero();
1,125✔
435
                }
436
                else {
437
                        if constexpr (std::is_same_v<bt, bbt>) {
438
                                // Full-precision path: create a blocktriple sized for the TARGET's
439
                                // fraction width, pre-align the source fraction bits into it (with
440
                                // proper rounding-bit collection), then let convert() handle
441
                                // rounding and encoding.
442
                                constexpr unsigned srcFbits = nnbits - 1u - ees;
4,968✔
443
                                using TgtBT = blocktriple<fbits, BlockTripleOperator::ADD, bt>;
444
                                constexpr unsigned addRbits = TgtBT::rbits;
4,968✔
445
                                TgtBT value;
4,968✔
446

447
                                bool srcSign = rhs.sign();
4,968✔
448
                                int srcScale = rhs.scale();
4,968✔
449

450
                                if constexpr (srcFbits < 64 && (fbits + addRbits) < 64) {
451
                                        // Fast path: fits in uint64_t
452
                                        // Guard requires (fbits + addRbits) < 64 to prevent UB from
453
                                        // left-shift of amount (fbits + addRbits - srcFbits) >= 64.
454
                                        uint64_t srcRaw = rhs.fraction_ull();
4,968✔
455
                                        if (rhs.isnormal() || rhs.ismaxexpvalue()) {
4,968✔
456
                                                // Normal and max-exponent values both have a hidden bit
457
                                                srcRaw |= (1ull << srcFbits);
4,968✔
458
                                        }
459
                                        else {
460
                                                // Normalize subnormal: find leading 1, shift to hidden-bit position
461
                                                using SrcCfloat = cfloat<nnbits, ees, bbt, ssub, ssup, ssat>;
462
                                                srcScale = SrcCfloat::MIN_EXP_NORMAL;
×
463
                                                for (int i = static_cast<int>(srcFbits) - 1; i >= 0; --i) {
×
464
                                                        if (srcRaw & (1ull << i)) {
×
465
                                                                unsigned shift = srcFbits - static_cast<unsigned>(i);
×
466
                                                                srcScale -= static_cast<int>(shift);
×
467
                                                                srcRaw <<= shift;
×
468
                                                                break;
×
469
                                                        }
470
                                                }
471
                                        }
472
                                        // Align source hidden bit (at srcFbits) to target position (fbits + addRbits)
473
                                        uint64_t tgtRaw;
474
                                        if constexpr (srcFbits >= fbits + addRbits) {
475
                                                constexpr unsigned rshift = srcFbits - fbits - addRbits;
4,230✔
476
                                                if constexpr (rshift > 0) {
477
                                                        uint64_t stickyMask = (1ull << rshift) - 1;
646✔
478
                                                        bool sticky = (srcRaw & stickyMask) != 0;
646✔
479
                                                        tgtRaw = srcRaw >> rshift;
646✔
480
                                                        if (sticky) tgtRaw |= 1; // fold discarded bits into sticky
646✔
481
                                                }
482
                                                else {
483
                                                        tgtRaw = srcRaw;
3,584✔
484
                                                }
485
                                        }
486
                                        else {
487
                                                constexpr unsigned lshift = fbits + addRbits - srcFbits;
738✔
488
                                                tgtRaw = srcRaw << lshift;
738✔
489
                                        }
490
                                        value.setsign(srcSign);
4,968✔
491
                                        value.setscale(srcScale);
4,968✔
492
                                        value.setbits(tgtRaw);
4,968✔
493
                                }
494
                                else {
495
                                        // Wide path: bit-by-bit copy with alignment
496
                                        int offset = static_cast<int>(fbits + addRbits) - static_cast<int>(srcFbits);
497
                                        // Initialize radix via setbits, then overwrite significand
498
                                        value.setbits(0);
499
                                        bool sticky = false;
500
                                        for (unsigned i = 0; i < srcFbits; ++i) {
501
                                                bool v = rhs.at(i);
502
                                                if (!v) continue;
503
                                                int tgtBit = static_cast<int>(i) + offset;
504
                                                if (tgtBit >= 0 && tgtBit < static_cast<int>(TgtBT::bfbits)) {
505
                                                        value.setbit(static_cast<unsigned>(tgtBit), true);
506
                                                }
507
                                                else if (tgtBit < 0) {
508
                                                        sticky = true;
509
                                                }
510
                                        }
511
                                        if (sticky) value.setbit(0, true);
512
                                        if (rhs.isnormal() || rhs.ismaxexpvalue()) {
513
                                                // Normal and max-exponent values both have a hidden bit
514
                                                value.setbit(static_cast<unsigned>(fbits + addRbits), true);
515
                                        }
516
                                        else {
517
                                                // Normalize subnormal for wide path.
518
                                                // The bit-copy loop above placed source bit i at target position
519
                                                // i + offset = i + (fbits + addRbits - srcFbits).  The blocktriple
520
                                                // scale represents the exponent such that:
521
                                                //   value = 2^scale * significant * 2^(-fbits-addRbits)
522
                                                // For the subnormal with leading 1 at source bit i, the significant
523
                                                // integer has its leading 1 at position i + fbits + addRbits - srcFbits,
524
                                                // giving value = 2^scale * 2^(i - srcFbits) * 1.xxx.
525
                                                // The true subnormal value is 2^MIN_EXP_NORMAL * 2^(i - srcFbits) * 1.xxx,
526
                                                // so scale = MIN_EXP_NORMAL with no further adjustment.
527
                                                using SrcCfloat = cfloat<nnbits, ees, bbt, ssub, ssup, ssat>;
528
                                                srcScale = SrcCfloat::MIN_EXP_NORMAL;
529
                                                // (No srcScale -= shift: the offset alignment already accounts for
530
                                                //  the bit position; subtracting shift would apply it twice.)
531
                                        }
532
                                        value.setnormal(); // clear _zero flag set by setbits(0)
533
                                        value.setsign(srcSign);
534
                                        value.setscale(srcScale);
535
                                }
536
                                convert(value, *this);
4,968✔
537
                        }
538
                        else {
539
                                // Cross-block-type: marshall through double.
540
                                // Safe only when the source fraction fits in double's 52-bit significand.
541
                                constexpr unsigned srcFbitsXB = nnbits - ees - 1u;
4,418✔
542
                                static_assert(srcFbitsXB <= 52u,
543
                                        "cfloat converting constructor: source and target have different "
544
                                        "block types and source has more fraction bits than double can represent; "
545
                                        "use matching block types for full-precision cross-config conversion");
546
                                *this = double(rhs);
4,418✔
547
                        }
548
                }
549
        }
10,995✔
550

551
        // converting constructors
552
        constexpr cfloat(const std::string& stringRep) : _block{} { assign(stringRep); }
1✔
553
        // specific value constructor
554
        constexpr cfloat(const SpecificValue code) noexcept : _block{} {
305✔
555
                switch (code) {
305✔
556
                case SpecificValue::maxpos:
91✔
557
                        maxpos();
91✔
558
                        break;
91✔
559
                case SpecificValue::minpos:
111✔
560
                        minpos();
111✔
561
                        break;
111✔
562
                case SpecificValue::zero:
×
563
                default:
564
                        zero();
×
565
                        break;
×
566
                case SpecificValue::minneg:
×
567
                        minneg();
×
568
                        break;
×
569
                case SpecificValue::maxneg:
48✔
570
                        maxneg();
48✔
571
                        break;
48✔
572
                case SpecificValue::infpos:
21✔
573
                        setinf(false);
21✔
574
                        break;
21✔
575
                case SpecificValue::infneg:
×
576
                        setinf(true);
×
577
                        break;
×
578
                case SpecificValue::nar: // approximation as cfloats don't have a NaR
17✔
579
                case SpecificValue::qnan:
580
                        setnan(NAN_TYPE_QUIET);
17✔
581
                        break;
17✔
582
                case SpecificValue::snan:
17✔
583
                        setnan(NAN_TYPE_SIGNALLING);
17✔
584
                        break;
17✔
585
                }
586
        }
305✔
587

588
        constexpr cfloat(signed char iv)                    noexcept : _block{} { *this = iv; }
589
        constexpr cfloat(short iv)                          noexcept : _block{} { *this = iv; }
590
        constexpr cfloat(int iv)                            noexcept : _block{} { *this = iv; }
17,704✔
591
        constexpr cfloat(long iv)                           noexcept : _block{} { *this = iv; }
592
        constexpr cfloat(long long iv)                      noexcept : _block{} { *this = iv; }
1✔
593
        constexpr cfloat(char iv)                           noexcept : _block{} { *this = iv; }
594
        constexpr cfloat(unsigned short iv)                 noexcept : _block{} { *this = iv; }
595
        constexpr cfloat(unsigned int iv)                   noexcept : _block{} { *this = iv; }
596
        constexpr cfloat(unsigned long iv)                  noexcept : _block{} { *this = iv; }
100✔
597
        constexpr cfloat(unsigned long long iv)             noexcept : _block{} { *this = iv; }
598
        CONSTEXPRESSION cfloat(float iv)                    noexcept : _block{} { *this = iv; }
340,809✔
599
        CONSTEXPRESSION cfloat(double iv)                   noexcept : _block{} { *this = iv; }
478,304✔
600

601
        // assignment operators
602
        constexpr cfloat& operator=(signed char rhs)        noexcept { return convert_signed_integer(rhs); }
603
        constexpr cfloat& operator=(short rhs)              noexcept { return convert_signed_integer(rhs); }
604
        constexpr cfloat& operator=(int rhs)                noexcept { return convert_signed_integer(rhs); }
23,186✔
605
        constexpr cfloat& operator=(long rhs)               noexcept { return convert_signed_integer(rhs); }
606
        constexpr cfloat& operator=(long long rhs)          noexcept { return convert_signed_integer(rhs); }
1✔
607

608
        constexpr cfloat& operator=(char rhs)               noexcept { return convert_unsigned_integer(rhs); }
609
        constexpr cfloat& operator=(unsigned short rhs)     noexcept { return convert_unsigned_integer(rhs); }
610
        constexpr cfloat& operator=(unsigned int rhs)       noexcept { return convert_unsigned_integer(rhs); }
2,685✔
611
        constexpr cfloat& operator=(unsigned long rhs)      noexcept { return convert_unsigned_integer(rhs); }
110✔
612
        constexpr cfloat& operator=(unsigned long long rhs) noexcept { return convert_unsigned_integer(rhs); }
613

614
        BIT_CAST_CONSTEXPR cfloat& operator=(float rhs)     noexcept { return convert_ieee754(rhs); }
32,488,684✔
615
        BIT_CAST_CONSTEXPR cfloat& operator=(double rhs)    noexcept { return convert_ieee754(rhs); }
105,792,286✔
616

617
        // make conversions to native types explicit
618
        explicit constexpr operator int()                       const noexcept { return to_int(); }
619
        explicit constexpr operator long()                      const noexcept { return to_long(); }
620
        explicit constexpr operator long long()                 const noexcept { return to_long_long(); }
1✔
621
        explicit constexpr operator float()                     const noexcept { return to_native<float>(); }
35,177,193✔
622
        explicit constexpr operator double()                    const noexcept { return to_native<double>(); }
78,370,312✔
623

624
        // guard long double support to enable ARM and RISC-V embedded environments
625
#if LONG_DOUBLE_SUPPORT
626
        explicit constexpr operator long double()               const noexcept { return to_native<long double>(); }
103✔
627
        BIT_CAST_CONSTEXPR cfloat(long double iv)           noexcept : _block{} { *this = iv; }
8✔
628
        BIT_CAST_CONSTEXPR cfloat& operator=(long double rhs)  noexcept { return convert_ieee754(rhs); }
80✔
629
#endif
630

631
        // arithmetic operators
632
        // prefix operator
633
        constexpr inline cfloat operator-() const {
8,732,304✔
634
                cfloat tmp(*this);
8,732,304✔
635
                tmp._block[MSU] ^= SIGN_BIT_MASK;
8,732,304✔
636
                return tmp;
8,732,304✔
637
        }
638

639
        constexpr cfloat& operator+=(const cfloat& rhs) CFLOAT_EXCEPT {
19,183,834✔
640
                if constexpr (_trace_add) std::cout << "---------------------- ADD -------------------" << std::endl;
641
                // special case handling of the inputs
642
#if CFLOAT_THROW_ARITHMETIC_EXCEPTION
643
                if (isnan(NAN_TYPE_SIGNALLING) || rhs.isnan(NAN_TYPE_SIGNALLING)) {
739✔
644
                        throw cfloat_operand_is_nan{};
×
645
                }
646
#else
647
                if (isnan(NAN_TYPE_SIGNALLING) || rhs.isnan(NAN_TYPE_SIGNALLING)) {
19,183,095✔
648
                        setnan(NAN_TYPE_SIGNALLING);
234,874✔
649
                        return *this;
234,874✔
650
                }
651
                if (isnan(NAN_TYPE_QUIET) || rhs.isnan(NAN_TYPE_QUIET)) {
18,948,221✔
652
                        setnan(NAN_TYPE_QUIET);
215,298✔
653
                        return *this;
215,298✔
654
                }
655
#endif
656
                // normal + inf  = inf
657
                // normal + -inf = -inf
658
                // inf + normal = inf
659
                // inf + inf    = inf
660
                // inf + -inf    = ?
661
                // -inf + normal = -inf
662
                // -inf + -inf   = -inf
663
                // -inf + inf    = ?
664
                if (isinf()) {
18,733,662✔
665
                        if (rhs.isinf()) {
60,327✔
666
                                if (sign() != rhs.sign()) {
939✔
667
                                        setnan(NAN_TYPE_SIGNALLING);
514✔
668
                                }
669
                                return *this;
939✔
670
                        }
671
                        else {
672
                                return *this;
59,388✔
673
                        }
674
                }
675
                else {
676
                        if (rhs.isinf()) {
18,673,335✔
677
                                *this = rhs;
60,432✔
678
                                return *this;
60,432✔
679
                        }
680
                }
681

682
                if (iszero()) {
18,612,903✔
683
                        *this = rhs;
215,661✔
684
                        return *this;
215,661✔
685
                }
686
                if (rhs.iszero()) return *this;
18,397,242✔
687

688
                // arithmetic operation
689
                blocktriple<fbits, BlockTripleOperator::ADD, bt> a, b, sum;
18,228,294✔
690

691
                // transform the inputs into (sign,scale,significant) 
692
                normalizeAddition(a); 
18,228,294✔
693
                rhs.normalizeAddition(b);
18,228,294✔
694
                sum.add(a, b);
18,228,294✔
695

696
                convert(sum, *this);
18,228,294✔
697

698
                return *this;
18,228,294✔
699
        }
700
        constexpr cfloat& operator+=(double rhs) CFLOAT_EXCEPT {
701
                return *this += cfloat(rhs);
702
        }
703
        constexpr cfloat& operator-=(const cfloat& rhs) CFLOAT_EXCEPT {
8,815,259✔
704
                if constexpr (_trace_sub) std::cout << "---------------------- SUB -------------------" << std::endl;
705
                if (rhs.isnan()) 
8,815,259✔
706
                        return *this += rhs;
170,204✔
707
                else 
708
                        return *this += -rhs;
8,645,055✔
709
        }
710
        constexpr cfloat& operator-=(double rhs) CFLOAT_EXCEPT {
8✔
711
                return *this -= cfloat(rhs);
8✔
712
        }
713
        constexpr cfloat& operator*=(const cfloat& rhs) CFLOAT_EXCEPT {
4,208,292✔
714
                if constexpr (_trace_mul) std::cout << "---------------------- MUL -------------------\n";
322✔
715
                // special case handling of the inputs
716
#if CFLOAT_THROW_ARITHMETIC_EXCEPTION
717
                if (isnan(NAN_TYPE_SIGNALLING) || rhs.isnan(NAN_TYPE_SIGNALLING)) {
613✔
718
                        throw cfloat_operand_is_nan{};
×
719
                }
720
#else
721
                if (isnan(NAN_TYPE_SIGNALLING) || rhs.isnan(NAN_TYPE_SIGNALLING)) {
4,207,679✔
722
                        setnan(NAN_TYPE_SIGNALLING);
124,657✔
723
                        return *this;
124,657✔
724
                }
725
                if (isnan(NAN_TYPE_QUIET) || rhs.isnan(NAN_TYPE_QUIET)) {
4,083,022✔
726
                        setnan(NAN_TYPE_QUIET);
113,354✔
727
                        return *this;
113,354✔
728
                }
729
#endif
730
                //  inf * inf = inf
731
                //  inf * -inf = -inf
732
                // -inf * inf = -inf
733
                // -inf * -inf = inf
734
                //        0 * inf = -nan(ind)
735
                //        inf * 0 = -nan(ind)
736
                bool resultSign = sign() != rhs.sign();
3,970,281✔
737
                if (isinf()) {
3,970,281✔
738
                        if (rhs.iszero()) {
24,458✔
739
                                setnan(NAN_TYPE_QUIET);
1,591✔
740
                        }
741
                        else {
742
                                setsign(resultSign);
22,867✔
743
                        }
744
                        return *this;
24,458✔
745
                }
746
                if (rhs.isinf()) {
3,945,823✔
747
                        if (iszero()) {
25,815✔
748
                                setnan(NAN_TYPE_QUIET);
2,969✔
749
                        }
750
                        else {
751
                                setinf(resultSign);
22,846✔
752
                        }
753
                        return *this;
25,815✔
754
                }
755

756
                if (iszero() || rhs.iszero()) {                        
3,920,008✔
757
                        setzero();
1,816,969✔
758
                        setsign(resultSign); // deal with negative 0
1,816,969✔
759
                        return *this;
1,816,969✔
760
                }
761

762
                // arithmetic operation
763
                blocktriple<fbits, BlockTripleOperator::MUL, bt> a, b, product;
2,103,039✔
764

765
                // transform the inputs into (sign,scale,significant) 
766
                // triples of the correct width
767
                normalizeMultiplication(a);
2,103,039✔
768
                rhs.normalizeMultiplication(b);
2,103,039✔
769
                product.mul(a, b);
2,103,039✔
770
                convert(product, *this);
2,103,039✔
771

772
                if constexpr (_trace_mul) std::cout << to_binary(a) << " : " << a << " *\n" << to_binary(b) << " : " << b << " =\n" << to_binary(product) << " : " << product << '\n';
106✔
773

774
                return *this;
2,103,039✔
775
        }
776
        constexpr cfloat& operator*=(double rhs) CFLOAT_EXCEPT {
40✔
777
                return *this *= cfloat(rhs);
40✔
778
        }
779
        constexpr cfloat& operator/=(const cfloat& rhs) CFLOAT_EXCEPT {
5,023,391✔
780
                if constexpr (_trace_div) std::cout << "---------------------- DIV -------------------" << std::endl;
781

782
                // special case handling of the inputs
783
                // qnan / qnan = qnan
784
                // qnan / snan = qnan
785
                // snan / qnan = snan
786
                // snan / snan = snan
787
#if CFLOAT_THROW_ARITHMETIC_EXCEPTION
788
                if (rhs.iszero()) throw cfloat_divide_by_zero();
49✔
789
                if (rhs.isnan()) throw cfloat_divide_by_nan();
48✔
790
                if (isnan()) throw cfloat_operand_is_nan();
48✔
791
#else
792
                if (isnan(NAN_TYPE_SIGNALLING) || rhs.isnan(NAN_TYPE_SIGNALLING)) {
5,023,342✔
793
                        setnan(NAN_TYPE_SIGNALLING);
162,762✔
794
                        return *this;
162,762✔
795
                }
796
                if (isnan(NAN_TYPE_QUIET) || rhs.isnan(NAN_TYPE_QUIET)) {
4,860,580✔
797
                        setnan(NAN_TYPE_QUIET);
149,935✔
798
                        return *this;
149,935✔
799
                }
800
                if (rhs.iszero()) {
4,710,645✔
801
                        if (iszero()) {
169,760✔
802
                                // zero divide by zero yields quiet NaN (in MSVC it is labeled -nan(ind) for indeterminate)
803
                                setnan(NAN_TYPE_QUIET);
29,299✔
804
                        }
805
                        else {
806
                                // non-zero divide by zero yields INF
807
                                bool resultSign = sign() != rhs.sign();
140,461✔
808
                                setinf(resultSign);
140,461✔
809
                        }
810
                        return *this;
169,760✔
811
                }
812
#endif
813
                //  inf /  inf = -nan(ind)
814
                //  inf / -inf = -nan(ind)
815
                // -inf /  inf = -nan(ind)
816
                // -inf / -inf = -nan(ind)
817
                //        1.0 /  inf = 0
818
                bool resultSign = sign() != rhs.sign();
4,540,933✔
819
                if (isinf()) {
4,540,933✔
820
                        if (rhs.isinf()) {
31,087✔
821
                                // inf divide by inf yields quiet NaN (in MSVC it is labeled -nan(ind) for indeterminate)
822
                                setnan(NAN_TYPE_QUIET);
559✔
823
                                return *this;
559✔
824
                        }
825
                        else {
826
                                // we stay an infinite but may change sign
827
                                setsign(resultSign);
30,528✔
828
                                return *this;
30,528✔
829
                        }
830
                }
831
                else {
832
                        if (rhs.isinf()) {
4,509,846✔
833
                                setzero();
32,833✔
834
                                setsign(resultSign);
32,833✔
835
                                return *this;
32,833✔
836
                        }
837
                }
838

839
                if (iszero()) {
4,477,013✔
840
                        setzero();
139,460✔
841
                        setsign(resultSign); // deal with negative 0
139,460✔
842
                        return *this;
139,460✔
843
                }
844

845
                // arithmetic operation
846
                using BlockTriple = blocktriple<fbits, BlockTripleOperator::DIV, bt>;
847
                BlockTriple a, b, quotient;
4,337,553✔
848

849
                // transform the inputs into (sign,scale,significant) 
850
                // triples of the correct width
851
                normalizeDivision(a);
4,337,553✔
852
                rhs.normalizeDivision(b);
4,337,553✔
853
                quotient.div(a, b);
4,337,553✔
854
                quotient.setradix(BlockTriple::radix);
4,337,553✔
855
                convert(quotient, *this);
4,337,553✔
856

857
                if constexpr (_trace_div) std::cout << to_binary(a) << " : " << a << " /\n" << to_binary(b) << " : " << b << " =\n" << to_binary(quotient) << " : " << quotient << '\n';
858

859
                return *this;
4,337,553✔
860
        }
861
        constexpr cfloat& operator/=(double rhs) CFLOAT_EXCEPT {
862
                return *this /= cfloat(rhs);
863
        }
864
        constexpr cfloat& reciprocal() CFLOAT_EXCEPT {
865
                cfloat c = 1.0 / *this;
866
                return *this = c;
867
        }
868
        /// <summary>
869
        /// move to the next bit encoding modulo 2^nbits
870
        /// </summary>
871
        /// <typeparam name="bt"></typeparam>
872
        constexpr cfloat& operator++() {
4,596,089✔
873
                if constexpr (0 == nrBlocks) {
874
                        return *this;
875
                }
876
                else if constexpr (1 == nrBlocks) {
877
                        if (sign()) {
861,696✔
878
                                if (_block[MSU] == (SIGN_BIT_MASK | 1ul)) { // pattern: 1.00.001 = minneg
467✔
879
                                        _block[MSU] = 0; // pattern: 0.00.000 = +0 
7✔
880
                                }
881
                                else {
882
                                        --_block[MSU];
460✔
883
                                }
884
                                if constexpr (!hasSubnormals) {
885
                                        if (isdenormal()) {
198✔
886
                                                // special case, we need to jump past all the subnormal value encodings which puts us on 0
887
                                                _block[MSU] = 0; // pattern: 0.00.000 = +0
3✔
888
                                        }
889
                                }
890
                        }
891
                        else {
892
                                if constexpr (!hasSubnormals) {
893
                                        if (_block[MSU] == 0) {
384,647✔
894
                                                // special case, we need to jump past all the subnormal value encodings minus 1
895
                                                setfraction(0xFFFF'FFFF'FFFF'FFFFull);
4✔
896
                                        }
897
                                }
898
                                if ((_block[MSU] & (MSU_MASK >> 1)) == (MSU_MASK >> 1)) { // pattern: 0.11.111 = nan
861,229✔
899
                                        _block[MSU] |= SIGN_BIT_MASK; // pattern: 1.11.111 = snan : wrap to the other side of the encoding
×
900
                                }
901
                                else {
902
                                        ++_block[MSU];
861,229✔
903
                                }
904
                        }
905
                }
906
                else {
907
                        if (sign()) {
3,734,393✔
908
                                // special case: pattern: 1.00.001 = minneg transitions to pattern: 0.00.000 = +0 
909
                                if (isminnegencoding()) {
66,947✔
910
                                        setzero();
8✔
911
                                }
912
                                else {
913
                                        //  1111 0000
914
                                        //  1110 1111
915
                                        bool borrow = true;
66,939✔
916
                                        for (unsigned i = 0; i < MSU; ++i) {
199,415✔
917
                                                if (borrow) {
132,476✔
918
                                                        if ((_block[i] & storageMask) == 0) { // block will underflow
67,197✔
919
                                                                --_block[i];
261✔
920
                                                                borrow = true;
261✔
921
                                                        }
922
                                                        else {
923
                                                                --_block[i];
66,936✔
924
                                                                borrow = false;
66,936✔
925
                                                        }
926
                                                }
927
                                        }
928
                                        if (borrow) {
66,939✔
929
                                                --_block[MSU];
3✔
930
                                        }
931
                                        if constexpr (!hasSubnormals) {
932
                                                if (isdenormal()) {
385✔
933
                                                        // special case, we need to jump past all the subnormal value encodings which puts us on 0
934
                                                        setzero(); // pattern: 0.00.000 = +0
3✔
935
                                                }
936
                                        }
937
                                }
938
                        }
939
                        else {
940
                                // special case: pattern: 0.11.111 = nan transitions to pattern: 1.11.111 = snan 
941
                                if (isnanencoding()) {
3,667,446✔
942
                                        setnan(NAN_TYPE_SIGNALLING);
×
943
                                }
944
                                else {
945
                                        if constexpr (!hasSubnormals) {
946
                                                if (iszero()) {
1,720,661✔
947
                                                        // special case, we need to jump past all the subnormal value encodings minus 1 so that the increment code below ends up on normal minpos
948
                                                        setfraction(0xFFFF'FFFF'FFFF'FFFFull);
3✔
949
                                                }
950
                                        }
951
                                        bool carry = true;
3,667,446✔
952
                                        for (unsigned i = 0; i < MSU; ++i) {
7,400,789✔
953
                                                if (carry) {
3,733,343✔
954
                                                        if ((_block[i] & storageMask) == storageMask) { // block will overflow
3,667,701✔
955
                                                                _block[i] = 0;
256✔
956
                                                                carry = true;
256✔
957
                                                        }
958
                                                        else {
959
                                                                ++_block[i];
3,667,445✔
960
                                                                carry = false;
3,667,445✔
961
                                                        }
962
                                                }
963
                                        }
964
                                        if (carry) {
3,667,446✔
965
                                                ++_block[MSU];
1✔
966
                                        }
967
                                }
968
                        }
969
                }
970
                return *this;
4,596,089✔
971
        }
972
        constexpr cfloat operator++(int) {
134,800✔
973
                cfloat tmp(*this);
134,800✔
974
                operator++();
134,800✔
975
                return tmp;
134,800✔
976
        }
977
        constexpr cfloat& operator--() {
5,014,100✔
978
                if constexpr (0 == nrBlocks) {
979
                        return *this;
980
                }
981
                else if constexpr (1 == nrBlocks) {
982
                        if (sign()) {
942,793✔
983
                                ++_block[MSU];
942,299✔
984
                        }
985
                        else {
986
                                // positive range
987
                                if (_block[MSU] == 0) { // pattern: 0.00.000 = 0
494✔
988
                                        if constexpr (hasSubnormals) {
989
                                                _block[MSU] |= SIGN_BIT_MASK | bt(1u); // pattern: 1.00.001 = minneg 
8✔
990
                                        }
991
                                        else {
992
                                                // special case, we need to jump past all the subnormal value encodings
993
                                                setfraction(0xFFFF'FFFF'FFFF'FFFFull); // set to 0.00.11...11
3✔
994
                                                ++_block[MSU]; // increment into 0.01.0000
3✔
995
                                                _block[MSU] |= SIGN_BIT_MASK; // set to 1.01.0000
3✔
996
                                        }
997
                                }
998
                                else {
999
                                        --_block[MSU];
483✔
1000
                                }
1001
                                if constexpr (!hasSubnormals) {
1002
                                        if (isdenormal()) {
211✔
1003
                                                // special case, we need to jump past all the subnormal value encodings which puts us on 0
1004
                                                _block[MSU] = 0; // pattern: 0.00.000 = +0
3✔
1005
                                        }
1006
                                }
1007
                        }
1008
                }
1009
                else {
1010
                        if (sign()) {
4,071,307✔
1011
                                bool carry = true;
4,004,349✔
1012
                                for (unsigned i = 0; i < MSU; ++i) {
8,074,235✔
1013
                                        if (carry) {
4,069,886✔
1014
                                                if ((_block[i] & storageMask) == storageMask) { // block will overflow
4,004,604✔
1015
                                                        _block[i] = 0;
256✔
1016
                                                        carry = true;
256✔
1017
                                                }
1018
                                                else {
1019
                                                        ++_block[i];
4,004,348✔
1020
                                                        carry = false;
4,004,348✔
1021
                                                }
1022
                                        }
1023
                                }
1024
                                if (carry) {
4,004,349✔
1025
                                        ++_block[MSU];
1✔
1026
                                }
1027
                        }
1028
                        else {
1029
                                // special case: pattern: 0.00.000 = +0 transitions to pattern: 1.00.001 = minneg
1030
                                if (iszeroencoding()) {
66,958✔
1031
                                        if constexpr (hasSubnormals) {
1032
                                                setsign(true);
8✔
1033
                                                setbit(0, true);
8✔
1034
                                        }
1035
                                        else {
1036
                                                // special case, we need to jump past all the subnormal value encodings 1.01.0000 = minneg normal
1037
                                                setexponent(1 - EXP_BIAS);
2✔
1038
                                                setsign(true);
2✔
1039
                                        }
1040
                                }
1041
                                else {
1042
                                        bool borrow = true;
66,948✔
1043
                                        for (unsigned i = 0; i < MSU; ++i) {
199,441✔
1044
                                                if (borrow) {
132,493✔
1045
                                                        if ((_block[i] & storageMask) == 0) { // block will underflow
67,209✔
1046
                                                                --_block[i];
266✔
1047
                                                                borrow = true;
266✔
1048
                                                        }
1049
                                                        else {
1050
                                                                --_block[i];
66,943✔
1051
                                                                borrow = false;
66,943✔
1052
                                                        }
1053
                                                }
1054
                                        }
1055
                                        if (borrow) {
66,948✔
1056
                                                --_block[MSU];
5✔
1057
                                        }
1058
                                        if constexpr (!hasSubnormals) {
1059
                                                if (isdenormal()) {
384✔
1060
                                                        // special case, we need to jump past all the subnormal value encodings which puts us on 0
1061
                                                        setzero(); // pattern: 0.00.000 = +0
2✔
1062
                                                }
1063
                                        }
1064
                                }
1065
                        }
1066
                }
1067
                return *this;
5,014,100✔
1068
        }
1069
        constexpr cfloat operator--(int) {
134,812✔
1070
                cfloat tmp(*this);
134,812✔
1071
                operator--();
134,812✔
1072
                return tmp;
134,812✔
1073
        }
1074

1075
        // modifiers        
1076
        constexpr void clear() noexcept {
143,264,297✔
1077
                if constexpr (0 == nrBlocks) {
1078
                        return;
1079
                }
1080
                else if constexpr (1 == nrBlocks) {
1081
                        _block[0] = bt(0);
41,091,867✔
1082
                }
1083
                else if constexpr (2 == nrBlocks) {
1084
                        _block[0] = bt(0);
48,065,097✔
1085
                        _block[1] = bt(0);
48,065,097✔
1086
                }
1087
                else if constexpr (3 == nrBlocks) {
1088
                        _block[0] = bt(0);
53,973,314✔
1089
                        _block[1] = bt(0);
53,973,314✔
1090
                        _block[2] = bt(0);
53,973,314✔
1091
                }
1092
                else if constexpr (4 == nrBlocks) {
1093
                        _block[0] = bt(0);
43,336✔
1094
                        _block[1] = bt(0);
43,336✔
1095
                        _block[2] = bt(0);
43,336✔
1096
                        _block[3] = bt(0);
43,336✔
1097
                }
1098
                else if constexpr (5 == nrBlocks) {
1099
                        _block[0] = bt(0);
29,999✔
1100
                        _block[1] = bt(0);
29,999✔
1101
                        _block[2] = bt(0);
29,999✔
1102
                        _block[3] = bt(0);
29,999✔
1103
                        _block[4] = bt(0);
29,999✔
1104
                }
1105
                else if constexpr (6 == nrBlocks) {
1106
                        _block[0] = bt(0);
10,015✔
1107
                        _block[1] = bt(0);
10,015✔
1108
                        _block[2] = bt(0);
10,015✔
1109
                        _block[3] = bt(0);
10,015✔
1110
                        _block[4] = bt(0);
10,015✔
1111
                        _block[5] = bt(0);
10,015✔
1112
                }
1113
                else if constexpr (7 == nrBlocks) {
1114
                        _block[0] = bt(0);
9,999✔
1115
                        _block[1] = bt(0);
9,999✔
1116
                        _block[2] = bt(0);
9,999✔
1117
                        _block[3] = bt(0);
9,999✔
1118
                        _block[4] = bt(0);
9,999✔
1119
                        _block[5] = bt(0);
9,999✔
1120
                        _block[6] = bt(0);
9,999✔
1121
                }
1122
                else if constexpr (8 == nrBlocks) {
1123
                        _block[0] = bt(0);
20,323✔
1124
                        _block[1] = bt(0);
20,323✔
1125
                        _block[2] = bt(0);
20,323✔
1126
                        _block[3] = bt(0);
20,323✔
1127
                        _block[4] = bt(0);
20,323✔
1128
                        _block[5] = bt(0);
20,323✔
1129
                        _block[6] = bt(0);
20,323✔
1130
                        _block[7] = bt(0);
20,323✔
1131
                }
1132
                else {
1133
                        for (unsigned i = 0; i < nrBlocks; ++i) {
224,339✔
1134
                                _block[i] = bt(0);
203,992✔
1135
                        }
1136
                }
1137
        }
143,264,297✔
1138
        constexpr void setzero() noexcept { clear(); }
2,618,435✔
1139
        constexpr void setinf(bool sign = true) noexcept {
6,548,832✔
1140
                // the Inf encoding is the pattern 0b0'11...11'11...10 for a +inf, and 0b1'11...11'11...110 for a -inf
1141
                if constexpr (0 == nrBlocks) {
1142
                        return;
1143
                }
1144
                else if constexpr (1 == nrBlocks) {
1145
                        _block[MSU] = sign ? bt(MSU_MASK ^ LSB_BIT_MASK) : bt(~SIGN_BIT_MASK & (MSU_MASK ^ LSB_BIT_MASK));
2,192,471✔
1146
                }
1147
                else if constexpr (2 == nrBlocks) {
1148
                        _block[0] = BLOCK_MASK ^ LSB_BIT_MASK;
1,859,357✔
1149
                        _block[MSU] = sign ? MSU_MASK : bt(~SIGN_BIT_MASK & MSU_MASK);
1,859,357✔
1150
                }
1151
                else if constexpr (3 == nrBlocks) {
1152
                        _block[0] = BLOCK_MASK ^ LSB_BIT_MASK;
2,493,876✔
1153
                        _block[1] = BLOCK_MASK;
2,493,876✔
1154
                        _block[MSU] = sign ? MSU_MASK : bt(~SIGN_BIT_MASK & MSU_MASK);
2,493,876✔
1155
                }
1156
                else if constexpr (4 == nrBlocks) {
1157
                        _block[0] = BLOCK_MASK ^ LSB_BIT_MASK;
2,832✔
1158
                        _block[1] = BLOCK_MASK;
2,832✔
1159
                        _block[2] = BLOCK_MASK;
2,832✔
1160
                        _block[MSU] = sign ? MSU_MASK : bt(~SIGN_BIT_MASK & MSU_MASK);
2,832✔
1161
                }
1162
                else if constexpr (5 == nrBlocks) {
1163
                        _block[0] = BLOCK_MASK ^ LSB_BIT_MASK;
83✔
1164
                        _block[1] = BLOCK_MASK;
83✔
1165
                        _block[2] = BLOCK_MASK;
83✔
1166
                        _block[3] = BLOCK_MASK;
83✔
1167
                        _block[MSU] = sign ? MSU_MASK : bt(~SIGN_BIT_MASK & MSU_MASK);
83✔
1168
                }
1169
                else if constexpr (6 == nrBlocks) {
1170
                        _block[0] = BLOCK_MASK ^ LSB_BIT_MASK;
45✔
1171
                        _block[1] = BLOCK_MASK;
45✔
1172
                        _block[2] = BLOCK_MASK;
45✔
1173
                        _block[3] = BLOCK_MASK;
45✔
1174
                        _block[4] = BLOCK_MASK;
45✔
1175
                        _block[MSU] = sign ? MSU_MASK : bt(~SIGN_BIT_MASK & MSU_MASK);
45✔
1176
                }
1177
                else if constexpr (7 == nrBlocks) {
1178
                        _block[0] = BLOCK_MASK ^ LSB_BIT_MASK;
49✔
1179
                        _block[1] = BLOCK_MASK;
49✔
1180
                        _block[2] = BLOCK_MASK;
49✔
1181
                        _block[3] = BLOCK_MASK;
49✔
1182
                        _block[4] = BLOCK_MASK;
49✔
1183
                        _block[5] = BLOCK_MASK;
49✔
1184
                        _block[MSU] = sign ? MSU_MASK : bt(~SIGN_BIT_MASK & MSU_MASK);
49✔
1185
                }
1186
                else if constexpr (8 == nrBlocks) {
1187
                        _block[0] = BLOCK_MASK ^ LSB_BIT_MASK;
79✔
1188
                        _block[1] = BLOCK_MASK;
79✔
1189
                        _block[2] = BLOCK_MASK;
79✔
1190
                        _block[3] = BLOCK_MASK;
79✔
1191
                        _block[4] = BLOCK_MASK;
79✔
1192
                        _block[5] = BLOCK_MASK;
79✔
1193
                        _block[6] = BLOCK_MASK;
79✔
1194
                        _block[MSU] = sign ? MSU_MASK : bt(~SIGN_BIT_MASK & MSU_MASK);
79✔
1195
                }
1196
                else {
1197
                        _block[0] = BLOCK_MASK ^ LSB_BIT_MASK;
40✔
1198
                        for (unsigned i = 1; i < nrBlocks - 1; ++i) {
360✔
1199
                                _block[i] = BLOCK_MASK;
320✔
1200
                        }
1201
                        _block[MSU] = sign ? MSU_MASK : bt(~SIGN_BIT_MASK & MSU_MASK);
40✔
1202
                }        
1203
        }
6,548,832✔
1204
        constexpr void setnan(int NaNType = NAN_TYPE_SIGNALLING) noexcept {
7,118,298✔
1205
                // the NaN encoding is the pattern 0b0'11...11'11...11 for a quiet Nan, and 0b1'11...11'11...111 for a signalling NaN
1206
                if constexpr (0 == nrBlocks) {
1207
                        return;
1208
                }
1209
                else if constexpr (1 == nrBlocks) {
1210
                        // fall through
1211
                }
1212
                else if constexpr (2 == nrBlocks) {
1213
                        _block[0] = BLOCK_MASK;
1,994,923✔
1214
                }
1215
                else if constexpr (3 == nrBlocks) {
1216
                        _block[0] = BLOCK_MASK;
1,048,701✔
1217
                        _block[1] = BLOCK_MASK;
1,048,701✔
1218
                }
1219
                else if constexpr (4 == nrBlocks) {
1220
                        _block[0] = BLOCK_MASK;
30✔
1221
                        _block[1] = BLOCK_MASK;
30✔
1222
                        _block[2] = BLOCK_MASK;
30✔
1223
                }
1224
                else if constexpr (5 == nrBlocks) {
1225
                        _block[0] = BLOCK_MASK;
10✔
1226
                        _block[1] = BLOCK_MASK;
10✔
1227
                        _block[2] = BLOCK_MASK;
10✔
1228
                        _block[3] = BLOCK_MASK;
10✔
1229
                }
1230
                else if constexpr (6 == nrBlocks) {
1231
                        _block[0] = BLOCK_MASK;
4✔
1232
                        _block[1] = BLOCK_MASK;
4✔
1233
                        _block[2] = BLOCK_MASK;
4✔
1234
                        _block[3] = BLOCK_MASK;
4✔
1235
                        _block[4] = BLOCK_MASK;
4✔
1236
                }
1237
                else if constexpr (7 == nrBlocks) {
1238
                        _block[0] = BLOCK_MASK;
4✔
1239
                        _block[1] = BLOCK_MASK;
4✔
1240
                        _block[2] = BLOCK_MASK;
4✔
1241
                        _block[3] = BLOCK_MASK;
4✔
1242
                        _block[4] = BLOCK_MASK;
4✔
1243
                        _block[5] = BLOCK_MASK;
4✔
1244
                }
1245
                else if constexpr (8 == nrBlocks) {
1246
                        _block[0] = BLOCK_MASK;
4✔
1247
                        _block[1] = BLOCK_MASK;
4✔
1248
                        _block[2] = BLOCK_MASK;
4✔
1249
                        _block[3] = BLOCK_MASK;
4✔
1250
                        _block[4] = BLOCK_MASK;
4✔
1251
                        _block[5] = BLOCK_MASK;
4✔
1252
                        _block[6] = BLOCK_MASK;
4✔
1253
                }
1254
                else {
1255
                        for (unsigned i = 0; i < nrBlocks - 1; ++i) {
100✔
1256
                                _block[i] = BLOCK_MASK;
90✔
1257
                        }
1258
                }
1259
                _block[MSU] = NaNType == NAN_TYPE_SIGNALLING ? MSU_MASK : bt(~SIGN_BIT_MASK & MSU_MASK);
7,118,298✔
1260
        }
7,118,298✔
1261
        constexpr void setsign(bool sign = true) {
3,404,112✔
1262
                if (sign) {
3,404,112✔
1263
                        _block[MSU] |= SIGN_BIT_MASK;
614,620✔
1264
                }
1265
                else {
1266
                        _block[MSU] &= ~SIGN_BIT_MASK;
2,789,492✔
1267
                }
1268
        }
3,404,112✔
1269
        constexpr bool setexponent(int scale) {
533,857✔
1270
                if (scale < MIN_EXP_SUBNORMAL || scale > MAX_EXP) return false; // this scale cannot be represented
533,857✔
1271
                if constexpr (nbits < 65) {
1272
                        uint32_t exponentBits = static_cast<uint32_t>(scale + EXP_BIAS);
532,315✔
1273
                        if (scale >= MIN_EXP_SUBNORMAL && scale < MIN_EXP_NORMAL) {
532,315✔
1274
                                // setexponent() only writes the encoding field. In the subnormal interval the true scale is carried
1275
                                // by a zero exponent field plus a left-shifted fraction, so callers must have already denormalized
1276
                                // the significand before asking for this exponent pattern.
1277
                                // we are a subnormal number: all exponent bits are 0
1278
                                exponentBits = 0;
45✔
1279
                        }
1280
                        // TODO: optimize
1281
                        uint32_t mask = (1ul << (es - 1));
532,315✔
1282
                        for (unsigned i = nbits - 2; i > nbits - 2 - es; --i) {
4,800,718✔
1283
                                setbit(i, (mask & exponentBits));
4,268,403✔
1284
                                mask >>= 1;
4,268,403✔
1285
                        }
1286
                }
1287
                else {
1288
                        uint32_t exponentBits = static_cast<uint32_t>(scale + EXP_BIAS);
1,542✔
1289
                        uint32_t mask = (1ul << (es - 1));
1,542✔
1290
                        for (unsigned i = nbits - 2; i > nbits - 2 - es; --i) {
25,172✔
1291
                                setbit(i, (mask & exponentBits));
23,630✔
1292
                                mask >>= 1;
23,630✔
1293
                        }
1294
                }
1295
                return true;
533,857✔
1296
        }
1297
        constexpr void setfraction(uint64_t raw_bits) {
610✔
1298
                // unoptimized as it is not meant to be an end-user API, it is a test API
1299
                // raw_bits is uint64_t so can have at most 64 bits of fraction data
1300
                constexpr unsigned bitsToSet = (fbits < 64) ? fbits : 64;
610✔
1301
                uint64_t mask{ 1ull };
610✔
1302
                for (unsigned i = 0; i < bitsToSet; ++i) {
24,744✔
1303
                        setbit(i, (mask & raw_bits));
24,134✔
1304
                        mask <<= 1;
24,134✔
1305
                }
1306
        }
610✔
1307
        constexpr void setbit(unsigned i, bool v = true) noexcept {
8,137,756✔
1308
                unsigned blockIndex = i / bitsInBlock;
8,137,756✔
1309
                if (blockIndex < nrBlocks) {
8,137,756✔
1310
                        bt block = _block[blockIndex];
8,137,756✔
1311
                        bt null = ~(1ull << (i % bitsInBlock));
8,137,756✔
1312
                        bt bit = bt(v ? 1 : 0);
8,137,756✔
1313
                        bt mask = bt(bit << (i % bitsInBlock));
8,137,756✔
1314
                        _block[blockIndex] = bt((block & null) | mask);
8,137,756✔
1315
                }
1316
        }
8,137,756✔
1317
        constexpr cfloat& setbits(uint64_t raw_bits) noexcept {
307,384,313✔
1318
                if constexpr (0 == nrBlocks) {
1319
                        return *this;
1320
                }
1321
                else if constexpr (1 == nrBlocks) {
1322
                        _block[0] = raw_bits & storageMask;
90,000,831✔
1323
                }
1324
                else if constexpr (2 == nrBlocks) {
1325
                        if constexpr (bitsInBlock < 64) {
1326
                                _block[0] = raw_bits & storageMask;
103,266,054✔
1327
                                raw_bits >>= bitsInBlock;
103,266,054✔
1328
                                _block[1] = raw_bits & storageMask;
103,266,054✔
1329
                        }
1330
                        else {
1331
                                _block[0] = raw_bits & storageMask;
1332
                                _block[1] = 0;
1333
                        }
1334
                }
1335
                else if constexpr (3 == nrBlocks) {
1336
                        if constexpr (bitsInBlock < 64) {
1337
                                _block[0] = raw_bits & storageMask;
113,957,383✔
1338
                                raw_bits >>= bitsInBlock;
113,957,383✔
1339
                                _block[1] = raw_bits & storageMask;
113,957,383✔
1340
                                raw_bits >>= bitsInBlock;
113,957,383✔
1341
                                _block[2] = raw_bits & storageMask;
113,957,383✔
1342
                        }
1343
                        else {
1344
                                _block[0] = raw_bits & storageMask;
1345
                                _block[1] = 0;
1346
                                _block[2] = 0;
1347
                        }
1348
                }
1349
                else if constexpr (4 == nrBlocks) {
1350
                        if constexpr (bitsInBlock < 64) {
1351
                                _block[0] = raw_bits & storageMask;
70,059✔
1352
                                raw_bits >>= bitsInBlock;
70,059✔
1353
                                _block[1] = raw_bits & storageMask;
70,059✔
1354
                                raw_bits >>= bitsInBlock;
70,059✔
1355
                                _block[2] = raw_bits & storageMask;
70,059✔
1356
                                raw_bits >>= bitsInBlock;
70,059✔
1357
                                _block[3] = raw_bits & storageMask;
70,059✔
1358
                        }
1359
                        else {
1360
                                _block[0] = raw_bits & storageMask;
1361
                                _block[1] = 0;
1362
                                _block[2] = 0;
1363
                                _block[3] = 0;
1364
                        }
1365
                }
1366
                else {
1367
                        if constexpr (bitsInBlock < 64) {
1368
                                for (unsigned i = 0; i < nrBlocks; ++i) {
730,503✔
1369
                                        _block[i] = raw_bits & storageMask;
640,517✔
1370
                                        raw_bits >>= bitsInBlock;
640,517✔
1371
                                }
1372
                        }
1373
                        else {
1374
                                _block[0] = raw_bits & storageMask;
1375
                                for (unsigned i = 1; i < nrBlocks; ++i) {
1376
                                        _block[i] = 0;
1377
                                }
1378
                        }
1379
                }
1380
                _block[MSU] &= MSU_MASK; // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
307,384,313✔
1381
                return *this;
307,384,313✔
1382
        }
1383
        constexpr void setblock(unsigned b, bt data) noexcept {
542,410✔
1384
                if (b < nrBlocks) _block[b] = data;
542,410✔
1385
        }
542,410✔
1386
        
1387
        // create specific number system values of interest
1388
        constexpr cfloat& maxpos() noexcept {
936,038✔
1389
                if constexpr (isSaturating) {
1390
                        // in a saturating encoding with max-exponent values we are removing the Inf encoding pattern 0b0'11...11'11...10 for a +inf, 
1391
                        // and 0b1'11...11'11...110 for a -inf and using it as a value
1392
                        if constexpr (hasMaxExpValues) {
1393
                                // maximum positive value has this bit pattern: 0-1...1-111...110, that is, sign = 0, e = 11..11, f = 111...110
1394
                                clear();
935,334✔
1395
                                flip();
935,334✔
1396
                                setbit(nbits - 1ull, false); // sign = 0
935,334✔
1397
                                setbit(0ull, false); // bit0 = 0
935,334✔
1398
                        }
1399
                        else {
1400
                                // maximum positive value has this bit pattern: 0-11...10-111...111, that is, sign = 0, e = 11..10, f = 111...111
1401
                                clear();
423✔
1402
                                flip();
423✔
1403
                                setbit(fbits, false); // set least significant exponent bit to 0
423✔
1404
                                setbit(nbits - 1ull, false); // set sign to 0
423✔
1405
                        }
1406
                }
1407
                else {
1408
                        // the Inf encoding is the pattern 0b0'11...11'11...10 for a +inf, and 0b1'11...11'11...110 for a -inf
1409
                        // the maxpos is the encoding before that
1410
                        if constexpr (hasMaxExpValues) {
1411
                                // maximum positive value has this bit pattern: 0-1...1-111...101, that is, sign = 0, e = 11..11, f = 111...101
1412
                                clear();
113✔
1413
                                flip();
113✔
1414
                                setbit(nbits - 1ull, false); // sign = 0
113✔
1415
                                setbit(1ull, false); // bit1 = 0
113✔
1416
                        }
1417
                        else {
1418
                                // maximum positive value has this bit pattern: 0-1...0-111...111, that is, sign = 0, e = 11..10, f = 111...111
1419
                                clear();
168✔
1420
                                flip();
168✔
1421
                                setbit(fbits, false); // set least significant exponent bit to 0
168✔
1422
                                setbit(nbits - 1ull, false); // set sign to 0
168✔
1423
                        }
1424
                }
1425
                return *this;
936,038✔
1426
        }
1427
        constexpr cfloat& minpos() noexcept {
13,290✔
1428
                // minpos encoding is not impacted by saturating encodings, which only affects maxpos and inf
1429
                if constexpr (hasSubnormals) {
1430
                        // minimum positive value has this bit pattern: 0-000-00...01, that is, sign = 0, e = 000, f = 00001
1431
                        clear();
327✔
1432
                        setbit(0);
327✔
1433
                }
1434
                else {
1435
                        // minimum positive value has this bit pattern: 0-001-00...0, that is, sign = 0, e = 001, f = 0000
1436
                        clear();
12,963✔
1437
                        setbit(fbits);
12,963✔
1438
                }
1439
                return *this;
13,290✔
1440
        }
1441
        constexpr cfloat& zero() noexcept {
1✔
1442
                // the zero value
1443
                clear();
1✔
1444
                return *this;
1✔
1445
        }
1446
        constexpr cfloat& minneg() noexcept {
14✔
1447
                // minneg encoding is not impacted by saturating encodings, which only affects maxpos and inf
1448
                if constexpr (hasSubnormals) {
1449
                        // minimum negative value has this bit pattern: 1-000-00...01, that is, sign = 1, e = 00, f = 00001
1450
                        clear();
8✔
1451
                        setbit(nbits - 1ull);
8✔
1452
                        setbit(0);
8✔
1453
                }
1454
                else {
1455
                        // minimum negative value has this bit pattern: 1-001-00...0, that is, sign = 1, e = 001, f = 0000
1456
                        clear();
6✔
1457
                        setbit(fbits);
6✔
1458
                        setbit(nbits - 1ull);
6✔
1459
                }
1460
                return *this;
14✔
1461
        }
1462
        constexpr cfloat& maxneg() noexcept {
935,702✔
1463
                if constexpr (isSaturating) {
1464
                        // in a saturating encoding with max-exponent values we are removing the Inf encoding pattern 0b0'11...11'11...10 for a +inf, 
1465
                        // and 0b1'11...11'11...110 for a -inf and using it as a value
1466
                        if constexpr (hasMaxExpValues) {
1467
                                // maximum negative value has this bit pattern: 1-1...1-111...110, that is, sign = 1, e = 1..1, f = 111...110
1468
                                clear();
935,240✔
1469
                                flip();
935,240✔
1470
                                setbit(0ull, false);
935,240✔
1471
                        }
1472
                        else {
1473
                                // maximum negative value has this bit pattern: 1-1...0-111...111, that is, sign = 1, e = 11..10, f = 111...111
1474
                                clear();
401✔
1475
                                flip();
401✔
1476
                                setbit(fbits, false);
401✔
1477
                        }
1478
                }
1479
                else {
1480
                        if constexpr (hasMaxExpValues) {
1481
                                // maximum negative value has this bit pattern: 1-1...1-111...101, that is, sign = 1, e = 1..1, f = 111...101
1482
                                clear();
12✔
1483
                                flip();
12✔
1484
                                setbit(1ull, false);
12✔
1485
                        }
1486
                        else {
1487
                                // maximum negative value has this bit pattern: 1-1...0-111...111, that is, sign = 1, e = 11..10, f = 111...111
1488
                                clear();
49✔
1489
                                flip();
49✔
1490
                                setbit(fbits, false);
49✔
1491
                        }
1492
                }
1493
                return *this;
935,702✔
1494
        }
1495

1496

1497
        /// <summary>
1498
        /// assign the value of the string representation to the cfloat
1499
        /// </summary>
1500
        /// <param name="stringRep">decimal scientific notation of a real number to be assigned</param>
1501
        /// <returns>reference to this cfloat</returns>
1502
        /// Clang doesn't support constexpr yet on string manipulations, so we need to make it conditional
1503
        CONSTEXPRESSION cfloat& assign(const std::string& str) noexcept {
7✔
1504
                clear();
7✔
1505
                unsigned nrChars = static_cast<unsigned>(str.size());
7✔
1506
                unsigned nrBits = 0;
7✔
1507
                unsigned nrDots = 0;
7✔
1508
                std::string bits;
7✔
1509
                if (nrChars > 2) {
7✔
1510
                        if (str[0] == '0' && str[1] == 'b') {
7✔
1511
                                for (unsigned i = 2; i < nrChars; ++i) {
211✔
1512
                                        char c = str[i];
204✔
1513
                                        switch (c) {
204✔
1514
                                        case '0':
186✔
1515
                                        case '1':
1516
                                                ++nrBits;
186✔
1517
                                                bits += c;
186✔
1518
                                                break;
186✔
1519
                                        case '.':
14✔
1520
                                                ++nrDots;
14✔
1521
                                                bits += c;
14✔
1522
                                                break;
14✔
1523
                                        case '\'':
4✔
1524
                                                // consume this delimiting character
1525
                                                break;
4✔
1526
                                        default:
×
1527
                                                std::cerr << "string contained a non-standard character: " << c << '\n';
×
1528
                                                return *this;
×
1529
                                        }
1530
                                }
1531
                        }
1532
                        else {
1533
                                std::cerr << "string must start with 0b: instead input pattern was " << str << '\n';
×
1534
                                return *this;
×
1535
                        }
1536
                }
1537
                else {
1538
                        std::cerr << "string is too short\n";
×
1539
                        return *this;
×
1540
                }
1541

1542
                if (nrBits != nbits) {
7✔
1543
                        std::cerr << "number of bits in the string is " << nrBits << " and needs to be " << nbits << '\n';
×
1544
                        return *this;
×
1545
                }
1546
                if (nrDots != 2) {
7✔
1547
                        std::cerr << "number of segment delimiters in string is " << nrDots << " and needs to be 2 for a cfloat<>\n";
×
1548
                        return *this;
×
1549
                }
1550

1551
                // assign the bits
1552
                int field{ 0 };  // three fields: sign, exponent, mantissa: fields are separated by a '.'
7✔
1553
                int nrExponentBits{ -1 };
7✔
1554
                unsigned bit = nrBits;
7✔
1555
                for (unsigned i = 0; i < bits.size(); ++i) {
207✔
1556
                        char c = bits[i];
200✔
1557
                        if (c == '.') {
200✔
1558
                                ++field;
14✔
1559
                                if (field == 2) { // just finished parsing exponent field: we can now check the number of exponent bits
14✔
1560
                                        if (nrExponentBits != es) {
7✔
1561
                                                std::cerr << "provided binary string representation does not contain " << es << " exponent bits. Found " << nrExponentBits << ". Reset to 0\n";
×
1562
                                                clear();
×
1563
                                                return *this;
×
1564
                                        }
1565
                                }
1566
                        }
1567
                        else {
1568
                                setbit(--bit, c == '1');
186✔
1569
                        }
1570
                        if (field == 1) { // exponent field
200✔
1571
                                ++nrExponentBits;
51✔
1572
                        }
1573
                }
1574
                if (field != 2) {
7✔
1575
                        std::cerr << "provided binary string did not contain three fields separated by '.': Reset to 0\n";
×
1576
                        clear();
×
1577
                        return *this;
×
1578
                }
1579
                return *this;
7✔
1580
        }
7✔
1581

1582
        // selectors
1583
        constexpr bool sign() const noexcept { return (_block[MSU] & SIGN_BIT_MASK) == SIGN_BIT_MASK; }
259,355,335✔
1584
        constexpr int  scale() const noexcept {
62,246,191✔
1585
                int e{ 0 };
62,246,191✔
1586
                if constexpr (MSU_CAPTURES_EXP) {
1587
                        e = static_cast<int>((_block[MSU] & ~SIGN_BIT_MASK) >> EXP_SHIFT);
50,594,615✔
1588
                        if (e == 0) {
50,594,615✔
1589
                                // subnormal scale is determined by fraction
1590
                                // subnormals: (-1)^s * 2^(2-2^(es-1)) * (f/2^fbits))
1591
                                e = (2l - (1l << (es - 1ull))) - 1;
6,565,922✔
1592
                                if constexpr (nbits > 2 + es) {
1593
                                        for (unsigned i = nbits - 2ull - es; i > 0; --i) {
12,641,775✔
1594
                                                if (test(i)) break;
12,535,882✔
1595
                                                --e;
6,098,389✔
1596
                                        }
1597
                                }
1598
                        }
1599
                        else {
1600
                                e -= EXP_BIAS;
44,028,693✔
1601
                        }
1602
                }
1603
                else {
1604
                        blockbinary<es, bt> ebits{};
11,651,924✔
1605
                        exponent(ebits);
11,651,576✔
1606
                        if (ebits.iszero()) {
11,651,576✔
1607
                                // subnormal scale is determined by fraction
1608
                                // subnormals: (-1)^s * 2^(2-2^(es-1)) * (f/2^fbits))
1609
                                e = (2l - (1l << (es - 1ull))) - 1;
1,599,655✔
1610
                                if constexpr (nbits > 2 + es) {
1611
                                        for (unsigned i = nbits - 2ull - es; i > 0; --i) {
3,160,458✔
1612
                                                if (test(i)) break;
3,153,336✔
1613
                                                --e;
1,560,814✔
1614
                                        }
1615
                                }
1616
                        }
1617
                        else {
1618
                                e = static_cast<int>(unsigned(ebits) - EXP_BIAS);
10,051,921✔
1619
                        }
1620
                }
1621
                return e;
62,246,191✔
1622
        }
1623
        constexpr bool isneg() const noexcept {
558✔
1624
                if (isnan()) return false;
558✔
1625
                return sign(); 
541✔
1626
        }
1627
        constexpr bool ispos() const noexcept { 
34,477✔
1628
                if (isnan()) return false;
34,477✔
1629
                return !sign(); 
34,477✔
1630
        }
1631
        constexpr bool iszero() const noexcept {
430,908,538✔
1632
                // NOTE: this is a very specific design that makes the decsion that
1633
                // for subnormal encodings found in a configuration that doesn't
1634
                // support them, we assume that these values map to 0.
1635
                if constexpr (hasSubnormals) {
1636
                        return iszeroencoding();
251,798,937✔
1637
                }
1638
                else { // all subnormals round to 0
1639
                        blockbinary<es, bt> ebits{};
179,110,041✔
1640
                        exponent(ebits);
179,109,601✔
1641
                        if (ebits.iszero()) return true; else return false;
179,109,601✔
1642
                }
1643
        }
1644
        constexpr bool isone() const noexcept {
1645
                // unbiased exponent = scale = 0, fraction = 0
1646
                int s = scale();
1647
                if (s == 0) {
1648
                        blockbinary<fbits, bt> f{};
1649
                        fraction(f);
1650
                        return f.iszero();
1651
                }
1652
                return false;
1653
        }
1654
        constexpr bool isinf(int InfType = INF_TYPE_EITHER) const noexcept {
383,901,030✔
1655
                // the bit pattern encoding of Inf is independent of the max-exponent value configuration
1656
                bool isNegInf = false;
383,901,030✔
1657
                bool isPosInf = false;
383,901,030✔
1658
                if constexpr (0 == nrBlocks) {
1659
                        return false;
1660
                }
1661
                else if constexpr (1 == nrBlocks) {
1662
                        isNegInf = (_block[MSU] & MSU_MASK) == (MSU_MASK ^ LSB_BIT_MASK);
171,984,502✔
1663
                        isPosInf = (_block[MSU] & MSU_MASK) == ((MSU_MASK ^ SIGN_BIT_MASK) ^ LSB_BIT_MASK);
171,984,502✔
1664
                }
1665
                else if constexpr (2 == nrBlocks) {
1666
                        bool isInf = (_block[0] == (BLOCK_MASK ^ LSB_BIT_MASK));
145,324,126✔
1667
                        isNegInf = isInf && ((_block[MSU] & MSU_MASK) == MSU_MASK);
145,324,126✔
1668
                        isPosInf = isInf && (_block[MSU] & MSU_MASK) == (MSU_MASK ^ SIGN_BIT_MASK);
145,324,126✔
1669
                }
1670
                else if constexpr (3 == nrBlocks) {
1671
                        bool isInf = (_block[0] == (BLOCK_MASK ^ LSB_BIT_MASK)) && (_block[1] == BLOCK_MASK);
66,400,025✔
1672
                        isNegInf = isInf && ((_block[MSU] & MSU_MASK) == MSU_MASK);
66,400,025✔
1673
                        isPosInf = isInf && (_block[MSU] & MSU_MASK) == (MSU_MASK ^ SIGN_BIT_MASK);
66,400,025✔
1674
                }
1675
                else if constexpr (4 == nrBlocks) {
1676
                        bool isInf = (_block[0] == (BLOCK_MASK ^ LSB_BIT_MASK)) && (_block[1] == BLOCK_MASK) && (_block[2] == BLOCK_MASK);
100,280✔
1677
                        isNegInf = isInf && ((_block[MSU] & MSU_MASK) == MSU_MASK);
100,280✔
1678
                        isPosInf = isInf && (_block[MSU] & MSU_MASK) == (MSU_MASK ^ SIGN_BIT_MASK);
100,280✔
1679
                }
1680
                else {
1681
                        bool isInf = (_block[0] == (BLOCK_MASK ^ LSB_BIT_MASK));
92,097✔
1682
                        for (unsigned i = 1; i < nrBlocks - 1; ++i) {
93,740✔
1683
                                if (_block[i] != BLOCK_MASK) {
93,417✔
1684
                                        isInf = false;
91,774✔
1685
                                        break;
91,774✔
1686
                                }
1687
                        }
1688
                        isNegInf = isInf && ((_block[MSU] & MSU_MASK) == MSU_MASK);
92,097✔
1689
                        isPosInf = isInf && (_block[MSU] & MSU_MASK) == (MSU_MASK ^ SIGN_BIT_MASK);
92,097✔
1690
                }
1691

1692
                return (InfType == INF_TYPE_EITHER ? (isNegInf || isPosInf) :
442,191,449✔
1693
                        (InfType == INF_TYPE_NEGATIVE ? isNegInf :
87,437,170✔
1694
                                (InfType == INF_TYPE_POSITIVE ? isPosInf : false)));
442,194,532✔
1695
        }
58,290,419✔
1696
        constexpr bool isnan(int NaNType = NAN_TYPE_EITHER) const noexcept {
1,002,604,508✔
1697
                if constexpr (hasMaxExpValues) {
1698
                        return isnanencoding(NaNType);
596,765,514✔
1699
                }
1700
                else {
1701
                        if (ismaxexpvalue()) {
405,838,994✔
1702
                                // all these max-exponent encodings are NANs, except for the encoding representing INF
1703
                                bool isNaN = isinf() ? false : true;
24,520,161✔
1704
                                bool isNegNaN = isNaN && sign();
24,520,161✔
1705
                                bool isPosNaN = isNaN && !sign();
24,520,161✔
1706
                                return (NaNType == NAN_TYPE_EITHER ? (isNaN) :
27,937,407✔
1707
                                        (NaNType == NAN_TYPE_SIGNALLING ? isNegNaN :
4,551,922✔
1708
                                                (NaNType == NAN_TYPE_QUIET ? isPosNaN : false)));
26,789,513✔
1709
                        }
1710
                        else {
1711
                                return false;
381,318,833✔
1712
                        }
1713
                }
1714
        }
24,520,161✔
1715
        // iszeroencoding returns true if it finds a pure -0 or +0 pattern and returns false otherwise
1716
        constexpr bool iszeroencoding() const noexcept {
359,180,254✔
1717
                if constexpr (0 == nrBlocks) {
1718
                        return true;
1719
                }
1720
                else if constexpr (1 == nrBlocks) {
1721
                        return (_block[MSU] & ~SIGN_BIT_MASK) == 0;
138,359,099✔
1722
                }
1723
                else if constexpr (2 == nrBlocks) {
1724
                        return (_block[0] == 0) && (_block[MSU] & ~SIGN_BIT_MASK) == 0;
146,674,671✔
1725
                }
1726
                else if constexpr (3 == nrBlocks) {
1727
                        return (_block[0] == 0) && _block[1] == 0 && (_block[MSU] & ~SIGN_BIT_MASK) == 0;
73,918,368✔
1728
                }
1729
                else if constexpr (4 == nrBlocks) {
1730
                        return (_block[0] == 0) && _block[1] == 0 && _block[2] == 0 && (_block[MSU] & ~SIGN_BIT_MASK) == 0;
135,387✔
1731
                }
1732
                else {
1733
                        for (unsigned i = 0; i < nrBlocks - 1; ++i) if (_block[i] != 0) return false;
338,411✔
1734
                        return (_block[MSU] & ~SIGN_BIT_MASK) == 0;
555✔
1735
                }
1736
        }
1737
        // isminnegencoding returns true if it find the pattern 1.00.00001 and returns false otherwise
1738
        constexpr bool isminnegencoding() const noexcept {
66,947✔
1739
                if constexpr (0 == nrBlocks) {
1740
                        return false;
1741
                }
1742
                else if constexpr (1 == nrBlocks) {
1743
                        return (_block[MSU] & (SIGN_BIT_MASK | 1ul));
1744
                }
1745
                else if constexpr (2 == nrBlocks) {
1746
                        return ((_block[0] == 1ul) && (_block[1] == SIGN_BIT_MASK));
1,408✔
1747
                }
1748
                else if constexpr (3 == nrBlocks) {
1749
                        return ((_block[0] == 1ul) && (_block[1] == 0) && (_block[2] == SIGN_BIT_MASK));
65,536✔
1750
                }
1751
                else if constexpr (4 == nrBlocks) {
1752
                        return ((_block[0] == 1ul) && (_block[1] == 0) && (_block[2] == 0) && (_block[3] == SIGN_BIT_MASK));
3✔
1753
                }
1754
                else {
1755
                        if (_block[0] != 1ul) return false;
×
1756
                        for (unsigned i = 1; i < nrBlocks - 2; ++i) if (_block[i] != 0) return false;
×
1757
                        return (_block[MSU] == SIGN_BIT_MASK);
×
1758
                }
1759
        }
1760
        constexpr bool isnanencoding(int NaNType = NAN_TYPE_EITHER) const noexcept {
600,432,960✔
1761
                // the bit encoding of NaN is independent of the gradual overflow configuration
1762
                bool isNaN = true;
600,432,960✔
1763
                bool isNegNaN = false;
600,432,960✔
1764
                bool isPosNaN = false;
600,432,960✔
1765

1766
                if constexpr (0 == nrBlocks) {
1767
                        return false;
1768
                }
1769
                else if constexpr (1 == nrBlocks) {
1770
                }
1771
                else if constexpr (2 == nrBlocks) {
1772
                        isNaN = (_block[0] == BLOCK_MASK);
224,000,086✔
1773
                }
1774
                else if constexpr (3 == nrBlocks) {
1775
                        isNaN = (_block[0] == BLOCK_MASK) && (_block[1] == BLOCK_MASK);
201,348,442✔
1776
                }
1777
                else if constexpr (4 == nrBlocks) {
1778
                        isNaN = (_block[0] == BLOCK_MASK) && (_block[1] == BLOCK_MASK) && (_block[2] == BLOCK_MASK);
226,637✔
1779
                }
1780
                else {
1781
                        for (unsigned i = 0; i < nrBlocks - 1; ++i) {
270,051✔
1782
                                if (_block[i] != BLOCK_MASK) {
270,003✔
1783
                                        isNaN = false;
269,713✔
1784
                                        break;
269,713✔
1785
                                }
1786
                        }
1787
                }
1788
                isNegNaN = isNaN && ((_block[MSU] & MSU_MASK) == MSU_MASK);
600,432,960✔
1789
                isPosNaN = isNaN && (_block[MSU] & MSU_MASK) == (MSU_MASK ^ SIGN_BIT_MASK);
600,432,960✔
1790

1791
                return (NaNType == NAN_TYPE_EITHER ? (isNegNaN || isPosNaN) :
816,798,001✔
1792
                        (NaNType == NAN_TYPE_SIGNALLING ? isNegNaN :
324,399,941✔
1793
                                (NaNType == NAN_TYPE_QUIET ? isPosNaN : false)));
816,502,760✔
1794
        }
216,365,041✔
1795
        // isnormal returns true if 0 or exponent bits are not all zero or one, false otherwise
1796
        constexpr bool isnormal() const noexcept {
62,088,152✔
1797
                if (iszeroencoding()) return true; // filter out the one special case
62,088,152✔
1798
                blockbinary<es, bt> e{};
62,088,499✔
1799
                exponent(e);
62,088,151✔
1800
                return !e.iszero() && !e.all();
62,088,151✔
1801
        }
1802
        // isdenormal returns true if exponent bits are all zero, false otherwise
1803
        constexpr bool isdenormal() const noexcept {
45,226,207✔
1804
                if (iszeroencoding()) return false; // filter out the one special case
45,226,207✔
1805
                blockbinary<es, bt> e{};
45,217,428✔
1806
                exponent(e);
45,217,428✔
1807
                return e.iszero(); 
45,217,428✔
1808
        }
1809
        // ismaxexpvalue returns true if exponent bits are all one, false otherwise
1810
        constexpr bool ismaxexpvalue() const noexcept {
409,068,188✔
1811
                blockbinary<es, bt> e{};
409,069,235✔
1812
                exponent(e);
409,068,188✔
1813
                return e.all();
409,068,188✔
1814
        }
1815
        // isinteger is TBD
1816
        constexpr bool isinteger() const noexcept { return false; } // return (floor(*this) == *this) ? true : false; }
1817
        
1818
        template<typename NativeReal>
1819
        constexpr bool inrange(NativeReal v) const noexcept {
9,306,296✔
1820
                // the valid range for this cfloat includes the interval between 
1821
                // maxpos and the value that would round down to maxpos
1822
                bool bIsInRange = true;                
9,306,296✔
1823
                if (v > 0) {
9,306,296✔
1824
                        cfloat c(SpecificValue::maxpos);
4,427,047✔
1825
                        cfloat<nbits + 1, es, BlockType, hasSubnormals, hasMaxExpValues, isSaturating> d{};
8,027,370✔
1826
                        d = NativeReal(c);
4,427,047✔
1827
                        ++d;
4,427,047✔
1828
                        if (v >= NativeReal(d)) bIsInRange = false;
4,427,047✔
1829
                }
1830
                else {
1831
                        cfloat c(SpecificValue::maxneg);
4,879,249✔
1832
                        cfloat<nbits + 1, es, BlockType, hasSubnormals, hasMaxExpValues, isSaturating> d{};
8,816,662✔
1833
                        d = NativeReal(c);
4,879,249✔
1834
                        --d;
4,879,249✔
1835
                        if (v <= NativeReal(d)) bIsInRange = false;
4,879,249✔
1836
                }
1837

1838
                return bIsInRange;
9,306,296✔
1839
        }
1840
        constexpr bool test(unsigned bitIndex) const noexcept {
15,704,725✔
1841
                return at(bitIndex);
15,704,725✔
1842
        }
1843
        constexpr bool at(unsigned bitIndex) const noexcept {
1,879,059,173✔
1844
                if (bitIndex < nbits) {
1,879,059,173✔
1845
                        bt word = _block[bitIndex / bitsInBlock];
1,879,059,173✔
1846
                        bt mask = bt(1ull << (bitIndex % bitsInBlock));
1,879,059,173✔
1847
                        return (word & mask);
1,879,059,173✔
1848
                }
1849
                return false;
×
1850
        }
1851
        constexpr uint8_t nibble(unsigned n) const noexcept {
656✔
1852
                if (n < (1 + ((nbits - 1) >> 2))) {
656✔
1853
                        bt word = _block[(n * 4) / bitsInBlock];
656✔
1854
                        int nibbleIndexInWord = int(n % (bitsInBlock >> 2ull));
656✔
1855
                        bt mask = bt(0xF << (nibbleIndexInWord * 4));
656✔
1856
                        bt nibblebits = bt(mask & word);
656✔
1857
                        return uint8_t(nibblebits >> (nibbleIndexInWord * 4));
656✔
1858
                }
1859
                return 0;
×
1860
        }
1861
        constexpr bt block(unsigned b) const noexcept {
1862
                if (b < nrBlocks) {
1863
                        return _block[b];
1864
                }
1865
                return 0;
1866
        }
1867

1868
        constexpr void sign(bool& s) const {
804✔
1869
                s = sign();
804✔
1870
        }
804✔
1871
        constexpr void exponent(blockbinary<es, bt>& e) const {
813,491,092✔
1872
                e.clear();
813,491,092✔
1873
                if constexpr (0 == nrBlocks) return;
1874
                else if constexpr (1 == nrBlocks) {
1875
                        bt ebits = bt(_block[MSU] & ~SIGN_BIT_MASK);
376,849,810✔
1876
                        e.setbits(uint64_t(ebits >> EXP_SHIFT));
376,849,810✔
1877
                }
1878
                else if constexpr (nrBlocks > 1) {
1879
                        if (MSU_CAPTURES_EXP) {
1880
                                bt ebits = bt(_block[MSU] & ~SIGN_BIT_MASK);
250,193,937✔
1881
                                e.setbits(uint64_t(ebits >> ((nbits - 1ull - es) % bitsInBlock)));
250,193,937✔
1882
                        }
1883
                        else {
1884
                                for (unsigned i = 0; i < es; ++i) { e.setbit(i, at(nbits - 1ull - es + i)); }
841,377,239✔
1885
                        }
1886
                }
1887
        }
813,491,092✔
1888
        template<unsigned targetFractionBits>
1889
        constexpr blockbinary<targetFractionBits, bt>& fraction(blockbinary<targetFractionBits, bt>& f) const {
34,088✔
1890
                static_assert(targetFractionBits >= fbits, "target blockbinary is too small and can't receive all fraction bits");
1891
                f.clear();
34,088✔
1892
                if constexpr (0 == nrBlocks) return f;
1893
                else if constexpr (1 == nrBlocks) {
1894
                        bt fraction = bt(_block[MSU] & ~MSU_EXP_MASK);
1,005✔
1895
                        f.setbits(fraction);
1,005✔
1896
                }
1897
                else if constexpr (nrBlocks > 1) {
1898
                        for (unsigned i = 0; i < fbits; ++i) { f.setbit(i, at(i)); }
242,396✔
1899
                }
1900
                return f;
34,088✔
1901
        }
1902
        constexpr uint64_t fraction_ull() const {
61,032,484✔
1903
                uint64_t raw{ 0 };
61,032,484✔
1904
                if constexpr (nbits - es - 1ull < 65ull) { // no-op if precondition doesn't hold
1905
                        if constexpr (1 == nrBlocks) {
1906
                                uint64_t fbitMask = 0xFFFF'FFFF'FFFF'FFFF >> (64 - fbits);
27,960,319✔
1907
                                raw = fbitMask & uint64_t(_block[0]);
27,960,319✔
1908
                        }
1909
                        else if constexpr (2 == nrBlocks) {
1910
                                uint64_t fbitMask = 0xFFFF'FFFF'FFFF'FFFF >> (64 - fbits);
22,799,715✔
1911
                                raw = fbitMask & ((uint64_t(_block[1]) << bitsInBlock) | uint64_t(_block[0]));
22,799,715✔
1912
                        }
1913
                        else if constexpr (3 == nrBlocks) {
1914
                                uint64_t fbitMask = 0xFFFF'FFFF'FFFF'FFFF >> (64 - fbits);
10,249,633✔
1915
                                raw = fbitMask & ((uint64_t(_block[2]) << (2 * bitsInBlock)) | (uint64_t(_block[1]) << bitsInBlock) | uint64_t(_block[0]));
10,249,633✔
1916
                        }
1917
                        else if constexpr (4 == nrBlocks) {
1918
                                uint64_t fbitMask = 0xFFFF'FFFF'FFFF'FFFF >> (64 - fbits);
22,453✔
1919
                                raw = fbitMask & ((uint64_t(_block[3]) << (3 * bitsInBlock)) | (uint64_t(_block[2]) << (2 * bitsInBlock)) | (uint64_t(_block[1]) << bitsInBlock) | uint64_t(_block[0]));
22,453✔
1920
                        }
1921
                        else {
1922
                                uint64_t mask{ 1 };
364✔
1923
                                for (unsigned i = 0; i < fbits; ++i) { 
15,871✔
1924
                                        if (test(i)) {
15,507✔
1925
                                                raw |= mask;
2,270✔
1926
                                        }
1927
                                        mask <<= 1;
15,507✔
1928
                                }
1929
                        }
1930
                }
1931
                return raw;
61,032,484✔
1932
        }
1933
        // construct the significant from the encoding, returns normalization offset
1934
        constexpr unsigned significant(blockbinary<fhbits, bt>& s, bool isNormal = true) const {
23✔
1935
                unsigned shift = 0;
23✔
1936
                if (iszero()) return 0;
23✔
1937
                if constexpr (0 == nrBlocks) return 0;
1938
                else if constexpr (1 == nrBlocks) {
1939
                        bt significant = bt(_block[MSU] & ~MSU_EXP_MASK & ~SIGN_BIT_MASK);
23✔
1940
                        if (isNormal) {
23✔
1941
                                significant |= (bt(0x1ul) << fbits);
×
1942
                        }
1943
                        else {
1944
                                unsigned msb = find_msb(significant);
23✔
1945
//                                std::cout << "msb : " << msb << " : fhbits : " << fhbits << " : " << to_binary(significant, true) << std::endl;
1946
                                shift = fhbits - msb;
23✔
1947
                                significant <<= shift;
23✔
1948
                        }
1949
                        s.setbits(significant);
23✔
1950
                }
1951
                else if constexpr (nrBlocks > 1) {
1952
                        s.clear();
1953
                        // TODO: design and implement a block-oriented algorithm, this sequential algorithm is super slow
1954
                        if (isNormal) {
1955
                                s.setbit(fbits);
1956
                                for (unsigned i = 0; i < fbits; ++i) { s.setbit(i, at(i)); }
1957
                        }
1958
                        else {
1959
                                // Find the MSB of the subnormal: 
1960
                                unsigned msb = 0;
1961
                                for (unsigned i = 0; i < fbits; ++i) { // msb protected from not being assigned through iszero test at prelude of function
1962
                                        msb = fbits - 1ull - i;
1963
                                        if (test(msb)) break;
1964
                                }
1965
                                //      m-----lsb
1966
                                // h00001010101
1967
                                // 101010100000
1968
                                for (unsigned i = 0; i <= msb; ++i) {
1969
                                        s.setbit(fbits - msb + i, at(i));
1970
                                }
1971
                                shift = fhbits - msb;
1972
                        }
1973
                }
1974
                return shift;
23✔
1975
        }
1976
        template<unsigned targetbits>
1977
        constexpr void bits(blockbinary<targetbits, bt>& b) const {
640✔
1978
                unsigned upperbound = (nbits > targetbits ? targetbits : nbits);
640✔
1979
                b.clear();
640✔
1980
                for (unsigned i = 0; i < upperbound; ++i) { b.setbit(i, at(i)); }
3,840✔
1981
        }
640✔
1982

1983
        // casts to native types
1984
        // Float-to-integer conversion is undefined when the (truncated) source value
1985
        // is outside the destination type's representable range [conv.fpint]. Clamp
1986
        // to min/max before the cast. Note: float(INT_MAX) typically rounds up to
1987
        // 2^31, so we use >= (not >) to catch the boundary.
1988
        constexpr int to_int() const {
1989
                if (isnan()) return 0;
1990
                if (isinf()) return sign() ? std::numeric_limits<int>::min() : std::numeric_limits<int>::max();
1991
                float f = to_native<float>();
1992
                if (f >= static_cast<float>(std::numeric_limits<int>::max())) return std::numeric_limits<int>::max();
1993
                if (f <  static_cast<float>(std::numeric_limits<int>::min())) return std::numeric_limits<int>::min();
1994
                return int(f);
1995
        }
1996
        constexpr long to_long() const {
1997
                if (isnan()) return 0;
1998
                if (isinf()) return sign() ? std::numeric_limits<long>::min() : std::numeric_limits<long>::max();
1999
                double d = to_native<double>();
2000
                if (d >= static_cast<double>(std::numeric_limits<long>::max())) return std::numeric_limits<long>::max();
2001
                if (d <  static_cast<double>(std::numeric_limits<long>::min())) return std::numeric_limits<long>::min();
2002
                return long(d);
2003
        }
2004
        constexpr long long to_long_long() const {
1✔
2005
                if (isnan()) return 0;
1✔
2006
                if (isinf()) return sign() ? std::numeric_limits<long long>::min() : std::numeric_limits<long long>::max();
1✔
2007
                double d = to_native<double>();
1✔
2008
                if (d >= static_cast<double>(std::numeric_limits<long long>::max())) return std::numeric_limits<long long>::max();
1✔
2009
                if (d <  static_cast<double>(std::numeric_limits<long long>::min())) return std::numeric_limits<long long>::min();
1✔
2010
                return (long long)(d);
1✔
2011
        }
2012

2013
        // transform an cfloat to a native C++ floating-point. We are using the native
2014
        // precision to compute, which means that all sub-values need to be representable 
2015
        // by the native precision.
2016
        // A more accurate approximation would require an adaptive precision algorithm
2017
        // with a final rounding step.
2018
        template<typename TargetFloat>
2019
        constexpr
2020
        TargetFloat to_native() const { 
113,547,609✔
2021
                TargetFloat v{ 0.0 };
113,547,609✔
2022
                if (iszero()) {
113,547,609✔
2023
                        // the optimizer might destroy the sign
2024
                        return sign() ? -TargetFloat(0) : TargetFloat(0);
904,819✔
2025
                }
2026
                else if (isnan()) {
112,642,790✔
2027
                        v = sign() ? std::numeric_limits<TargetFloat>::signaling_NaN() : std::numeric_limits<TargetFloat>::quiet_NaN();
6,143,265✔
2028
                }
2029
                else if (isinf()) {
106,499,525✔
2030
                        v = sign() ? -std::numeric_limits<TargetFloat>::infinity() : std::numeric_limits<TargetFloat>::infinity();
144,181✔
2031
                }
2032
                else { // TODO: this approach has catastrophic cancellation when nbits is large and native target float is too small
2033
                        TargetFloat f{ 0 };
106,355,344✔
2034
                        TargetFloat fbit{ 0.5 };
106,355,344✔
2035
                        for (int i = static_cast<int>(nbits - 2ull - es); i >= 0; --i) {
1,314,526,640✔
2036
                                f += at(static_cast<unsigned>(i)) ? fbit : TargetFloat(0);
1,208,171,296✔
2037
                                fbit *= TargetFloat(0.5);
1,208,171,296✔
2038
                        }
2039
                        blockbinary<es, bt> ebits;
2040
                        exponent(ebits);
106,355,344✔
2041
                        if constexpr (hasSubnormals) {
2042
                                if (ebits.iszero()) {
66,581,746✔
2043
                                        // subnormals: (-1)^s * 2^(2-2^(es-1)) * (f/2^fbits))
2044
                                        TargetFloat exponentiation = TargetFloat(subnormal_exponent[es]); // precomputed values for 2^(2-2^(es-1))
14,669,346✔
2045
                                        v = exponentiation * f;  // f is already f/2^fbits
14,669,346✔
2046
                                        return sign() ? -v : v;
14,669,346✔
2047
                                }
2048
                        }
2049
                        else {
2050
                                if (ebits.iszero()) { // underflow to 0
39,773,598✔
2051
                                        // compiler fast float optimization might destroy the sign
2052
                                        return sign() ? -TargetFloat(0) : TargetFloat(0);
×
2053
                                }
2054
                        }
2055
                        if constexpr (hasMaxExpValues) {
2056
                                // regular: (-1)^s * 2^(e+1-2^(es-1)) * (1 + f/2^fbits))
2057
                                int exponent = static_cast<int>(unsigned(ebits) - EXP_BIAS);
52,360,650✔
2058
                                if (-64 < exponent && exponent < 64) {
52,360,650✔
2059
                                        // NB: the negative branch uses std::ldexp instead of
2060
                                        // 1.0f / TargetFloat(1ull << -exponent) because clang -O2
2061
                                        // miscompiles the latter for TargetFloat == long double via
2062
                                        // a buggy direct-bit-pattern optimisation (see issue #937).
2063
                                        // gcc handles either form correctly; ldexp is portable.
2064
                                        TargetFloat exponentiation = (exponent >= 0
52,037,061✔
2065
                                                ? TargetFloat(1ull << exponent)
52,037,061✔
2066
                                                : std::ldexp(TargetFloat(1), exponent));
8,748,209✔
2067
                                        v = exponentiation * (TargetFloat(1.0) + f);
52,037,061✔
2068
                                }
52,037,061✔
2069
                                else {
2070
                                        double exponentiation = ipow(exponent);
323,589✔
2071
                                        v = TargetFloat(exponentiation * (1.0 + f));
323,589✔
2072
                                }
2073
                        }
2074
                        else {
2075
                                if (ebits.all()) {
39,325,348✔
2076
                                        // max-exponent values are mapped to quiet NaNs
2077
                                        v = std::numeric_limits<TargetFloat>::quiet_NaN();
×
2078
                                        return v;
×
2079
                                }
2080
                                else {
2081
                                        // regular: (-1)^s * 2^(e+1-2^(es-1)) * (1 + f/2^fbits))
2082
                                        int exponent = static_cast<int>(unsigned(ebits) - EXP_BIAS);
39,325,348✔
2083
                                        if (-64 < exponent && exponent < 64) {
39,325,348✔
2084
                                                // See above re: clang -O2 miscompile, issue #937.
2085
                                                TargetFloat exponentiation = (exponent >= 0
39,124,212✔
2086
                                                        ? TargetFloat(1ull << exponent)
39,124,212✔
2087
                                                        : std::ldexp(TargetFloat(1), exponent));
10,145,656✔
2088
                                                v = exponentiation * (TargetFloat(1.0) + f);
39,124,212✔
2089
                                        }
39,124,212✔
2090
                                        else {
2091
                                                double exponentiation = ipow(exponent);
201,136✔
2092
                                                v = TargetFloat(exponentiation * (1.0 + f));
201,136✔
2093
                                        }
2094
                                }
2095
                        }
2096
                        v = sign() ? -v : v;
91,685,998✔
2097
                }
2098
                return v;
97,973,444✔
2099
        }
2100

2101
        // convert a cfloat to a blocktriple with the fraction format 1.ffff
2102
        // we are using the same block type so that we can use block copies to move bits around.
2103
        // Since we tend to have at least two exponent bits, this will lead to
2104
        // most cfloat<->blocktriple cases being efficient as the block types are aligned.
2105
        // The relationship between the source cfloat and target blocktriple is not
2106
        // arbitrary, enforce it by setting the blocktriple fbits to the cfloat's (nbits - es - 1)
2107
        template<typename TargetBlockType = bt>
2108
        constexpr void normalize(blocktriple<fbits, BlockTripleOperator::REP, TargetBlockType>& tgt) const {
3,170✔
2109
                // test special cases
2110
                if (isnan()) {
3,170✔
2111
                        tgt.setnan();
253✔
2112
                }
2113
                else if (isinf()) {
2,917✔
2114
                        tgt.setinf();
245✔
2115
                }
2116
                else if (iszero()) {
2,672✔
2117
                        tgt.setzero();
313✔
2118
                }
2119
                else {
2120
                        tgt.setnormal(); // a blocktriple is always normalized
2,359✔
2121
                        int scale = this->scale();
2,359✔
2122
                        tgt.setsign(sign());
2,359✔
2123
                        tgt.setscale(scale);
2,359✔
2124
                        // set significant
2125
                        // we are going to unify to the format 01.ffffeeee
2126
                        // where 'f' is a fraction bit, and 'e' is an extension bit
2127
                        // so that normalize can be used to generate blocktriples for add/sub/mul/div/sqrt
2128
                        if (isnormal() || ismaxexpvalue()) {
2,359✔
2129
                                // normal encoding or max-exponent encoding (hasMaxExpValues=true):
2130
                                // significand has a hidden bit at position fbits
2131
                                if constexpr (fbits < 64) { // max 63 bits of fraction to yield 64bit of raw significant bits
2132
                                        uint64_t raw = fraction_ull();
1,686✔
2133
                                        raw |= (1ull << fbits);
1,686✔
2134
                                        tgt.setbits(raw);
1,686✔
2135
                                }
2136
                                else {
2137
                                        blockcopy(tgt);
111✔
2138
                                        tgt.setbit(fbits);
111✔
2139
                                }
2140
                        }
2141
                        else { // it is a subnormal encoding in this target cfloat
2142
                                int shift = MIN_EXP_NORMAL - scale;
562✔
2143
                                if (shift < 0) shift = 0; // guard against negative shifts
562✔
2144
                                // Stored subnormals have no hidden bit, but blocktriple REP expects a normalized 1.ffff payload.
2145
                                // Shift the encoded fraction up until the leading 1 reaches the hidden-bit position, while leaving
2146
                                // scale at the value returned by cfloat::scale(); that preserves the original real value in canonical form.
2147
                                if constexpr (fbits < 64) {
2148
                                        uint64_t raw = fraction_ull();
545✔
2149
                                        raw <<= shift;
545✔
2150
                                        raw |= (1ull << fbits);
545✔
2151
                                        tgt.setbits(raw);
545✔
2152
                                }
2153
                                else {
2154
                                        blockcopy(tgt);
17✔
2155
                                        tgt <<= shift;
17✔
2156
                                        tgt.setbit(fbits);
17✔
2157
                                }
2158
                        }
2159
                }
2160
        }
3,170✔
2161

2162
        // normalize a cfloat to a blocktriple used in add/sub, which has the form 00h.fffff
2163
        // that is 3 + fbits, the 3 extra bits are required to be able to use 2's complement 
2164
        // and capture the largest value of an addition/subtraction.
2165
        // TODO: currently abits = 2*fhbits as the worst case input argument size to
2166
        // capture the smallest normal value in aligned form. There is a faster/smaller
2167
        // implementation where the input is constrainted to just the round, guard, and sticky bits.
2168
        constexpr void normalizeAddition(blocktriple<fbits, BlockTripleOperator::ADD, bt>& tgt) const {
41,195,807✔
2169
                using BlockTripleConfiguration = blocktriple<fbits, BlockTripleOperator::ADD, bt>;
2170
                // test special cases
2171
                if (isnan()) {
41,195,807✔
2172
                        tgt.setnan();
396,494✔
2173
                }
2174
                else if (isinf()) {
40,799,313✔
2175
                        tgt.setinf();
326✔
2176
                }
2177
                else if (iszero()) {
40,798,987✔
2178
                        tgt.setzero();
325,672✔
2179
                }
2180
                else {
2181
                        tgt.setnormal(); // a blocktriple is always normalized
40,473,315✔
2182
                        int scale = this->scale();
40,473,315✔
2183
                        tgt.setsign(sign());
40,473,315✔
2184
                        tgt.setscale(scale);
40,473,315✔
2185
                        // set significant
2186
                        // we are going to unify to the format 001.ffffeeee
2187
                        // where 'f' is a fraction bit, and 'e' is an extension bit
2188
                        // so that normalize can be used to generate blocktriples for add/sub/mul/div/sqrt
2189
                        if (isnormal()) {
40,473,315✔
2190
                                if constexpr (fbits < 64 && BlockTripleConfiguration::rbits < (64 - fbits)) {
2191
                                        uint64_t raw = fraction_ull();
27,663,148✔
2192
                                        raw |= (1ull << fbits); // add the hidden bit
27,663,148✔
2193
                                        //std::cout << "normalize      : " << *this << '\n';
2194
                                        //std::cout << "significant    : " << to_binary(raw, fbits + 2) << '\n';
2195
                                        raw <<= BlockTripleConfiguration::rbits;  // rounding bits required for correct rounding
27,663,148✔
2196
                                        //std::cout << "rounding shift : " << to_binary(raw, fbits + 2 + BlockTripleConfiguration::rbits) << '\n';
2197
                                        tgt.setbits(raw);
27,663,148✔
2198
                                }
2199
                                else {
2200
                                        blockcopy(tgt);
962✔
2201
                                        tgt.setradix();
962✔
2202
                                        tgt.setbit(fbits); // add the hidden bit
962✔
2203
                                        tgt.bitShift(BlockTripleConfiguration::rbits);  // rounding bits required for correct rounding
962✔
2204
                                }
2205
                        }
2206
                        else {
2207
                                if (isdenormal()) { // it is a subnormal encoding in this target cfloat
12,809,205✔
2208
                                        if constexpr (hasSubnormals) {
2209
                                                if constexpr (BlockTripleConfiguration::rbits < (64 - fbits)) {
2210
                                                        uint64_t raw = fraction_ull();
6,517,537✔
2211
                                                        int shift = MIN_EXP_NORMAL - scale;
6,517,537✔
2212
                                                        raw <<= shift; // shift but do NOT add a hidden bit as the MSB of the subnormal is shifted in the hidden bit position
6,517,537✔
2213
                                                        raw <<= BlockTripleConfiguration::rbits;  // rounding bits required for correct rounding
6,517,537✔
2214
                                                        tgt.setbits(raw);
6,517,537✔
2215
                                                }
2216
                                                else {
2217
                                                        blockcopy(tgt);
2218
                                                        tgt.setradix();
2219
                                                        int shift = MIN_EXP_NORMAL - scale;
2220
                                                        tgt.bitShift(shift + BlockTripleConfiguration::rbits);  // rounding bits required for correct rounding
2221
                                                }
2222
                                        }
2223
                                        else {  // this cfloat has no subnormals
2224
                                                tgt.setzero(tgt.sign()); // preserve the sign
×
2225
                                        }
2226
                                }
2227
                                else {
2228
                                        // by design, a cfloat is either normal, subnormal, or max-exponent value, so this else clause is by deduction covering a max-exponent value
2229
                                        if constexpr (hasMaxExpValues) {
2230
                                                if constexpr (fbits < 64 && BlockTripleConfiguration::rbits < (64 - fbits)) {
2231
                                                        uint64_t raw = fraction_ull();
6,291,668✔
2232
                                                        raw |= (1ull << fbits); // add the hidden bit
6,291,668✔
2233
                                                        raw <<= BlockTripleConfiguration::rbits;  // rounding bits required for correct rounding
6,291,668✔
2234
                                                        tgt.setbits(raw);
6,291,668✔
2235
                                                }
2236
                                                else {
2237
                                                        blockcopy(tgt);
×
2238
                                                        tgt.setradix();
×
2239
                                                        tgt.setbit(fbits); // add the hidden bit
×
2240
                                                        tgt.bitShift(BlockTripleConfiguration::rbits);  // rounding bits required for correct rounding
×
2241
                                                }
2242
                                        }
2243
                                        else {  // this cfloat has no max-exponent values and thus this represents a nan, signalling or quiet determined by the sign
2244
                                                tgt.setnan(tgt.sign());
×
2245
                                        }                        
2246
                                }
2247
                        }
2248
                }
2249
                // tgt.setradix(radix);
2250
        }
41,195,807✔
2251

2252
        // Normalize a cfloat to a blocktriple used in mul, which has the form 0'00001.fffff
2253
        // that is 2*fbits, plus 1 overflow bit, and the radix set at <fbits>.
2254
        // The result radix will go to 2*fbits after multiplication.
2255
        constexpr void normalizeMultiplication(blocktriple<fbits, BlockTripleOperator::MUL, bt>& tgt) const {
13,834,472✔
2256
                // test special cases
2257
                if (isnan()) {
13,834,472✔
2258
                        tgt.setnan();
450,696✔
2259
                }
2260
                else if (isinf()) {
13,383,776✔
2261
                        tgt.setinf();
652✔
2262
                }
2263
                else if (iszero()) {
13,383,124✔
2264
                        tgt.setzero();
450,960✔
2265
                }
2266
                else {
2267
                        tgt.setnormal(); // a blocktriple is always normalized
12,932,164✔
2268
                        int scale = this->scale();
12,932,164✔
2269
                        tgt.setsign(sign());
12,932,164✔
2270
                        tgt.setscale(scale);
12,932,164✔
2271

2272
                        // set significant
2273
                        // we are going to unify to the format 01.ffffeeee
2274
                        // where 'f' is a fraction bit, and 'e' is an extension bit
2275
                        // so that normalize can be used to generate blocktriples for add/sub/mul/div/sqrt
2276
                        if (isnormal() || ismaxexpvalue()) {
12,932,164✔
2277
                                if constexpr (fbits < 64) { // max 63 bits of fraction to yield 64bit of raw significant bits
2278
                                        uint64_t raw = fraction_ull();
11,718,071✔
2279
                                        raw |= (1ull << fbits);
11,718,071✔
2280
                                        tgt.setbits(raw);
11,718,071✔
2281
                                }
2282
                                else {
2283
                                        blockcopy(tgt);
832✔
2284
                                        tgt.setradix();
832✔
2285
                                        tgt.setbit(fbits); // add the hidden bit
832✔
2286
                                }
2287
                        }
2288
                        else { 
2289
                                // it is a subnormal encoding in this target cfloat
2290
                                if constexpr (hasSubnormals) {
2291
                                        if constexpr (fbits < 64) {
2292
                                                uint64_t raw = fraction_ull();
1,213,261✔
2293
                                                int shift = MIN_EXP_NORMAL - scale;
1,213,261✔
2294
                                                raw <<= shift;
1,213,261✔
2295
                                                raw |= (1ull << fbits);
1,213,261✔
2296
                                                tgt.setbits(raw);
1,213,261✔
2297
                                        }
2298
                                        else {
2299
                                                blockcopy(tgt);
×
2300
                                                int shift = MIN_EXP_NORMAL - scale;
×
2301
                                                tgt.bitShift(shift);
×
2302
                                                tgt.setbit(fbits);
×
2303
                                        }
2304
                                }
2305
                                else { // this cfloat has no subnormals
2306
                                        tgt.setzero(tgt.sign()); // preserve the sign
×
2307
                                }
2308
                        }
2309
                }
2310
                tgt.setradix(fbits); // override the radix with the input scale for accurate value printing
13,834,472✔
2311
        }
13,834,472✔
2312

2313
        // normalize a cfloat to a blocktriple used in div, which has the form 0'00000'00001.fffff
2314
        // that is 3*fbits, plus 1 overflow bit, and the radix set at <fbits>.
2315
        // the result radix will go to 2*fbits after multiplication.
2316
        // TODO: needs implementation
2317
        constexpr void normalizeDivision(blocktriple<fbits, BlockTripleOperator::DIV, bt>& tgt) const {
8,675,412✔
2318
                constexpr unsigned divshift = blocktriple<fbits, BlockTripleOperator::DIV, bt>::divshift;
8,675,412✔
2319
                // test special cases
2320
                if (isnan()) {
8,675,412✔
2321
                        tgt.setnan();
38✔
2322
                }
2323
                else if (isinf()) {
8,675,374✔
2324
                        tgt.setinf();
6✔
2325
                }
2326
                else if (iszero()) {
8,675,368✔
2327
                        tgt.setzero();
44✔
2328
                }
2329
                else {
2330
                        tgt.setnormal(); // a blocktriple is always normalized
8,675,324✔
2331
                        int scale = this->scale();
8,675,324✔
2332
                        tgt.setsign(sign());
8,675,324✔
2333
                        tgt.setscale(scale);
8,675,324✔
2334
                        // set significant
2335
                        // we are going to unify to the format 01.ffffeeee
2336
                        // where 'f' is a fraction bit, and 'e' is an extension bit
2337
                        // so that normalize can be used to generate blocktriples for add/sub/mul/div/sqrt
2338
                        if (isnormal() || ismaxexpvalue()) {
8,675,324✔
2339
                                if constexpr (fbits < 64 && divshift < (64 - fbits)) {
2340
                                        uint64_t raw = fraction_ull();
7,188,471✔
2341
                                        raw |= (1ull << fbits);
7,188,471✔
2342
                                        raw <<= divshift; // shift the input value to the output radix
7,188,471✔
2343
                                        tgt.setbits(raw);
7,188,471✔
2344
                                }
2345
                                else {
2346
                                        // brute force copy of blocks
2347
                                        blockcopy(tgt);
1,054,322✔
2348
                                        tgt.setbit(fbits);
1,054,322✔
2349
                                        tgt.bitShift(divshift); // shift the input value to the output radix
1,054,322✔
2350
                                }
2351
                        }
2352
                        else { // it is a subnormal encoding in this target cfloat
2353
                                if constexpr (fbits < 64 && divshift < (64 - fbits)) {
2354
                                        uint64_t raw = fraction_ull();
432,529✔
2355
                                        int shift = MIN_EXP_NORMAL - scale;
432,529✔
2356
                                        raw <<= shift;
432,529✔
2357
                                        raw |= (1ull << fbits);
432,529✔
2358
                                        raw <<= divshift; // shift the input value to the output radix
432,529✔
2359
                                        tgt.setbits(raw);
432,529✔
2360
                                }
2361
                                else {
2362
                                        blockcopy(tgt);
2✔
2363
                                        int shift = MIN_EXP_NORMAL - scale;
2✔
2364
                                        tgt.bitShift(shift);
2✔
2365
                                        tgt.setbit(fbits);
2✔
2366
                                        tgt.bitShift(divshift); // shift the input value to the output radix
2✔
2367
                                }
2368
                        }
2369
                }
2370
                tgt.setradix(blocktriple<fbits, BlockTripleOperator::DIV, bt>::radix);
8,675,412✔
2371
        }
8,675,412✔
2372

2373
        // helper debug function
2374
        void constexprClassParameters() const noexcept {
8✔
2375
                std::cout << "-------------------------------------------------------------\n";
8✔
2376
                std::cout << "type              : " << typeid(*this).name() << '\n';
8✔
2377
                std::cout << "nbits             : " << nbits << '\n';
8✔
2378
                std::cout << "es                : " << es << std::endl;
8✔
2379
                std::cout << "hasSubnormals     : " << (hasSubnormals ? "true" : "false") << '\n';
8✔
2380
                std::cout << "hasMaxExpValues   : " << (hasMaxExpValues ? "true" : "false") << '\n';
8✔
2381
                std::cout << "isSaturating      : " << (isSaturating ? "true" : "false") << '\n';
8✔
2382
                std::cout << "ALL_ONES          : " << to_binary(ALL_ONES, 0, true) << '\n';
8✔
2383
                std::cout << "BLOCK_MASK        : " << to_binary(BLOCK_MASK, 0, true) << '\n';
8✔
2384
                std::cout << "nrBlocks          : " << nrBlocks << '\n';
8✔
2385
                std::cout << "bits in MSU       : " << bitsInMSU << '\n';
8✔
2386
                std::cout << "MSU               : " << MSU << '\n';
8✔
2387
                std::cout << "MSU MASK          : " << to_binary(MSU_MASK, 0, true) << '\n';
8✔
2388
                std::cout << "SIGN_BIT_MASK     : " << to_binary(SIGN_BIT_MASK, 0, true) << '\n';
8✔
2389
                std::cout << "LSB_BIT_MASK      : " << to_binary(LSB_BIT_MASK, 0, true) << '\n';
8✔
2390
                std::cout << "MSU CAPTURES_EXP  : " << (MSU_CAPTURES_EXP ? "yes\n" : "no\n");
8✔
2391
                std::cout << "EXP_SHIFT         : " << EXP_SHIFT << '\n';
8✔
2392
                std::cout << "MSU EXP MASK      : " << to_binary(MSU_EXP_MASK, 0, true) << '\n';
8✔
2393
                std::cout << "ALL_ONE_MASK_ES   : " << to_binary(ALL_ONES_ES) << '\n';
8✔
2394
                std::cout << "EXP_BIAS          : " << EXP_BIAS << '\n';
8✔
2395
                std::cout << "MAX_EXP           : " << MAX_EXP << '\n';
8✔
2396
                std::cout << "MIN_EXP_NORMAL    : " << MIN_EXP_NORMAL << '\n';
8✔
2397
                std::cout << "MIN_EXP_SUBNORMAL : " << MIN_EXP_SUBNORMAL << '\n';
8✔
2398
                std::cout << "fraction Blocks   : " << fBlocks << '\n';
8✔
2399
                std::cout << "bits in FSU       : " << bitsInFSU << '\n';
8✔
2400
                std::cout << "FSU               : " << FSU << '\n';
8✔
2401
                std::cout << "FSU MASK          : " << to_binary(FSU_MASK, 0, true) << '\n';
8✔
2402
                std::cout << "topfbits          : " << topfbits << '\n';
8✔
2403
                std::cout << "ALL_ONE_MASK_FR   : " << to_binary(ALL_ONES_FR) << '\n';
8✔
2404
        }
8✔
2405
        void showLimbs() const {
2406
                for (unsigned b = 0; b < nrBlocks; ++b) {
2407
                        std::cout << to_binary(_block[nrBlocks - b - 1], sizeof(bt) * 8) << ' ';
2408
                }
2409
                std::cout << '\n';
2410
        }
2411

2412
protected:
2413
        // HELPER methods
2414

2415
        /// <summary>
2416
        /// 1's complement of the encoding used to set up specific encoding patterns.
2417
        /// This is not an arithmetic operator that makes sense for floating-point numbers.
2418
        /// </summary>
2419
        /// <returns>reference to this cfloat object</returns>
2420
        constexpr cfloat& flip() noexcept { // in-place one's complement
1,871,740✔
2421
                for (unsigned i = 0; i < nrBlocks; ++i) {
7,118,183✔
2422
                        _block[i] = bt(~_block[i]);
5,246,443✔
2423
                }
2424
                _block[MSU] &= MSU_MASK; // assert precondition of properly nulled leading non-bits
1,871,740✔
2425
                return *this;
1,871,740✔
2426
        }
2427

2428
        /// <summary>
2429
        /// shift left is a bit level encoding helper for fast limb-based conversions between different cfloats
2430
        /// </summary>
2431
        /// <param name="bitsToShift"></param>
2432
        void shiftLeft(unsigned bitsToShift) {
60,253✔
2433
                if (bitsToShift == 0) return;
60,253✔
2434
                if (bitsToShift > nbits) {
60,253✔
2435
                        setzero();
×
2436
                }
2437
                if (bitsToShift >= bitsInBlock) {
60,253✔
2438
                        int blockShift = static_cast<int>(bitsToShift / bitsInBlock);
60,253✔
2439
                        for (int i = static_cast<int>(MSU); i >= blockShift; --i) {
140,530✔
2440
                                _block[i] = _block[i - blockShift];
80,277✔
2441
                        }
2442
                        for (int i = blockShift - 1; i >= 0; --i) {
341,419✔
2443
                                _block[i] = bt(0);
281,166✔
2444
                        }
2445
                        // adjust the shift
2446
                        bitsToShift -= blockShift * bitsInBlock;
60,253✔
2447
                        if (bitsToShift == 0) return;
60,253✔
2448
                }
2449
                if constexpr (MSU > 0) {
2450
                        // construct the mask for the upper bits in the block that need to move to the higher word
2451
                        bt mask = 0xFFFFFFFFFFFFFFFF << (bitsInBlock - bitsToShift);
60,207✔
2452
                        for (unsigned i = MSU; i > 0; --i) {
360,815✔
2453
                                _block[i] <<= bitsToShift;
300,608✔
2454
                                // mix in the bits from the right
2455
                                bt bits = bt(mask & _block[i - 1]);
300,608✔
2456
                                _block[i] |= (bits >> (bitsInBlock - bitsToShift));
300,608✔
2457
                        }
2458
                }
2459
                _block[0] <<= bitsToShift;
60,207✔
2460
        }
2461

2462
        // convert an unsigned integer into a cfloat
2463
        // TODO: this method does not protect against being called with a signed integer
2464
        template<typename Ty>
2465
        constexpr cfloat& convert_unsigned_integer(const Ty& rhs) noexcept {
2,795✔
2466
                clear();
2,795✔
2467
                if (0 == rhs) return *this;
2,795✔
2468

2469
                uint64_t raw = static_cast<uint64_t>(rhs);
2,772✔
2470
                int msb = static_cast<int>(find_msb(raw)) - 1; // msb > 0 due to zero test above
2,772✔
2471
                int exponent = msb;
2,772✔
2472
                // remove the MSB as it represents the hidden bit in the cfloat representation
2473
                uint64_t hmask = ~(1ull << msb);
2,772✔
2474
                raw &= hmask;
2,772✔
2475

2476
                constexpr uint32_t sizeInBits = 8 * sizeof(Ty);
2,772✔
2477
                uint32_t shift = sizeInBits - exponent - 1;
2,772✔
2478
                raw <<= shift;
2,772✔
2479
                raw = round<sizeInBits, uint64_t>(raw, exponent);
2,772✔
2480

2481
                // check for exponent overflow after rounding
2482
                if (exponent > MAX_EXP) {
2,772✔
2483
                        if constexpr (isSaturating) {
2484
                                this->maxpos();
×
2485
                        }
2486
                        else {
2487
                                setinf(false);
×
2488
                        }
2489
                        return *this;
×
2490
                }
2491

2492
                // construct the target cfloat
2493
                if constexpr (fbits < (64 - es)) {
2494
                        uint64_t biasedExponent = static_cast<uint64_t>(static_cast<long long>(exponent) + static_cast<long long>(EXP_BIAS));
2,742✔
2495
                        uint64_t bits = 0;
2,742✔
2496
                        bits <<= es;
2,742✔
2497
                        bits |= biasedExponent;
2,742✔
2498
                        bits <<= fbits;
2,742✔
2499
                        bits |= raw;
2,742✔
2500
                        setbits(bits);
2,742✔
2501
                }
2502
                else {
2503
                        setsign(false);
30✔
2504
                        setexponent(exponent);
30✔
2505
                        for (int i = 0; i < exponent; ++i) {
291✔
2506
                                bool bit = (raw >> (sizeInBits - 2 - i)) & 1;
261✔
2507
                                setbit(static_cast<unsigned>(fbits - 1 - i), bit);
261✔
2508
                        }
2509
                }
2510
                // post-rounding cleanup: rounding at the maxpos boundary can
2511
                // produce a NaN encoding; project back to inf or maxpos
2512
                if (isnan()) {
2,772✔
2513
                        if constexpr (isSaturating) {
2514
                                this->maxpos();
2✔
2515
                        }
2516
                        else {
2517
                                setinf(false);
×
2518
                        }
2519
                }
2520
                return *this;
2,772✔
2521
        }
2522
        // convert a signed integer into a cfloat
2523
        // TODO: this method does not protect against being called with a signed integer
2524
        template<typename Ty>
2525
        constexpr cfloat& convert_signed_integer(const Ty& rhs) noexcept {
23,187✔
2526
                clear();
23,187✔
2527
                if (0 == rhs) return *this;
23,187✔
2528
                bool s = (rhs < 0);
10,091✔
2529
                using UnsignedTy = std::make_unsigned_t<Ty>;
2530
                UnsignedTy urhs = static_cast<UnsignedTy>(rhs);
10,091✔
2531
                uint64_t raw = static_cast<uint64_t>(s ? (UnsignedTy(0) - urhs) : urhs);
10,091✔
2532

2533
                int msb = static_cast<int>(find_msb(raw)) - 1; // msb > 0 due to zero test above 
10,091✔
2534
                int exponent = msb;
10,091✔
2535
                // remove the MSB as it represents the hidden bit in the cfloat representation
2536
                uint64_t hmask = ~(1ull << msb);
10,091✔
2537
                raw &= hmask;
10,091✔
2538

2539
                // shift the msb to the msb of the fraction
2540
                constexpr uint32_t sizeInBits = 8 * sizeof(Ty);
10,091✔
2541
                uint32_t shift = sizeInBits - exponent - 1;
10,091✔
2542
                raw <<= shift;
10,091✔
2543
                raw = round<sizeInBits, uint64_t>(raw, exponent);
10,091✔
2544

2545
                // check for exponent overflow after rounding
2546
                if (exponent > MAX_EXP) {
10,091✔
2547
                        if constexpr (isSaturating) {
2548
                                if (s) this->maxneg(); else this->maxpos();
×
2549
                        }
2550
                        else {
2551
                                setinf(s);
×
2552
                        }
2553
                        return *this;
×
2554
                }
2555

2556
                // construct the target cfloat
2557
                if constexpr (fbits < (64 - es)) {
2558
                        uint64_t biasedExponent = static_cast<uint64_t>(static_cast<long long>(exponent) + static_cast<long long>(EXP_BIAS));
9,720✔
2559
                        uint64_t bits = (s ? 1ull : 0ull);
9,720✔
2560
                        bits <<= es;
9,720✔
2561
                        bits |= biasedExponent;
9,720✔
2562
                        bits <<= fbits;
9,720✔
2563
                        bits |= raw;
9,720✔
2564
                        setbits(bits);
9,720✔
2565
                }
2566
                else {
2567
                        setsign(s);
371✔
2568
                        setexponent(exponent);
371✔
2569
                        for (int i = 0; i < exponent; ++i) {
3,120✔
2570
                                bool bit = (raw >> (sizeInBits - 2 - i)) & 1;
2,749✔
2571
                                setbit(static_cast<unsigned>(fbits - 1 - i), bit);
2,749✔
2572
                        }
2573
                }
2574
                // post-rounding cleanup: rounding at the maxpos boundary can
2575
                // produce a NaN encoding; project back to inf or maxpos/maxneg
2576
                if (isnan()) {
10,091✔
2577
                        if constexpr (isSaturating) {
2578
                                if (s) this->maxneg(); else this->maxpos();
4✔
2579
                        }
2580
                        else {
2581
                                setinf(s);
×
2582
                        }
2583
                }
2584
                return *this;
10,091✔
2585
        }
2586

2587
public:
2588
        template<typename Real>
2589
        CONSTEXPRESSION cfloat& convert_ieee754(Real rhs) noexcept {
138,281,050✔
2590
                if constexpr (nbits == 32 && es == 8 && sizeof(Real) == 4) {
2591
                        // we CANNOT use the native conversion to float as cfloats have max-exponent values
2592
                        // which IEEE-754 does not have and thus a native conversion would destroy
2593
                        // only if the Real type is a float can we use the direct conversion
2594

2595
                        // when our cfloat is a perfect match to single precision IEEE-754
2596
                        bool s{ false };
65,752✔
2597
                        uint64_t rawExponent{ 0 };
65,752✔
2598
                        uint64_t rawFraction{ 0 };
65,752✔
2599
                        uint64_t bits{ 0 };
65,752✔
2600
                        extractFields(rhs, s, rawExponent, rawFraction, bits);
65,752✔
2601
                        if (rawExponent == ieee754_parameter<Real>::eallset) { // nan and inf need to be remapped
65,752✔
2602
                                if (rawFraction == (ieee754_parameter<Real>::fmask & ieee754_parameter<Real>::snanmask) ||
24✔
2603
                                        rawFraction == (ieee754_parameter<Real>::fmask & (ieee754_parameter<Real>::qnanmask | ieee754_parameter<Real>::snanmask))) {
18✔
2604
                                        // 1.11111111.00000000.......00000001 signalling nan
2605
                                        // 0.11111111.00000000000000000000001 signalling nan
2606
                                        // MSVC
2607
                                        // 1.11111111.10000000.......00000001 signalling nan
2608
                                        // 0.11111111.10000000.......00000001 signalling nan
2609
                                        setnan(NAN_TYPE_SIGNALLING);
6✔
2610
                                        //setsign(s);  a cfloat encodes a signalling nan with sign = 1, and a quiet nan with sign = 0
2611
                                        return *this;
6✔
2612
                                }
2613
                                if (rawFraction == (ieee754_parameter<Real>::fmask & ieee754_parameter<Real>::qnanmask)) {
18✔
2614
                                        // 1.11111111.10000000.......00000000 quiet nan
2615
                                        // 0.11111111.10000000.......00000000 quiet nan
2616
                                        setnan(NAN_TYPE_QUIET);
5✔
2617
                                        //setsign(s);  a cfloat encodes a signalling nan with sign = 1, and a quiet nan with sign = 0
2618
                                        return *this;
5✔
2619
                                }
2620
                                if (rawFraction == 0ull) {
13✔
2621
                                        // 1.11111111.0000000.......000000000 -inf
2622
                                        // 0.11111111.0000000.......000000000 +inf
2623
                                        setinf(s);
13✔
2624
                                        return *this;
13✔
2625
                                }
2626
                        }
2627
                        uint64_t raw{ s ? 1ull : 0ull };
65,728✔
2628
                        raw <<= 31;
65,728✔
2629
                        raw |= (rawExponent << fbits);
65,728✔
2630
                        raw |= rawFraction;
65,728✔
2631
                        setbits(raw);
65,728✔
2632
                        return *this;
65,728✔
2633
                }
2634
                else if constexpr (nbits == 64 && es == 11 && sizeof(Real) == 8) {
2635
                        // when our cfloat is a perfect match to double precision IEEE-754
2636
                        bool s{ false };
12,394✔
2637
                        uint64_t rawExponent{ 0 };
12,394✔
2638
                        uint64_t rawFraction{ 0 };
12,394✔
2639
                        uint64_t bits{ 0 };
12,394✔
2640
                        extractFields(rhs, s, rawExponent, rawFraction, bits);
12,394✔
2641
                        if (rawExponent == ieee754_parameter<Real>::eallset) { // nan and inf need to be remapped
12,394✔
2642
                                if (rawFraction == (ieee754_parameter<Real>::fmask & ieee754_parameter<Real>::snanmask) ||
18✔
2643
                                        rawFraction == (ieee754_parameter<Real>::fmask & (ieee754_parameter<Real>::qnanmask | ieee754_parameter<Real>::snanmask))) {
13✔
2644
                                        // 1.11111111.00000000.......00000001 signalling nan
2645
                                        // 0.11111111.00000000000000000000001 signalling nan
2646
                                        // MSVC
2647
                                        // 1.11111111.10000000.......00000001 signalling nan
2648
                                        // 0.11111111.10000000.......00000001 signalling nan
2649
                                        setnan(NAN_TYPE_SIGNALLING);
5✔
2650
                                        //setsign(s);  a cfloat encodes a signalling nan with sign = 1, and a quiet nan with sign = 0
2651
                                        return *this;
5✔
2652
                                }
2653
                                if (rawFraction == (ieee754_parameter<Real>::fmask & ieee754_parameter<Real>::qnanmask)) {
13✔
2654
                                        // 1.11111111.10000000.......00000000 quiet nan
2655
                                        // 0.11111111.10000000.......00000000 quiet nan
2656
                                        setnan(NAN_TYPE_QUIET);
5✔
2657
                                        //setsign(s);  a cfloat encodes a signalling nan with sign = 1, and a quiet nan with sign = 0
2658
                                        return *this;
5✔
2659
                                }
2660
                                if (rawFraction == 0ull) {
8✔
2661
                                        // 1.11111111.0000000.......000000000 -inf
2662
                                        // 0.11111111.0000000.......000000000 +inf
2663
                                        setinf(s);
8✔
2664
                                        return *this;
8✔
2665
                                }
2666
                        }
2667
                        // normal and subnormal handling
2668
                        uint64_t raw{ s ? 1ull : 0ull };
12,376✔
2669
                        raw <<= 63;
12,376✔
2670
                        raw |= (rawExponent << fbits);
12,376✔
2671
                        raw |= rawFraction;
12,376✔
2672
                        setbits(raw);
12,376✔
2673
                        return *this;
12,376✔
2674
                }
2675
                else {
2676
                        clear();
138,202,904✔
2677
                        // extract raw IEEE-754 bits
2678
                        bool s{ false };
138,202,904✔
2679
                        uint64_t rawExponent{ 0 };
138,202,904✔
2680
                        uint64_t rawFraction{ 0 };
138,202,904✔
2681
                        uint64_t bits{ 0 };
138,202,904✔
2682
                        extractFields(rhs, s, rawExponent, rawFraction, bits);
138,202,904✔
2683
                        // special case handling
2684
                        if (rawExponent == ieee754_parameter<Real>::eallset) { // nan and inf
138,202,904✔
2685
                                if (rawFraction == (ieee754_parameter<Real>::fmask & ieee754_parameter<Real>::snanmask) ||
5,105,668✔
2686
                                        rawFraction == (ieee754_parameter<Real>::fmask & (ieee754_parameter<Real>::qnanmask | ieee754_parameter<Real>::snanmask))) {
2,589,630✔
2687
                                        // 1.11111111.00000000.......00000001 signalling nan
2688
                                        // 0.11111111.00000000000000000000001 signalling nan
2689
                                        // MSVC
2690
                                        // 1.11111111.10000000.......00000001 signalling nan
2691
                                        // 0.11111111.10000000.......00000001 signalling nan
2692
                                        setnan(NAN_TYPE_SIGNALLING);
2,523,246✔
2693
                                        //setsign(s);  a cfloat encodes a signalling nan with sign = 1, and a quiet nan with sign = 0
2694
                                        return *this;
2,523,246✔
2695
                                }
2696
                                if (rawFraction == (ieee754_parameter<Real>::fmask & ieee754_parameter<Real>::qnanmask)) {
2,582,422✔
2697
                                        // 1.11111111.10000000.......00000000 quiet nan
2698
                                        // 0.11111111.10000000.......00000000 quiet nan
2699
                                        setnan(NAN_TYPE_QUIET);
2,553,517✔
2700
                                        //setsign(s);  a cfloat encodes a signalling nan with sign = 1, and a quiet nan with sign = 0
2701
                                        return *this;
2,553,517✔
2702
                                }
2703
                                if (rawFraction == 0ull) {
28,905✔
2704
                                        // 1.11111111.0000000.......000000000 -inf
2705
                                        // 0.11111111.0000000.......000000000 +inf
2706
                                        setinf(s);
28,881✔
2707
                                        return *this;
28,881✔
2708
                                }
2709
                        }
2710
                        if (rhs == 0.0) { // IEEE rule: this is valid for + and - 0.0
133,097,260✔
2711
                                setbit(nbits - 1ull, s);
578,639✔
2712
                                return *this;
578,639✔
2713
                        }
2714
        
2715
                        // normal number consists of fbits fraction bits and one hidden bit
2716
                        // subnormal number has no hidden bit
2717
                        int exponent = static_cast<int>(rawExponent) - ieee754_parameter<Real>::bias;  // unbias the exponent
132,518,621✔
2718

2719
                        // normalize subnormal source values so downstream code can treat them as normal
2720
                        // A subnormal has rawExponent == 0 and value = 0.fraction * 2^(1-bias).
2721
                        // We find the MSB of the fraction, shift it into the hidden bit position,
2722
                        // mask it off (like a normal's implicit 1), and adjust the exponent.
2723
                        // Setting rawExponent = 1 causes all downstream if(rawExponent != 0) branches
2724
                        // to take the normal-number path, which already handles normal-to-normal and
2725
                        // normal-to-subnormal target conversions correctly.
2726
                        if (rawExponent == 0 && rawFraction != 0) {
132,518,621✔
2727
                                exponent = 1 - ieee754_parameter<Real>::bias; // effective exponent for subnormals
1,223✔
2728
                                unsigned msb_pos = find_msb(rawFraction); // 1-indexed position of MSB
1,223✔
2729
                                unsigned normalizeShift = static_cast<unsigned>(ieee754_parameter<Real>::fbits) + 1u - msb_pos;
1,223✔
2730
                                rawFraction = (rawFraction << normalizeShift) & static_cast<uint64_t>(ieee754_parameter<Real>::fmask);
1,223✔
2731
                                exponent -= static_cast<int>(normalizeShift);
1,223✔
2732
                                rawExponent = 1; // mark as normalized so downstream paths treat this as a normal number
1,223✔
2733
                        }
2734

2735
                        // check special case of
2736
                        //  1- saturating to maxpos/maxneg, or 
2737
                        //  2- projecting to +-inf 
2738
                        // if the value is out of range.
2739
                        // 
2740
                        // One problem here is that at the rounding cusps of maxpos <-> inf <-> nan
2741
                        // you need to go through the rounding logic to know which encoding you end up
2742
                        // with. 
2743
                        // For each specific cfloat configuration, you can work out these rounding cusps
2744
                        // but they need to go through the value transformation to map them back to native
2745
                        // IEEE-754. That is a complex computation to do as a static constexpr as you need
2746
                        // to construct the value, then evaluate it, and store it.
2747
                        // 
2748
                        // The algorithm used here is to check for the obvious out of range values by
2749
                        // comparing their scale to the max scale this cfloat encoding can represent.
2750
                        // For the rounding cusps, we go through the rounding logic, and then clean up
2751
                        // after rounding using the observation that no conversion from a value can ever
2752
                        // yield the NaN encoding.
2753
                        //
2754
                        // The rounding logic will correctly sort between maxpos and inf, and we clean
2755
                        // up any NaN encodings by projecting back to the configuration's saturation rule.
2756
                        //
2757
                        // We could improve on this by creating the database of rounding cusps and
2758
                        // referencing them with a direct value comparison with the input. That would be
2759
                        // the most performant implementation.
2760
                        if (exponent > MAX_EXP) {
132,518,621✔
2761
                                if constexpr (isSaturating) {
2762
                                        if (s) this->maxneg(); else this->maxpos(); // saturate to maxpos or maxneg
934,374✔
2763
                                }
2764
                                else {
2765
                                        setinf(s);
971,730✔
2766
                                }
2767
                                return *this;
1,906,104✔
2768
                        }
2769
                        if constexpr (hasSubnormals) {
2770
                                if (exponent < MIN_EXP_SUBNORMAL - 1) { 
88,994,002✔
2771
                                        // map to +-0 any values that have a scale less than (MIN_EXP_SUBMORNAL - 1)
2772
                                        this->setbit(nbits - 1, s);
145,464✔
2773
                                        return *this;
145,464✔
2774
                                }
2775
                        }
2776
                        else {
2777
                                if (exponent < MIN_EXP_NORMAL - 1) {
41,618,515✔
2778
                                        // map to +-0 any values that have a scale less than (MIN_EXP_MORNAL - 1)
2779
                                        this->setbit(nbits - 1, s);
273,186✔
2780
                                        return *this;
273,186✔
2781
                                }
2782
                        }
2783

2784
                        /////////////////  
2785
                        /// end of special case processing, move on to value sampling and rounding
2786

2787
#if TRACE_CONVERSION
2788
                        std::cout << '\n';
2789
                        std::cout << "value             : " << rhs << '\n';
2790
                        std::cout << "segments          : " << to_binary(rhs) << '\n';
2791
                        std::cout << "sign     bit      : " << (s ? '1' : '0') << '\n';
2792
                        std::cout << "exponent bits     : " << to_binary(rawExponent, ieee754_parameter<Real>::ebits, true) << '\n';
2793
                        std::cout << "fraction bits     : " << to_binary(rawFraction, ieee754_parameter<Real>::fbits, true) << '\n';
2794
                        std::cout << "exponent value    : " << exponent << '\n';
2795
#endif
2796

2797
                        // do the following scenarios have different rounding bits?
2798
                        // input is normal, cfloat is normal           <-- rounding can happen with native ieee-754 bits
2799
                        // input is normal, cfloat is subnormal
2800
                        // input is subnormal, cfloat is normal
2801
                        // input is subnormal, cfloat is subnormal
2802

2803
                        // The first condition is the relationship between the number 
2804
                        // of fraction bits from the source and the number of fraction bits 
2805
                        // in the target cfloat: these are constexpressions and guard the shifts
2806
                        // input fbits >= cfloat fbits                 <-- need to round
2807
                        // input fbits < cfloat fbits                  <-- no need to round
2808

2809
                        // quick check if we are truncating to 0 for all subnormal values
2810
                        if constexpr (!hasSubnormals) {
2811
                                if (exponent < MIN_EXP_NORMAL) {
41,345,329✔
2812
                                        setsign(s); // rest of the bits, exponent and fraction, are already set correctly
124,352✔
2813
                                        return *this;
124,352✔
2814
                                }
2815
                        }
2816
                        if constexpr (fbits < ieee754_parameter<Real>::fbits) {
2817
                                // this is the common case for cfloats that are smaller in size compared to single and double precision IEEE-754
2818
                                constexpr int rightShift = ieee754_parameter<Real>::fbits - fbits; // this is the bit shift to get the MSB of the src to the MSB of the tgt
129,810,056✔
2819
                                uint32_t biasedExponent{ 0 };
129,810,056✔
2820
                                int adjustment{ 0 }; // right shift adjustment for subnormal representation
129,810,056✔
2821
                                uint64_t mask;
2822
                                if (rawExponent != 0) {
129,810,056✔
2823
                                        // the source real is a normal number, 
2824
//                                        if (exponent >= (MIN_EXP_SUBNORMAL - 1) && exponent < MIN_EXP_NORMAL) {
2825
                                        if (exponent < MIN_EXP_NORMAL) {
129,810,056✔
2826
//                                                exponent = (exponent < MIN_EXP_SUBNORMAL ? MIN_EXP_SUBNORMAL : exponent); // clip to the smallest subnormal exponent, otherwise the adjustment is off
2827
                                                // the value is a subnormal number in this representation: biasedExponent = 0
2828
                                                // add the hidden bit to the fraction bits so the denormalization has the correct MSB
2829
                                                rawFraction |= ieee754_parameter<Real>::hmask;
41,279,181✔
2830

2831
                                                // fraction processing: we have 1 hidden + 23 explicit fraction bits 
2832
                                                // f = 1.ffff 2^exponent * 2^fbits * 2^-(2-2^(es-1)) = 1.ff...ff >> (23 - (-exponent + fbits - (2 -2^(es-1))))
2833
                                                // -exponent because we are right shifting and exponent in this range is negative
2834
                                                adjustment = -(exponent + subnormal_reciprocal_shift[es]);
41,279,181✔
2835
                                                // this is the right shift adjustment required for subnormal representation due 
2836
                                                // to the scale of the input number, i.e. the exponent of 2^-adjustment
2837
                                        }
2838
                                        else {
2839
                                                // the value is a normal number in this representation: common case
2840
                                                biasedExponent = static_cast<uint32_t>(exponent + EXP_BIAS); // project the exponent into the target 
88,530,875✔
2841
                                                // fraction processing
2842
                                                // float structure is: seee'eeee'efff'ffff'ffff'ffff'ffff'ffff, s = sign, e - exponent bit, f = fraction bit
2843
                                                // target structure is for example cfloat<8,2>: seef'ffff
2844
                                                // since both are normals, we can shift the incoming fraction to the target structure bits, and round
2845
                                                // MSB of source = 23 - 1, MSB of target = fbits - 1: shift = MSB of src - MSB of tgt => 23 - fbits
2846
                                                adjustment = 0;
88,530,875✔
2847
                                        }
2848
                                        if constexpr (rightShift > 0) {
2849
                                                // if true we need to round
2850
                                                // round-to-even logic
2851
                                                //  ... lsb | guard  round sticky   round
2852
                                                //       x     0       x     x       down
2853
                                                //       0     1       0     0       down  round to even
2854
                                                //       1     1       0     0        up   round to even
2855
                                                //       x     1       0     1        up
2856
                                                //       x     1       1     0        up
2857
                                                //       x     1       1     1        up
2858
                                                // collect lsb, guard, round, and sticky bits
2859

2860

2861
#if TRACE_CONVERSION
2862
                                                std::cout << "fraction bits     : " << to_binary(rawFraction, ieee754_parameter<Real>::nbits, true) << '\n';
2863
                                                std::cout << "lsb mask bits     : " << to_binary(mask, ieee754_parameter<Real>::nbits, true) << '\n';
2864
#endif
2865
                                                mask = (1ull << (rightShift + adjustment)); // bit mask for the lsb bit
129,810,056✔
2866
                                                bool lsb = (mask & rawFraction);
129,810,056✔
2867
                                                mask >>= 1;
129,810,056✔
2868
                                                bool guard = (mask & rawFraction);
129,810,056✔
2869
                                                mask >>= 1;
129,810,056✔
2870
                                                bool round = (mask & rawFraction);
129,810,056✔
2871
                                                if ((rightShift + adjustment) > 1) {
129,810,056✔
2872
                                                        mask = (0xFFFF'FFFF'FFFF'FFFFull << (rightShift + adjustment - 2));
129,810,056✔
2873
                                                        mask = ~mask;
129,810,056✔
2874
                                                }
2875
                                                else {
2876
                                                        mask = 0;
×
2877
                                                }
2878
#if TRACE_CONVERSION
2879
                                                std::cout << "right shift       : " << rightShift << '\n';
2880
                                                std::cout << "adjustment        : " << adjustment << '\n';
2881
                                                std::cout << "shift to LSB      : " << (rightShift + adjustment) << '\n';
2882
                                                std::cout << "fraction bits     : " << to_binary(rawFraction, ieee754_parameter<Real>::nbits, true) << '\n';
2883
                                                std::cout << "sticky mask bits  : " << to_binary(mask, ieee754_parameter<Real>::nbits, true) << '\n';
2884
#endif
2885
                                                bool sticky = (mask & rawFraction);
129,810,056✔
2886
                                                rawFraction >>= (static_cast<int64_t>(rightShift) + static_cast<int64_t>(adjustment));
129,810,056✔
2887

2888
                                                // execute rounding operation
2889
                                                if (guard) {
129,810,056✔
2890
                                                        if (lsb && (!round && !sticky)) ++rawFraction; // round to even
26,461,052✔
2891
                                                        if (round || sticky) ++rawFraction;
26,461,052✔
2892
                                                        if (rawFraction == (1ull << fbits)) { // overflow
26,461,052✔
2893
                                                                if (biasedExponent == ALL_ONES_ES) { // overflow to INF == .111..01
451,393✔
2894
                                                                        rawFraction = INF_ENCODING;
594✔
2895
                                                                }
2896
                                                                else {
2897
                                                                        ++biasedExponent;
450,799✔
2898
                                                                        rawFraction = 0;
450,799✔
2899
                                                                }
2900
                                                        }
2901
                                                }
2902
#if TRACE_CONVERSION
2903
                                                std::cout << "lsb               : " << (lsb ? "1\n" : "0\n");
2904
                                                std::cout << "guard             : " << (guard ? "1\n" : "0\n");
2905
                                                std::cout << "round             : " << (round ? "1\n" : "0\n");
2906
                                                std::cout << "sticky            : " << (sticky ? "1\n" : "0\n");
2907
                                                std::cout << "rounding decision : " << (lsb && (!round && !sticky) ? "round to even\n" : "-\n");
2908
                                                std::cout << "rounding direction: " << (round || sticky ? "round up\n" : "round down\n");
2909
#endif
2910
                                        }
2911
                                        else { // all bits of the float go into this representation and need to be shifted up, no rounding necessary
2912
                                                int shiftLeft = fbits - ieee754_parameter<Real>::fbits;
2913
                                                rawFraction <<= shiftLeft;
2914
                                        }
2915
#if TRACE_CONVERSION
2916
                                        std::cout << "biased exponent   : " << biasedExponent << " : 0x" << std::hex << biasedExponent << std::dec << '\n';
2917
                                        std::cout << "right shift       : " << rightShift << '\n';
2918
                                        std::cout << "adjustment shift  : " << adjustment << '\n';
2919
                                        std::cout << "sticky bit mask   : " << to_binary(mask, 32, true) << '\n';
2920
                                        std::cout << "fraction bits     : " << to_binary(rawFraction, 32, true) << '\n';
2921
#endif
2922
                                        // construct the target cfloat
2923
                                        bits = (s ? 1ull : 0ull);
129,810,056✔
2924
                                        bits <<= es;
129,810,056✔
2925
                                        bits |= biasedExponent;
129,810,056✔
2926
                                        bits <<= fbits;
129,810,056✔
2927
                                        bits |= rawFraction;
129,810,056✔
2928
#if TRACE_CONVERSION
2929
                                        std::cout << "sign bit          : " << (s ? '1' : '0') << '\n';
2930
                                        std::cout << "biased exponent   : " << biasedExponent << " : 0x" << std::hex << biasedExponent << std::dec << '\n';
2931
                                        std::cout << "fraction bits     : " << to_binary(rawFraction, 32, true) << '\n';
2932
                                        std::cout << "cfloat bits       : " << to_binary(bits, nbits, true) << '\n';
2933
#endif
2934
                                        setbits(bits);
129,810,056✔
2935
                                }
2936
                                else {
2937
                                        // the source real is a subnormal number                                
2938
                                        mask = 0x00FF'FFFFu >> (fbits + exponent + subnormal_reciprocal_shift[es] + 1); // mask for sticky bit 
×
2939

2940
                                        // fraction processing: we have fbits+1 bits = 1 hidden + fbits explicit fraction bits 
2941
                                        // f = 1.ffff  2^exponent * 2^fbits * 2^-(2-2^(es-1)) = 1.ff...ff >> (23 - (-exponent + fbits - (2 -2^(es-1))))
2942
                                        // -exponent because we are right shifting and exponent in this range is negative
2943
                                        adjustment = -(exponent + subnormal_reciprocal_shift[es]); // this is the right shift adjustment due to the scale of the input number, i.e. the exponent of 2^-adjustment
×
2944
#if TRACE_CONVERSION                                        
2945
                                        std::cout << "source is subnormal: TBD\n";
2946
                                        std::cout << "shift to LSB    " << (rightShift + adjustment) << '\n';
2947
                                        std::cout << "adjustment      " << adjustment << '\n';
2948
                                        std::cout << "exponent        " << exponent << '\n';
2949
                                        std::cout << "subnormal shift " << subnormal_reciprocal_shift[es] << '\n';
2950
#endif
2951
                                        if (exponent >= (MIN_EXP_SUBNORMAL - 1) && exponent < MIN_EXP_NORMAL) {
2952
                                                // the value is a subnormal number in this representation
2953
                                        }
2954
                                        else {
2955
                                                // the value is a normal number in this representation
2956
                                        }
2957
                                }
2958
                        }
2959
                        else {
2960
                                // no need to round, but we need to shift left to deliver the bits
2961
                                // cfloat<40,  8> = float
2962
                                // cfloat<48,  9> = float
2963
                                // cfloat<56, 10> = float
2964
                                // cfloat<64, 11> = float
2965
                                // cfloat<64, 10> = double 
2966
                                // can we go from an input subnormal to a cfloat normal? 
2967
                                // yes, for example a cfloat<64,11> assigned to a subnormal float
2968

2969
                                // map exponent into target cfloat encoding
2970
                                constexpr int upshift = fbits - ieee754_parameter<Real>::fbits;
259,459✔
2971
                                uint64_t biasedExponent{ 0 };
259,459✔
2972
                                bool subnormalInTarget = (exponent < MIN_EXP_NORMAL);
259,459✔
2973
                                int denormShift = 0;
259,459✔
2974
                                if (subnormalInTarget) {
259,459✔
2975
                                        biasedExponent = 0;
656✔
2976
                                        denormShift = MIN_EXP_NORMAL - exponent;
656✔
2977
                                }
2978
                                else {
2979
                                        biasedExponent = static_cast<uint64_t>(static_cast<int64_t>(exponent) + EXP_BIAS);
258,803✔
2980
                                }
2981
                                // output processing
2982
                                if constexpr (nbits < 65) {
2983
                                        // we can compose the bits in a native 64-bit unsigned integer
2984
                                        // common case: normal to normal
2985
                                        // reference example: nbits = 40, es = 8, fbits = 31: rhs = float fbits = 23; shift left by (31 - 23) = 8
2986

2987
                                        if (rawExponent != 0) {
199,206✔
2988
                                                // rhs is a normal encoding (or a normalized former-subnormal)
2989
                                                uint64_t raw{ s ? 1ull : 0ull };
199,206✔
2990
                                                raw <<= es;
199,206✔
2991
                                                raw |= biasedExponent;
199,206✔
2992
                                                raw <<= fbits;
199,206✔
2993
                                                if (subnormalInTarget) {
199,206✔
2994
                                                        // add the hidden bit and denormalize the fraction
2995
                                                        uint64_t significand = rawFraction | ieee754_parameter<Real>::hmask;
636✔
2996
                                                        int netShift = upshift - denormShift;
636✔
2997
                                                        if (netShift >= 0) {
636✔
2998
                                                                rawFraction = significand << netShift;
636✔
2999
                                                        }
3000
                                                        else {
3001
                                                                // right shift with round-to-nearest-even
UNCOV
3002
                                                                int rshift = -netShift;
×
UNCOV
3003
                                                                uint64_t lsbMask = (1ull << rshift);
×
UNCOV
3004
                                                                bool lsb    = (significand & lsbMask) != 0;
×
UNCOV
3005
                                                                bool guard  = (rshift >= 1) && ((significand & (lsbMask >> 1)) != 0);
×
UNCOV
3006
                                                                bool round  = (rshift >= 2) && ((significand & (lsbMask >> 2)) != 0);
×
UNCOV
3007
                                                                bool sticky = (rshift >= 3) && ((significand & ((lsbMask >> 2) - 1)) != 0);
×
UNCOV
3008
                                                                rawFraction = significand >> rshift;
×
UNCOV
3009
                                                                if (guard) {
×
3010
                                                                        if (lsb && (!round && !sticky)) ++rawFraction; // round to even
×
3011
                                                                        if (round || sticky) ++rawFraction;
×
3012
                                                                }
3013
                                                        }
3014
                                                }
3015
                                                else {
3016
                                                        rawFraction <<= upshift;
198,570✔
3017
                                                }
3018
                                                raw |= rawFraction;
199,206✔
3019
                                                setbits(raw);
199,206✔
3020
                                        }
3021
                                        else {
3022
                                                // rhs is a subnormal (unreachable after normalization above, kept for safety)
3023
                                        }
3024
                                }
3025
                                else {
3026
                                        // we need to write and shift bits into place
3027
                                        // use cases are cfloats like cfloat<80, 11, bt>
3028
                                        // even though the bits that come in are single or double precision
3029
                                        // we need to write the fields and then shifting them in place
3030
                                        // 
3031
                                        // common case: normal to normal
3032
                                        if constexpr (bitsInBlock < 64) {
3033
                                                if (rawExponent != 0) {
60,253✔
3034
                                                        // reference example: nbits = 128, es = 15, fbits = 112: rhs = float: shift left by (112 - 23) = 89
3035
                                                        setbits(biasedExponent);
60,253✔
3036
                                                        shiftLeft(fbits);
60,253✔
3037
                                                        // if subnormal in target, add hidden bit before copying
3038
                                                        uint64_t fractionToCopy = subnormalInTarget
60,253✔
3039
                                                                ? (rawFraction | ieee754_parameter<Real>::hmask)
60,253✔
3040
                                                                : rawFraction;
3041
                                                        // shift fraction bits
3042
                                                        int bitsToShift = subnormalInTarget ? (upshift - denormShift) : upshift;
60,253✔
3043
                                                        // for subnormal targets near minpos, bitsToShift can be negative (right shift)
3044
                                                        // apply rounding on the 64-bit significand before splitting into blocks
3045
                                                        if (bitsToShift < 0) {
60,253✔
3046
                                                                int rshift = -bitsToShift;
×
3047
                                                                uint64_t lsbMask = (1ull << rshift);
×
3048
                                                                bool lsb    = (fractionToCopy & lsbMask) != 0;
×
3049
                                                                bool guard  = (rshift >= 1) && ((fractionToCopy & (lsbMask >> 1)) != 0);
×
3050
                                                                bool round  = (rshift >= 2) && ((fractionToCopy & (lsbMask >> 2)) != 0);
×
3051
                                                                bool sticky = (rshift >= 3) && ((fractionToCopy & ((lsbMask >> 2) - 1)) != 0);
×
3052
                                                                fractionToCopy >>= rshift;
×
3053
                                                                if (guard) {
×
3054
                                                                        if (lsb && (!round && !sticky)) ++fractionToCopy; // round to even
×
3055
                                                                        if (round || sticky) ++fractionToCopy;
×
3056
                                                                }
3057
                                                                bitsToShift = 0;
×
3058
                                                        }
3059
                                                        bt fractionBlock[nrBlocks]{ 0 };
60,253✔
3060
                                                        // copy fraction bits
3061
                                                        unsigned blocksRequired = (8 * sizeof(fractionToCopy) + 1) / bitsInBlock;
60,253✔
3062
                                                        unsigned maxBlockNr = (blocksRequired < nrBlocks ? blocksRequired : nrBlocks);
60,253✔
3063
                                                        uint64_t mask = static_cast<uint64_t>(ALL_ONES); // set up the block mask
60,253✔
3064
                                                        unsigned shift = 0;
60,253✔
3065
                                                        for (unsigned i = 0; i < maxBlockNr; ++i) {
340,793✔
3066
                                                                fractionBlock[i] = bt((mask & fractionToCopy) >> shift);
280,540✔
3067
                                                                mask <<= bitsInBlock;
280,540✔
3068
                                                                shift += bitsInBlock;
280,540✔
3069
                                                        }
3070
                                                        if (bitsToShift >= static_cast<int>(bitsInBlock)) {
60,253✔
3071
                                                                int blockShift = static_cast<int>(bitsToShift / bitsInBlock);
50,222✔
3072
                                                                for (int i = MSU; i >= blockShift; --i) {
271,015✔
3073
                                                                        fractionBlock[i] = fractionBlock[i - blockShift];
220,793✔
3074
                                                                }
3075
                                                                for (int i = blockShift - 1; i >= 0; --i) {
160,769✔
3076
                                                                        fractionBlock[i] = bt(0);
110,547✔
3077
                                                                }
3078
                                                                // adjust the shift
3079
                                                                bitsToShift -= blockShift * bitsInBlock;
50,222✔
3080
                                                        }
3081
                                                        if (bitsToShift > 0) {
60,253✔
3082
                                                                // construct the mask for the upper bits in the block that need to move to the higher word
3083
                                                                bt bitsToMoveMask = bt(ALL_ONES << (bitsInBlock - bitsToShift));
40,236✔
3084
                                                                for (unsigned i = MSU; i > 0; --i) {
211,243✔
3085
                                                                        fractionBlock[i] <<= bitsToShift;
171,007✔
3086
                                                                        // mix in the bits from the right
3087
                                                                        bt fracbits = static_cast<bt>(bitsToMoveMask & fractionBlock[i - 1]); // operator & yields an int
171,007✔
3088
                                                                        fractionBlock[i] |= (fracbits >> (bitsInBlock - bitsToShift));
171,007✔
3089
                                                                }
3090
                                                                fractionBlock[0] <<= bitsToShift;
40,236✔
3091
                                                        }
3092
                                                        // OR the bits in
3093
                                                        for (unsigned i = 0; i <= MSU; ++i) {
421,696✔
3094
                                                                _block[i] |= fractionBlock[i];
361,443✔
3095
                                                        }
3096
                                                        // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
3097
                                                        _block[MSU] &= MSU_MASK;
60,253✔
3098
                                                        // finally, set the sign bit
3099
                                                        setsign(s);
60,253✔
3100
                                                }
3101
                                                else {
3102
                                                        // rhs is a subnormal
3103
                //                                        std::cerr << "rhs is a subnormal : " << to_binary(rhs) << " : " << rhs << '\n';
3104
                                                }
3105
                                        }
3106
                                        else {
3107
                                                // BlockType is incorrect
3108
                                        }
3109
                                }
3110
                        }
3111
                        // post-processing results to implement saturation and projection after rounding logic
3112
                        if constexpr (isSaturating) {
3113
                                if (isinf(INF_TYPE_POSITIVE) || isnan(NAN_TYPE_QUIET)) {
20,168,756✔
3114
                                        maxpos();
890✔
3115
                                }
3116
                                else if (isinf(INF_TYPE_NEGATIVE) || isnan(NAN_TYPE_SIGNALLING)) {
20,167,866✔
3117
                                        maxneg();
890✔
3118
                                }
3119
                        }
3120
                        else {
3121
                                if (isnan(NAN_TYPE_QUIET)) {
109,900,759✔
3122
                                        setinf(false);
301,659✔
3123
                                }
3124
                                else if (isnan(NAN_TYPE_SIGNALLING)) {
109,599,100✔
3125
                                        setinf(true);
300,610✔
3126
                                }
3127
                        }
3128
                        return *this;  // TODO: unreachable in some configurations        
130,069,515✔
3129
                }
3130
        }
3131

3132
        // post-processing results to implement saturation and projection after rounding logic
3133
        // arithmetic bit operations can't produce NaN encodings, so we need to re-interpret
3134
        // these encodings and 'project' them to the proper values.
3135
        void constexpr post_process() noexcept {
3136
                if constexpr (isSaturating) {
3137
                        if (isinf(INF_TYPE_POSITIVE) || isnan(NAN_TYPE_QUIET)) {
3138
                                maxpos();
3139
                        }
3140
                        else if (isinf(INF_TYPE_NEGATIVE) || isnan(NAN_TYPE_SIGNALLING)) {
3141
                                maxneg();
3142
                        }
3143
                }
3144
                else {
3145
                        if (isnan(NAN_TYPE_QUIET)) {
3146
                                setinf(false);
3147
                        }
3148
                        else if (isnan(NAN_TYPE_SIGNALLING)) {
3149
                                setinf(true);
3150
                        }
3151
                }
3152
        }
3153

3154
protected:
3155

3156
        /// <summary>
3157
        /// round a set of source bits to the present representation.
3158
        /// srcbits is the number of bits of significant in the source representation
3159
        /// </summary>
3160
        /// <typeparam name="StorageType"></typeparam>
3161
        /// <param name="raw"></param>
3162
        /// <returns></returns>
3163
        template<unsigned srcbits, typename StorageType>
3164
        constexpr uint64_t round(StorageType raw, int& exponent) noexcept {
12,863✔
3165
                if constexpr (fhbits < srcbits) {
3166
                        // round to even: lsb guard round sticky
3167
                    // collect guard, round, and sticky bits
3168
                    // this same logic will work for the case where
3169
                    // we only have a guard bit and no round and sticky bits
3170
                    // because the mask logic will make round and sticky both 0
3171
                        constexpr uint32_t shift = srcbits - fhbits - 1ull;
9,880✔
3172
                        StorageType mask = (StorageType(1ull) << shift);
9,880✔
3173
                        bool guard = (mask & raw);
9,880✔
3174
                        mask >>= 1;
9,880✔
3175
                        bool round = (mask & raw);
9,880✔
3176
                        if constexpr (shift > 1u) { // protect against a negative shift
3177
                                StorageType allones(StorageType(~0));
9,880✔
3178
                                mask = StorageType(allones << (shift - 1));
9,880✔
3179
                                mask = ~mask;
9,880✔
3180
                        }
3181
                        else {
3182
                                mask = 0;
3183
                        }
3184
                        bool sticky = (mask & raw);
9,880✔
3185

3186
                        raw >>= (shift + 1);  // shift out the bits we are rounding away
9,880✔
3187
                        bool lsb = (raw & 0x1u);
9,880✔
3188
                        //  ... lsb | guard  round sticky   round
3189
                        //       x     0       x     x       down
3190
                        //       0     1       0     0       down  round to even
3191
                        //       1     1       0     0        up   round to even
3192
                        //       x     1       0     1        up
3193
                        //       x     1       1     0        up
3194
                        //       x     1       1     1        up
3195
                        if (guard) {
9,880✔
3196
                                if (lsb && (!round && !sticky)) ++raw; // round to even
3,117✔
3197
                                if (round || sticky) ++raw;
3,117✔
3198
                                if (raw == (1ull << fbits)) { // overflow into next power of two
3,117✔
3199
                                        ++exponent;
135✔
3200
                                        raw = 0; // fraction is zero after carry into hidden bit
135✔
3201
                                }
3202
                        }
3203
                }
3204
                else {
3205
                        // Target has more precision than source - need to left-shift to align
3206
                        // For large types where fhbits > 64, the fraction bits cannot fit in
3207
                        // a 64-bit raw after shifting. In this case, skip the shift and let
3208
                        // convert_signed/unsigned_integer place bits using setbit().
3209
                        // The caller positions fraction bits at the top of srcbits, and we
3210
                        // need to ensure they don't overflow 64 bits after our shift.
3211
                        constexpr unsigned shift = fhbits - srcbits;
2,983✔
3212
                        if constexpr (fhbits <= 64 && shift < 64) {
3213
                                raw <<= shift;
2,582✔
3214
                        }
3215
                        // else: raw stays as-is; caller will extract bits and place them
3216
                }
3217
                uint64_t significant = raw;
12,863✔
3218
                return significant;
12,863✔
3219
        }
3220

3221
        template<typename ArgumentBlockType>
3222
        constexpr void copyBits(ArgumentBlockType v) {
3223
                unsigned blocksRequired = (8 * sizeof(v) + 1 ) / bitsInBlock;
3224
                unsigned maxBlockNr = (blocksRequired < nrBlocks ? blocksRequired : nrBlocks);
3225
                bt b{ 0ul }; b = bt(~b);
3226
                ArgumentBlockType mask = ArgumentBlockType(b);
3227
                unsigned shift = 0;
3228
                for (unsigned i = 0; i < maxBlockNr; ++i) {
3229
                        _block[i] = bt((mask & v) >> shift);
3230
                        mask <<= bitsInBlock;
3231
                        shift += bitsInBlock;
3232
                }
3233
        }
3234
        void shiftLeft(int leftShift) {
3235
                if (leftShift == 0) return;
3236
                if (leftShift < 0) return shiftRight(-leftShift);
3237
                if (leftShift > long(nbits)) leftShift = nbits; // clip to max
3238
                if (leftShift >= long(bitsInBlock)) {
3239
                        int blockShift = leftShift / static_cast<int>(bitsInBlock);
3240
                        for (signed i = signed(MSU); i >= blockShift; --i) {
3241
                                _block[i] = _block[i - blockShift];
3242
                        }
3243
                        for (signed i = blockShift - 1; i >= 0; --i) {
3244
                                _block[i] = bt(0);
3245
                        }
3246
                        // adjust the shift
3247
                        leftShift -= (long)(blockShift * bitsInBlock);
3248
                        if (leftShift == 0) return;
3249
                }
3250
                // construct the mask for the upper bits in the block that need to move to the higher word
3251
//                bt mask = static_cast<bt>(0xFFFFFFFFFFFFFFFFull << (bitsInBlock - leftShift));
3252
                bt mask = ALL_ONES;
3253
                mask <<= (bitsInBlock - leftShift);
3254
                for (unsigned i = MSU; i > 0; --i) {
3255
                        _block[i] <<= leftShift;
3256
                        // mix in the bits from the right
3257
                        bt bits = static_cast<bt>(mask & _block[i - 1]);
3258
                        _block[i] |= (bits >> (bitsInBlock - leftShift));
3259
                }
3260
                _block[0] <<= leftShift;
3261
        }
3262
        void shiftRight(int rightShift) {
3263
                if (rightShift == 0) return;
3264
                if (rightShift < 0) return shiftLeft(-rightShift);
3265
                if (rightShift >= long(nbits)) {
3266
                        setzero();
3267
                        return;
3268
                }
3269
                bool signext = sign();
3270
                unsigned blockShift = 0;
3271
                if (rightShift >= long(bitsInBlock)) {
3272
                        blockShift = rightShift / bitsInBlock;
3273
                        if (MSU >= blockShift) {
3274
                                // shift by blocks
3275
                                for (unsigned i = 0; i <= MSU - blockShift; ++i) {
3276
                                        _block[i] = _block[i + blockShift];
3277
                                }
3278
                        }
3279
                        // adjust the shift
3280
                        rightShift -= (long)(blockShift * bitsInBlock);
3281
                        if (rightShift == 0) {
3282
                                // fix up the leading zeros if we have a negative number
3283
                                if (signext) {
3284
                                        // rightShift is guaranteed to be less than nbits
3285
                                        rightShift += (long)(blockShift * bitsInBlock);
3286
                                        for (unsigned i = nbits - rightShift; i < nbits; ++i) {
3287
                                                this->setbit(i);
3288
                                        }
3289
                                }
3290
                                else {
3291
                                        // clean up the blocks we have shifted clean
3292
                                        rightShift += (long)(blockShift * bitsInBlock);
3293
                                        for (unsigned i = nbits - rightShift; i < nbits; ++i) {
3294
                                                this->setbit(i, false);
3295
                                        }
3296
                                }
3297
                                return;  // shift was aligned to block boundary, no per-bit shift needed
3298
                        }
3299
                }
3300

3301
                bt mask = ALL_ONES;
3302
                mask >>= (bitsInBlock - rightShift); // this is a mask for the lower bits in the block that need to move to the lower word
3303
                for (unsigned i = 0; i < MSU; ++i) {  // TODO: can this be improved? we should not have to work on the upper blocks in case we block shifted
3304
                        _block[i] >>= rightShift;
3305
                        // mix in the bits from the left
3306
                        bt bits = static_cast<bt>(mask & _block[i + 1]); // & operator returns an int
3307
                        _block[i] |= (bits << (bitsInBlock - rightShift));
3308
                }
3309
                _block[MSU] >>= rightShift;
3310

3311
                // fix up the leading zeros if we have a negative number
3312
                if (signext) {
3313
                        // bitsToShift is guaranteed to be less than nbits
3314
                        rightShift += (long)(blockShift * bitsInBlock);
3315
                        for (unsigned i = nbits - rightShift; i < nbits; ++i) {
3316
                                this->setbit(i);
3317
                        }
3318
                }
3319
                else {
3320
                        // clean up the blocks we have shifted clean
3321
                        rightShift += (long)(blockShift * bitsInBlock);
3322
                        for (unsigned i = nbits - rightShift; i < nbits; ++i) {
3323
                                this->setbit(i, false);
3324
                        }
3325
                }
3326

3327
                // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
3328
                _block[MSU] &= MSU_MASK;
3329
        }
3330

3331
        // calculate the integer power 2 ^ b using exponentiation by squaring
3332
        constexpr double ipow(int exponent) const {
524,725✔
3333
                bool negative = (exponent < 0);
524,725✔
3334
                exponent = negative ? -exponent : exponent;
524,725✔
3335
                double result(1.0);
524,725✔
3336
                double base = 2.0;
524,725✔
3337
                for (;;) {
3338
                        if (exponent % 2) result *= base;
3,844,590✔
3339
                        exponent >>= 1;
3,844,590✔
3340
                        if (exponent == 0) break;
3,844,590✔
3341
                        base *= base;
3,319,865✔
3342
                }
3343
                return (negative ? (1.0 / result) : result);
524,725✔
3344
        }
3345

3346
        template<BlockTripleOperator btop, typename TargetBlockType = bt>
3347
        constexpr void blockcopy(blocktriple<fbits, btop, TargetBlockType>& tgt) const {
1,056,246✔
3348
                // brute force copy of blocks
3349
                if constexpr (1 == fBlocks) {
3350
                        tgt.setblock(0, static_cast<TargetBlockType>(_block[0] & FSU_MASK));
1,050,734✔
3351
                }
3352
                else if constexpr (2 == fBlocks) {
3353
                        tgt.setblock(0, static_cast<TargetBlockType>(_block[0]));
3,028✔
3354
                        tgt.setblock(1, static_cast<TargetBlockType>(_block[1] & FSU_MASK));
3,028✔
3355
                }
3356
                else if constexpr (3 == fBlocks) {
3357
                        tgt.setblock(0, static_cast<TargetBlockType>(_block[0]));
204✔
3358
                        tgt.setblock(1, static_cast<TargetBlockType>(_block[1]));
204✔
3359
                        tgt.setblock(2, static_cast<TargetBlockType>(_block[2] & FSU_MASK));
204✔
3360
                }
3361
                else if constexpr (4 == fBlocks) {
3362
                        tgt.setblock(0, static_cast<TargetBlockType>(_block[0]));
1,500✔
3363
                        tgt.setblock(1, static_cast<TargetBlockType>(_block[1]));
1,500✔
3364
                        tgt.setblock(2, static_cast<TargetBlockType>(_block[2]));
1,500✔
3365
                        tgt.setblock(3, static_cast<TargetBlockType>(_block[3] & FSU_MASK));
1,500✔
3366
                }
3367
                else if constexpr (5 == fBlocks) {
3368
                        tgt.setblock(0, static_cast<TargetBlockType>(_block[0]));
×
3369
                        tgt.setblock(1, static_cast<TargetBlockType>(_block[1]));
×
3370
                        tgt.setblock(2, static_cast<TargetBlockType>(_block[2]));
×
3371
                        tgt.setblock(3, static_cast<TargetBlockType>(_block[3]));
×
3372
                        tgt.setblock(4, static_cast<TargetBlockType>(_block[4] & FSU_MASK));
×
3373
                }
3374
                else if constexpr (6 == fBlocks) {
3375
                        tgt.setblock(0, static_cast<TargetBlockType>(_block[0]));
3376
                        tgt.setblock(1, static_cast<TargetBlockType>(_block[1]));
3377
                        tgt.setblock(2, static_cast<TargetBlockType>(_block[2]));
3378
                        tgt.setblock(3, static_cast<TargetBlockType>(_block[3]));
3379
                        tgt.setblock(4, static_cast<TargetBlockType>(_block[4]));
3380
                        tgt.setblock(5, static_cast<TargetBlockType>(_block[5] & FSU_MASK));
3381
                }
3382
                else if constexpr (7 == fBlocks) {
3383
                        tgt.setblock(0, static_cast<TargetBlockType>(_block[0]));
8✔
3384
                        tgt.setblock(1, static_cast<TargetBlockType>(_block[1]));
8✔
3385
                        tgt.setblock(2, static_cast<TargetBlockType>(_block[2]));
8✔
3386
                        tgt.setblock(3, static_cast<TargetBlockType>(_block[3]));
8✔
3387
                        tgt.setblock(4, static_cast<TargetBlockType>(_block[4]));
8✔
3388
                        tgt.setblock(5, static_cast<TargetBlockType>(_block[5]));
8✔
3389
                        tgt.setblock(6, static_cast<TargetBlockType>(_block[6] & FSU_MASK));
8✔
3390
                }
3391
                else if constexpr (8 == fBlocks) {
3392
                        tgt.setblock(0, static_cast<TargetBlockType>(_block[0]));
370✔
3393
                        tgt.setblock(1, static_cast<TargetBlockType>(_block[1]));
370✔
3394
                        tgt.setblock(2, static_cast<TargetBlockType>(_block[2]));
370✔
3395
                        tgt.setblock(3, static_cast<TargetBlockType>(_block[3]));
370✔
3396
                        tgt.setblock(4, static_cast<TargetBlockType>(_block[4]));
370✔
3397
                        tgt.setblock(5, static_cast<TargetBlockType>(_block[5]));
370✔
3398
                        tgt.setblock(6, static_cast<TargetBlockType>(_block[6]));
370✔
3399
                        tgt.setblock(7, static_cast<TargetBlockType>(_block[7] & FSU_MASK));
370✔
3400
                }
3401
                else {
3402
                        for (unsigned i = 0; i < FSU; ++i) {
4,127✔
3403
                                tgt.setblock(i, static_cast<TargetBlockType>(_block[i]));
3,725✔
3404
                        }
3405
                        tgt.setblock(FSU, static_cast<TargetBlockType>(_block[FSU] & FSU_MASK));
402✔
3406
                }
3407
        }
1,056,246✔
3408

3409
private:
3410
        bt _block[nrBlocks];
3411

3412
        //////////////////////////////////////////////////////////////////////////////
3413
        // friend functions
3414

3415
        // template parameters need names different from class template parameters (for gcc and clang)
3416
        template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3417
        friend std::ostream& operator<< (std::ostream& ostr, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& r);
3418
        template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3419
        friend std::istream& operator>> (std::istream& istr, cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& r);
3420

3421
        template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3422
        friend constexpr bool operator==(const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs);
3423
        template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3424
        friend constexpr bool operator!=(const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs);
3425
        template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3426
        friend constexpr bool operator< (const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs);
3427
        template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3428
        friend constexpr bool operator> (const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs);
3429
        template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3430
        friend constexpr bool operator<=(const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs);
3431
        template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3432
        friend constexpr bool operator>=(const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs);
3433
};
3434

3435
///////////////////////////// IOSTREAM operators ///////////////////////////////////////////////
3436

3437
// convert cfloat to decimal fixpnt string, i.e. "-1234.5678"
3438
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3439
std::string to_decimal_fixpnt_string(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& value, long long precision) {
3440
        constexpr unsigned fbits = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>::fbits;
3441
        constexpr unsigned bias = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>::EXP_BIAS;
3442
        std::stringstream str;
3443
        if (value.iszero()) {
3444
                str << '0';
3445
                return str.str();
3446
        }
3447
        if (value.sign()) str << '-';
3448

3449
        // construct the discretization levels of the fraction part
3450
        support::decimal range, discretizationLevels, step;
3451
        // create the decimal range we are discretizing
3452
        range.setdigit(1);
3453
        range.shiftLeft(fbits); // the decimal range of the fraction
3454
        discretizationLevels.powerOf2(fbits); // calculate the discretization levels of this range
3455
        step = div(range, discretizationLevels);
3456
        // now construct the value of this range by adding the fraction samples
3457
        support::decimal partial, multiplier;
3458
        partial.setzero();  // if you just want the fraction
3459
        multiplier.setdigit(1);
3460
        // convert the fraction part
3461
        for (unsigned i = 0; i < fbits; ++i) {
3462
                if (value.at(i)) {
3463
                        support::add(partial, multiplier);
3464
                }
3465
                support::add(multiplier, multiplier);
3466
        }
3467
        if (value.isdenormal()) {
3468
                support::mul(partial, step);
3469
                support::decimal scale;
3470
                scale.powerOf2(bias - 1ull);
3471
                partial = support::div(partial, scale);
3472
        } 
3473
        else {
3474
                support::add(partial, multiplier); // add the hidden bit
3475
                support::mul(partial, step);
3476
                support::decimal scale;
3477
                int exponent = value.scale();
3478
                if (exponent < 0) {
3479
                        scale.powerOf2(static_cast<unsigned>(-exponent));
3480
                        partial = support::div(partial, scale);
3481
                }
3482
                else {
3483
                        scale.powerOf2(static_cast<unsigned>(exponent));
3484
                        support::mul(partial, scale);
3485
                }
3486
        }
3487

3488
        // the radix is at fbits
3489
        // The partial represents the parts in the range, so we can deduce
3490
        // the number of leading zeros by comparing to the length of range
3491
        int nrLeadingZeros = static_cast<int>(range.size()) - static_cast<int>(partial.size()) - 1;
3492
        if (nrLeadingZeros >= 0) str << "0.";
3493
        for (int i = 0; i < nrLeadingZeros; ++i) str << '0';
3494
        int digitsWritten = (nrLeadingZeros > 0) ? nrLeadingZeros : 0;
3495
        int position = static_cast<int>(partial.size()) - 1;
3496
        for (support::decimal::const_reverse_iterator rit = partial.rbegin(); rit != partial.rend(); ++rit) {
3497
                str << (int)*rit;
3498
                ++digitsWritten;
3499
                if (position == fbits) str << '.';
3500
                --position;
3501
        }
3502
        if (digitsWritten < precision) { // deal with trailing 0s
3503
                for (unsigned i = static_cast<unsigned>(digitsWritten); i < fbits; ++i) {
3504
                        str << '0';
3505
                }
3506
        }
3507

3508
        return str.str();
3509
}
3510

3511
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3512
std::string to_string(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& value, long long precision) {
3513
        constexpr unsigned fbits = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>::fbits;
3514
        std::stringstream str;
3515
        if (value.iszero()) {
3516
                str << '0';
3517
                return str.str();
3518
        }
3519
        if (value.sign()) str << '-';
3520

3521
        // denormalize the number to gain access to the most sigificant digits
3522
        // 1.ffff^e
3523
        // scale is e
3524
        // lsbScale is e - fbits
3525
        // shift to get lsb to position 2^0 = (e - fbits)
3526
        std::int64_t scale = value.scale();
3527
//        std::int64_t shift = scale + fbits; // we want the lsb at 2^0
3528
        std::int64_t lsbScale = scale - fbits;  // scale of the lsb
3529
        support::decimal partial, multiplier;
3530
        partial.setzero();
3531

3532
        multiplier.powerOf2(lsbScale);
3533

3534
        // convert the fraction bits 
3535
        for (unsigned i = 0; i < fbits; ++i) {
3536
                if (value.at(i)) {
3537
                        support::add(partial, multiplier);
3538
                }
3539
                support::add(multiplier, multiplier);
3540
        }
3541
        if (!value.isdenormal()) {
3542
                support::add(partial, multiplier); // add the hidden bit
3543
        }
3544
        str << partial;
3545
        return str.str();
3546
}
3547

3548
//////////////////////////////////////////////////////////////////////////////////////////////
3549
/// stream operators
3550

3551
// ostream output generates an ASCII format for the floating-point argument
3552
// Uses native binary-to-decimal conversion via blocktriple::to_string()
3553
// to produce exact output for all cfloat sizes without double conversion.
3554
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3555
inline std::ostream& operator<<(std::ostream& ostr, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& v) {
3,129✔
3556
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
3557
        constexpr unsigned cfbits = Cfloat::fbits;
3,129✔
3558

3559
        std::streamsize prec  = ostr.precision();
3,129✔
3560
        std::streamsize width = ostr.width();
3,129✔
3561
        std::ios_base::fmtflags ff = ostr.flags();
3,129✔
3562
        bool bFixed      = (ff & std::ios_base::fixed) == std::ios_base::fixed;
3,129✔
3563
        bool bScientific = (ff & std::ios_base::scientific) == std::ios_base::scientific;
3,129✔
3564
        bool bShowpos    = (ff & std::ios_base::showpos) != 0;
3,129✔
3565
        bool bUppercase  = (ff & std::ios_base::uppercase) != 0;
3,129✔
3566
        bool bInternal   = (ff & std::ios_base::internal) != 0;
3,129✔
3567
        bool bLeft       = (ff & std::ios_base::left) != 0;
3,129✔
3568
        char fillChar    = ostr.fill();
3,129✔
3569

3570
        if constexpr (cfbits == 0) {
3571
                // degenerate cfloat with no fraction bits: fall back to double
3572
                std::ostringstream oss;
3573
                oss.precision(prec);
3574
                if (bFixed) oss << std::fixed;
3575
                if (bScientific) oss << std::scientific;
3576
                if (bUppercase) oss << std::uppercase;
3577
                if (bShowpos) oss << std::showpos;
3578
                oss << static_cast<double>(v);
3579
                std::string s = oss.str();
3580
                if (width > 0 && s.length() < static_cast<size_t>(width)) {
3581
                        size_t pad = static_cast<size_t>(width) - s.length();
3582
                        if (bLeft) { s.append(pad, fillChar); }
3583
                        else { s.insert(0u, pad, fillChar); }
3584
                }
3585
                return ostr << s;
3586
        } else {
3587
                blocktriple<cfbits, BlockTripleOperator::REP, bt> a;
3,129✔
3588
                v.normalize(a);
3,129✔
3589
                return ostr << a.to_string(prec, width, bFixed, bScientific,
6,258✔
3590
                                           bInternal, bLeft, bShowpos, bUppercase, fillChar);
6,258✔
3591
        }
3592
}
3593

3594
// parse a cfloat from a string in either cfloat hex format (nbits.esxHEXVALUEc)
3595
// or a decimal floating-point representation
3596
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3597
bool parse(const std::string& txt, cfloat<nbits,es,bt,hasSubnormals,hasMaxExpValues,isSaturating>& v) {
75✔
3598
        // check if the txt is of the native cfloat form: nbits.esX[0x]hexvaluec
3599
        std::regex cfloat_regex(R"(^[0-9]+\.[0-9]+[xX](0[xX])?[0-9A-Fa-f]+c?$)");
75✔
3600
        if (std::regex_match(txt, cfloat_regex)) {
75✔
3601
                // found a cfloat representation: parse nbits.esxHEXVALUEc
3602
                std::string nbitsStr, esStr, bitStr;
6✔
3603
                auto it = txt.begin();
6✔
3604
                for (; it != txt.end(); ++it) {
18✔
3605
                        if (*it == '.') break;
18✔
3606
                        nbitsStr.append(1, *it);
12✔
3607
                }
3608
                for (++it; it != txt.end(); ++it) {
12✔
3609
                        if (*it == 'x' || *it == 'X') break;
12✔
3610
                        esStr.append(1, *it);
6✔
3611
                }
3612
                for (++it; it != txt.end(); ++it) {
42✔
3613
                        if (*it == 'c') break;
42✔
3614
                        bitStr.append(1, *it);
36✔
3615
                }
3616
                unsigned nbits_in = 0;
6✔
3617
                unsigned es_in = 0;
6✔
3618
                {
3619
                        std::istringstream ss(nbitsStr);
6✔
3620
                        ss >> nbits_in;
6✔
3621
                        if (ss.fail()) return false;
6✔
3622
                }
6✔
3623
                {
3624
                        std::istringstream ss(esStr);
6✔
3625
                        ss >> es_in;
6✔
3626
                        if (ss.fail()) return false;
6✔
3627
                }
6✔
3628
                // native cfloat form must match target configuration
3629
                if (nbits_in != nbits || es_in != es) return false;
6✔
3630
                uint64_t raw = 0;
4✔
3631
                std::istringstream ss(bitStr);
4✔
3632
                ss >> std::hex >> raw;
4✔
3633
                if (ss.fail()) return false;
4✔
3634
                ss >> std::ws;
4✔
3635
                if (!ss.eof()) return false;
4✔
3636
                v.setbits(raw);
4✔
3637
                return true;
4✔
3638
        }
6✔
3639
        else {
3640
                // Decimal floating-point representation.
3641
                // Route through the high-precision decimal_to_binary utility so that
3642
                // wide cfloat configurations (nbits > 64, including IEEE quad and
3643
                // posit-killer formats) don't lose precision through an intermediate
3644
                // double. The utility delivers a normalized mantissa with
3645
                // target_mantissa_bits bits plus guard/sticky; we package that as a
3646
                // blocktriple and hand it to convert(blocktriple, cfloat), which
3647
                // handles all IEEE-754 edge cases (subnormals, saturation, etc.).
3648
                // Special-value tokens (nan / inf in any common spelling) are
3649
                // recognised directly so callers don't depend on locale-sensitive
3650
                // std::istringstream parsing of those literals.
3651
                using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
3652
                {
3653
                        std::string t; t.reserve(txt.size());
69✔
3654
                        for (char c : txt) t.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(c))));
509✔
3655
                        bool negative = !t.empty() && t.front() == '-';
69✔
3656
                        std::string body = t;
69✔
3657
                        if (!body.empty() && (body.front() == '+' || body.front() == '-')) body.erase(0, 1);
69✔
3658
                        if (body == "nan") {
69✔
3659
                                v.setnan(NAN_TYPE_QUIET);
4✔
3660
                                return true;
4✔
3661
                        }
3662
                        if (body == "inf" || body == "infinity") {
65✔
3663
                                v.setinf(negative);
12✔
3664
                                return true;
12✔
3665
                        }
3666
                }
85✔
3667
                // We pack the d2b result into a blocktriple<fbits, MUL, bt>. The MUL
3668
                // layout has bfbits = 2*fbits + 2 and radix = 2*fbits, which gives us
3669
                // exactly fbits of headroom below the cfloat fraction for guard/
3670
                // sticky -- enough room for cfloat's convert() to make a correct
3671
                // round-to-nearest-even decision. (The REP layout has no such
3672
                // headroom: its fraction is exactly cfloat::fbits wide.)
3673
                using BT = blocktriple<Cfloat::fbits, BlockTripleOperator::MUL, bt>;
3674
                constexpr unsigned radix_pos          = static_cast<unsigned>(BT::radix);
53✔
3675
                constexpr unsigned target_mantissa_bits = radix_pos + 1u;
53✔
3676
                auto d = ::sw::universal::decimal_to_binary::convert(
53✔
3677
                        std::string_view{txt}, target_mantissa_bits);
3678
                if (!d.valid) return false;
53✔
3679
                if (d.is_zero) {
49✔
3680
                        v.setzero();
7✔
3681
                        v.setsign(d.negative);
7✔
3682
                        return true;
7✔
3683
                }
3684
                BT bt_val;
42✔
3685
                bt_val.setnormal();
42✔
3686
                bt_val.setsign(d.negative);
42✔
3687
                bt_val.setscale(static_cast<int>(d.binary_scale));
42✔
3688
                // d2b mantissa has its MSB at position radix_pos (the hidden bit);
3689
                // below it are radix_pos extra precision bits. Copy bit-for-bit
3690
                // into the blocktriple significand aligned to the same radix.
3691
                for (unsigned i = 0; i <= radix_pos; ++i) {
1,898✔
3692
                        if (d.mantissa.at(i)) bt_val.setbit(i, true);
1,856✔
3693
                }
3694
                // Fold d2b's residual guard/sticky into the lowest bit so cfloat's
3695
                // own rounding decision picks them up as sticky tail.
3696
                if (d.guard_bit || d.sticky_bit) bt_val.setbit(0, true);
42✔
3697
                convert(bt_val, v);
42✔
3698
                return true;
42✔
3699
        }
3700
}
75✔
3701

3702
// read an ASCII float or cfloat format: nbits.esxNN...NNc, for example: 16.5x7C00c
3703
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3704
inline std::istream& operator>>(std::istream& istr, cfloat<nbits,es,bt,hasSubnormals,hasMaxExpValues,isSaturating>& v) {
18✔
3705
        std::string txt;
18✔
3706
        istr >> txt;
18✔
3707
        if (!parse(txt, v)) {
18✔
3708
                std::cerr << "unable to parse -" << txt << "- into a cfloat value\n";
1✔
3709
                istr.setstate(std::ios::failbit);
1✔
3710
        }
3711
        return istr;
18✔
3712
}
18✔
3713

3714
// encoding helpers
3715

3716
// return the Unit in the Last Position
3717
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3718
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> ulp(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& a) {
49✔
3719
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> b(a);
49✔
3720
        return ++b - a;
86✔
3721
}
3722

3723
// transform cfloat to a binary representation
3724
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3725
inline std::string to_binary(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& number, bool nibbleMarker = false) {
1,495✔
3726
        std::stringstream s;
1,495✔
3727
        s << "0b";
1,495✔
3728
        unsigned index = nbits;
1,495✔
3729
        s << (number.at(--index) ? '1' : '0') << '.';
1,495✔
3730

3731
        for (int i = int(es) - 1; i >= 0; --i) {
10,351✔
3732
                s << (number.at(--index) ? '1' : '0');
8,856✔
3733
                if (i > 0 && (i % 4) == 0 && nibbleMarker) s << '\'';
8,856✔
3734
        }
3735

3736
        s << '.';
1,495✔
3737

3738
        constexpr int fbits = nbits - 1ull - es;
1,495✔
3739
        for (int i = fbits - 1; i >= 0; --i) {
31,889✔
3740
                s << (number.at(--index) ? '1' : '0');
30,394✔
3741
                if (i > 0 && (i % 4) == 0 && nibbleMarker) s << '\'';
30,394✔
3742
        }
3743

3744
        return s.str();
2,990✔
3745
}
1,495✔
3746

3747
// native semantic representation: radix-2, delegates to to_binary
3748
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3749
inline std::string to_native(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& number, bool nibbleMarker = false) {
3750
        return to_binary(number, nibbleMarker);
3751
}
3752

3753
// transform a cfloat into a triple representation
3754
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3755
inline std::string to_triple(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& number, bool nibbleMarker = true) {
3✔
3756
        std::stringstream s;
3✔
3757
        blocktriple<cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>::fbits, BlockTripleOperator::REP, bt> triple;
3✔
3758
        number.normalize(triple);
3✔
3759
        s << to_triple(triple, nibbleMarker);
3✔
3760
        return s.str();
6✔
3761
}
3✔
3762

3763
// Magnitude of a cfloat (equivalent to turning the sign bit off).
3764
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3765
cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>
3766
abs(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& v) {
16,939✔
3767
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> a(v);
16,939✔
3768
        a.setsign(false);
16,939✔
3769
        return a;
16,939✔
3770
}
3771

3772
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3773
cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>
3774
fabs(cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> v) {
40✔
3775
        return abs(v);
40✔
3776
}
3777

3778
////////////////////// debug helpers
3779

3780
// convenience method to gain access to the values of the constexpr variables that govern the cfloat behavior
3781
template<unsigned nbits, unsigned es, typename bt = uint8_t, bool hasSubnormals = false, bool hasMaxExpValues = false, bool isSaturating = false>
3782
void ReportCfloatClassParameters() {
3783
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> a;
3784
        a.constexprClassParameters();
3785
}
3786

3787
//////////////////////////////////////////////////////
3788
/// cfloat - cfloat binary logic operators
3789

3790
template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3791
constexpr inline bool operator==(const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs) {
127,156,716✔
3792
        if (lhs.isnan() || rhs.isnan()) return false;
127,156,716✔
3793
        if (lhs.iszero() && rhs.iszero()) return true; // +0 == -0 per IEEE-754 §5.11
125,501,634✔
3794
        for (unsigned i = 0; i < lhs.nrBlocks; ++i) {
385,833,543✔
3795
                if (lhs._block[i] != rhs._block[i]) {
263,595,273✔
3796
                        return false;
2,114,550✔
3797
                }
3798
        }
3799
        return true;
122,238,270✔
3800
}
3801
template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3802
constexpr inline bool operator!=(const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs) { return !operator==(lhs, rhs); }
125,744,430✔
3803
template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3804
constexpr inline bool operator< (const cfloat<nnbits, nes, nbt, nsub, nsup, nsat>& lhs, const cfloat<nnbits, nes, nbt, nsub, nsup, nsat>& rhs) {
5,942,967✔
3805
        if (lhs.isnan() || rhs.isnan()) return false;
5,942,967✔
3806
        // need this as arithmetic difference is defined as snan(indeterminate)
3807
        if (lhs.isinf(INF_TYPE_NEGATIVE) && rhs.isinf(INF_TYPE_NEGATIVE)) return false;
5,827,458✔
3808
        if (lhs.isinf(INF_TYPE_POSITIVE) && rhs.isinf(INF_TYPE_POSITIVE)) return false;
5,827,444✔
3809
        if constexpr (nsub) {
3810
                cfloat<nnbits, nes, nbt, nsub, nsup, nsat> diff = (lhs - rhs);
5,758,474✔
3811
                return (!diff.iszero() && diff.sign()) ? true : false;  // got to guard against -0
5,758,474✔
3812
        }
3813
        else {
3814
                if (lhs.iszero() && rhs.iszero()) return false;  // we need to 'collapse' all zero encodings
68,937✔
3815
                if (lhs.sign() && !rhs.sign()) return true;
68,929✔
3816
                if (!lhs.sign() && rhs.sign()) return false;
51,703✔
3817
                bool positive = lhs.ispos();
34,477✔
3818
                if (positive) {
34,477✔
3819
                        if (lhs.scale() < rhs.scale()) return true;
17,257✔
3820
                        if (lhs.scale() > rhs.scale()) return false;
12,786✔
3821
                }
3822
                else {
3823
                        if (lhs.scale() > rhs.scale()) return true;
17,220✔
3824
                        if (lhs.scale() < rhs.scale()) return false;
12,770✔
3825
                }
3826
                // sign and scale are the same
3827
                if (lhs.scale() == rhs.scale()) {
16,642✔
3828
                        // compare fractions: we do not have subnormals, so we can ignore the hidden bit
3829
                        blockbinary<nnbits - 1ull - nes, nbt> l, r;
3830
                        lhs.fraction(l);
16,642✔
3831
                        rhs.fraction(r);
16,642✔
3832
                        blockbinary<nnbits - nes, nbt> ll, rr; // fbits + 1 so we can 0 extend to honor 2's complement encoding of blockbinary
3833
                        ll.assignWithoutSignExtend(l);
16,642✔
3834
                        rr.assignWithoutSignExtend(r);
16,642✔
3835
                        return (positive ? (ll < rr) : (ll > rr));
16,642✔
3836
                }
3837
                return false;
×
3838
        }
3839
}
3840
template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3841
constexpr inline bool operator> (const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs) { 
3,144,623✔
3842
        if (lhs.isnan() || rhs.isnan()) return false;
3,144,623✔
3843
        // need this as arithmetic difference is defined as snan(indeterminate)
3844
        if (lhs.isinf(INF_TYPE_NEGATIVE) && rhs.isinf(INF_TYPE_NEGATIVE)) return false;
3,136,509✔
3845
        if (lhs.isinf(INF_TYPE_POSITIVE) && rhs.isinf(INF_TYPE_POSITIVE)) return false;
3,136,495✔
3846
        return  operator< (rhs, lhs); 
3,136,479✔
3847
}
3848
template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3849
constexpr inline bool operator<=(const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs) { 
1,747,159✔
3850
        if (lhs.isnan() || rhs.isnan()) return false;
1,747,159✔
3851
        return !operator> (lhs, rhs); 
1,739,059✔
3852
}
3853
template<unsigned nnbits, unsigned nes, typename nbt, bool nsub, bool nsup, bool nsat>
3854
constexpr inline bool operator>=(const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& lhs, const cfloat<nnbits,nes,nbt,nsub,nsup,nsat>& rhs) {
1,530,053✔
3855
        if (lhs.isnan() || rhs.isnan()) return false;
1,530,053✔
3856
        return !operator< (lhs, rhs); 
1,521,947✔
3857
}
3858

3859
//////////////////////////////////////////////////////
3860
/// cfloat - cfloat binary arithmetic operators
3861

3862
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3863
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
10,326,946✔
3864
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> sum(lhs);
10,326,946✔
3865
        sum += rhs;
10,326,946✔
3866
        return sum;
10,326,946✔
3867
}
3868
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3869
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
8,806,554✔
3870
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> diff(lhs);
8,806,554✔
3871
        diff -= rhs;
8,806,554✔
3872
        return diff;
8,806,554✔
3873
}
3874
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3875
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
4,175,913✔
3876
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> mul(lhs);
4,175,913✔
3877
        mul *= rhs;
4,175,913✔
3878
        return mul;
4,175,913✔
3879
}
3880
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3881
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
5,023,015✔
3882
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> ratio(lhs);
5,023,015✔
3883
        ratio /= rhs;
5,023,015✔
3884
        return ratio;
5,023,014✔
3885
}
3886

3887
/// binary cfloat - literal arithmetic operators
3888

3889
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3890
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(float lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3891
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> sum(lhs);
3892
        sum += rhs;
3893
        return sum;
3894
}
3895
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3896
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(float lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3897
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> diff(lhs);
3898
        diff -= rhs;
3899
        return diff;
3900
}
3901
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3902
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(float lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3903
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> mul(lhs);
3904
        mul *= rhs;
3905
        return mul;
3906
}
3907
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3908
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(float lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
2✔
3909
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> ratio(lhs);
2✔
3910
        ratio /= rhs;
2✔
3911
        return ratio;
2✔
3912
}
3913

3914
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3915
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(double lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
4✔
3916
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> sum(lhs);
4✔
3917
        sum += rhs;
4✔
3918
        return sum;
4✔
3919
}
3920
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3921
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(double lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3922
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> diff(lhs);
3923
        diff -= rhs;
3924
        return diff;
3925
}
3926
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3927
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(double lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
16✔
3928
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> mul(lhs);
16✔
3929
        mul *= rhs;
16✔
3930
        return mul;
16✔
3931
}
3932
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3933
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(double lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3934
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> ratio(lhs);
3935
        ratio /= rhs;
3936
        return ratio;
3937
}
3938

3939
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3940
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(int lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
×
3941
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> sum(lhs);
×
3942
        sum += rhs;
×
3943
        return sum;
×
3944
}
3945
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3946
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(int lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3947
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> diff(lhs);
3948
        diff -= rhs;
3949
        return diff;
3950
}
3951
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3952
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(int lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
38✔
3953
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> mul(lhs);
38✔
3954
        mul *= rhs;
38✔
3955
        return mul;
38✔
3956
}
3957
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3958
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(int lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
374✔
3959
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> ratio(lhs);
374✔
3960
        ratio /= rhs;
374✔
3961
        return ratio;
374✔
3962
}
3963

3964
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3965
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(unsigned int lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3966
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> sum(lhs);
3967
        sum += rhs;
3968
        return sum;
3969
}
3970
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3971
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(unsigned int lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3972
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> diff(lhs);
3973
        diff -= rhs;
3974
        return diff;
3975
}
3976
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3977
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(unsigned int lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3978
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> mul(lhs);
3979
        mul *= rhs;
3980
        return mul;
3981
}
3982
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3983
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(unsigned int lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3984
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> ratio(lhs);
3985
        ratio /= rhs;
3986
        return ratio;
3987
}
3988

3989
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3990
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(long long lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3991
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> sum(lhs);
3992
        sum += rhs;
3993
        return sum;
3994
}
3995
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
3996
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(long long lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
3997
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> diff(lhs);
3998
        diff -= rhs;
3999
        return diff;
4000
}
4001
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4002
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(long long lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
4003
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> mul(lhs);
4004
        mul *= rhs;
4005
        return mul;
4006
}
4007
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4008
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(long long lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
4009
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> ratio(lhs);
4010
        ratio /= rhs;
4011
        return ratio;
4012
}
4013

4014
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4015
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(unsigned long long lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
4016
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> sum(lhs);
4017
        sum += rhs;
4018
        return sum;
4019
}
4020
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4021
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(unsigned long long lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
4022
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> diff(lhs);
4023
        diff -= rhs;
4024
        return diff;
4025
}
4026
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4027
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(unsigned long long lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
4028
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> mul(lhs);
4029
        mul *= rhs;
4030
        return mul;
4031
}
4032
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4033
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(unsigned long long lhs, const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& rhs) {
4034
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> ratio(lhs);
4035
        ratio /= rhs;
4036
        return ratio;
4037
}
4038

4039
///  binary cfloat - literal arithmetic operators
4040

4041
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4042
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, float rhs) {
4043
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4044
        Cfloat sum(lhs);
4045
        sum += Cfloat(rhs);
4046
        return sum;
4047
}
4048
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4049
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, float rhs) {
4050
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4051
        Cfloat diff(lhs);
4052
        diff -= rhs;
4053
        return diff;
4054
}
4055
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4056
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, float rhs) {
4057
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4058
        Cfloat mul(lhs);
4059
        mul *= Cfloat(rhs);
4060
        return mul;
4061
}
4062
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4063
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, float rhs) {
4064
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4065
        Cfloat ratio(lhs);
4066
        ratio /= Cfloat(rhs);
4067
        return ratio;
4068
}
4069

4070
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4071
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, double rhs) {
4072
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4073
        Cfloat sum(lhs);
4074
        sum += Cfloat(rhs);
4075
        return sum;
4076
}
4077
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4078
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, double rhs) {
4✔
4079
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4080
        Cfloat diff(lhs);
4✔
4081
        diff -= rhs;
4✔
4082
        return diff;
4✔
4083
}
4084
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4085
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, double rhs) {
4086
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4087
        Cfloat mul(lhs);
4088
        mul *= Cfloat(rhs);
4089
        return mul;
4090
}
4091
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4092
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, double rhs) {
4093
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4094
        Cfloat ratio(lhs);
4095
        ratio /= Cfloat(rhs);
4096
        return ratio;
4097
}
4098

4099
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4100
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, int rhs) {
4101
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4102
        Cfloat sum(lhs);
4103
        sum += Cfloat(rhs);
4104
        return sum;
4105
}
4106
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4107
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, int rhs) {
4✔
4108
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4109
        Cfloat diff(lhs);
4✔
4110
        diff -= rhs;
4✔
4111
        return diff;
4✔
4112
}
4113
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4114
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, int rhs) {
4115
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4116
        Cfloat mul(lhs);
4117
        mul *= Cfloat(rhs);
4118
        return mul;
4119
}
4120
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4121
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, int rhs) {
4122
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4123
        Cfloat ratio(lhs);
4124
        ratio /= Cfloat(rhs);
4125
        return ratio;
4126
}
4127

4128
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4129
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, unsigned int rhs) {
4130
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4131
        Cfloat sum(lhs);
4132
        sum += Cfloat(rhs);
4133
        return sum;
4134
}
4135
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4136
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, unsigned int rhs) {
4137
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4138
        Cfloat diff(lhs);
4139
        diff -= rhs;
4140
        return diff;
4141
}
4142
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4143
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, unsigned int rhs) {
4144
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4145
        Cfloat mul(lhs);
4146
        mul *= Cfloat(rhs);
4147
        return mul;
4148
}
4149
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4150
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, unsigned int rhs) {
4151
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4152
        Cfloat ratio(lhs);
4153
        ratio /= Cfloat(rhs);
4154
        return ratio;
4155
}
4156

4157
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4158
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long long rhs) {
4159
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4160
        Cfloat sum(lhs);
4161
        sum += Cfloat(rhs);
4162
        return sum;
4163
}
4164
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4165
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long long rhs) {
4166
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4167
        Cfloat diff(lhs);
4168
        diff -= rhs;
4169
        return diff;
4170
}
4171
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4172
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long long rhs) {
4173
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4174
        Cfloat mul(lhs);
4175
        mul *= Cfloat(rhs);
4176
        return mul;
4177
}
4178
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4179
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long long rhs) {
4180
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4181
        Cfloat ratio(lhs);
4182
        ratio /= Cfloat(rhs);
4183
        return ratio;
4184
}
4185

4186
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4187
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator+(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, unsigned long long rhs) {
4188
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4189
        Cfloat sum(lhs);
4190
        sum += Cfloat(rhs);
4191
        return sum;
4192
}
4193
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4194
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator-(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, unsigned long long rhs) {
4195
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4196
        Cfloat diff(lhs);
4197
        diff -= rhs;
4198
        return diff;
4199
}
4200
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4201
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator*(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, unsigned long long rhs) {
4202
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4203
        Cfloat mul(lhs);
4204
        mul *= Cfloat(rhs);
4205
        return mul;
4206
}
4207
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4208
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> operator/(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, unsigned long long rhs) {
4209
        using Cfloat = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>;
4210
        Cfloat ratio(lhs);
4211
        ratio /= Cfloat(rhs);
4212
        return ratio;
4213
}
4214

4215
///////////////////////////////////////////////////////////////////////
4216
///   binary logic literal comparisons
4217

4218
// cfloat - literal float logic operators
4219
// Promote rhs into the cfloat domain rather than narrowing lhs to native float;
4220
// otherwise cfloat<80, 11>(1.0 + 1ulp) == 1.0f reports true because float(lhs)
4221
// rounds the wider value into single precision. Comparisons must happen in the
4222
// shared cfloat domain to preserve precision.
4223
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4224
constexpr inline bool operator==(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, float rhs) {
1✔
4225
        return operator==(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4226
}
4227
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4228
constexpr inline bool operator!=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, float rhs) {
1✔
4229
        return operator!=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4230
}
4231
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4232
constexpr inline bool operator< (const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, float rhs) {
2✔
4233
        return operator< (lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
2✔
4234
}
4235
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4236
constexpr inline bool operator> (const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, float rhs) {
1✔
4237
        return operator> (lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4238
}
4239
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4240
constexpr inline bool operator<=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, float rhs) {
1✔
4241
        return operator<=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4242
}
4243
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4244
constexpr inline bool operator>=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, float rhs) {
1✔
4245
        return operator>=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4246
}
4247
// cfloat - literal double logic operators (same promotion strategy)
4248
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4249
constexpr inline bool operator==(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, double rhs) {
1✔
4250
        return operator==(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4251
}
4252
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4253
constexpr inline bool operator!=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, double rhs) {
1✔
4254
        return operator!=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4255
}
4256
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4257
constexpr inline bool operator< (const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, double rhs) {
336✔
4258
        return operator< (lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
336✔
4259
}
4260
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4261
constexpr inline bool operator> (const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, double rhs) {
892✔
4262
        return operator> (lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
892✔
4263
}
4264
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4265
constexpr inline bool operator<=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, double rhs) {
1✔
4266
        return operator<=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4267
}
4268
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4269
constexpr inline bool operator>=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, double rhs) {
1✔
4270
        return operator>=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4271
}
4272

4273
#if LONG_DOUBLE_SUPPORT
4274
// cfloat - literal long double logic operators (same promotion strategy)
4275
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4276
constexpr inline bool operator==(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long double rhs) {
1✔
4277
        return operator==(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4278
}
4279
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4280
constexpr inline bool operator!=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long double rhs) {
1✔
4281
        return operator!=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4282
}
4283
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4284
constexpr inline bool operator< (const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long double rhs) {
1✔
4285
        return operator< (lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4286
}
4287
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4288
constexpr inline bool operator> (const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long double rhs) {
1✔
4289
        return operator> (lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4290
}
4291
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4292
constexpr inline bool operator<=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long double rhs) {
1✔
4293
        return operator<=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4294
}
4295
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4296
constexpr inline bool operator>=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long double rhs) {
1✔
4297
        return operator>=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4298
}
4299
#endif
4300

4301
// cfloat - literal int logic operators
4302
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4303
constexpr inline bool operator==(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, int rhs) {
11✔
4304
        return operator==(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
11✔
4305
}
4306
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4307
constexpr inline bool operator!=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, int rhs) {
1✔
4308
        return operator!=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4309
}
4310
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4311
constexpr inline bool operator< (const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, int rhs) {
240✔
4312
        return operator<(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
240✔
4313
}
4314
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4315
constexpr inline bool operator> (const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, int rhs) {
666✔
4316
        return operator<(cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs), lhs);
666✔
4317
}
4318
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4319
constexpr inline bool operator<=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, int rhs) {
1✔
4320
        return operator<(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs)) || operator==(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4321
}
4322
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4323
constexpr inline bool operator>=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, int rhs) {
1✔
4324
        return !operator<(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
1✔
4325
}
4326

4327
// cfloat - long long logic operators
4328
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4329
constexpr inline bool operator==(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long long rhs) {
4330
        return operator==(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
4331
}
4332
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4333
constexpr inline bool operator!=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long long rhs) {
4334
        return operator!=(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
4335
}
4336
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4337
constexpr inline bool operator< (const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long long rhs) {
4338
        return operator<(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
4339
}
4340
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4341
constexpr inline bool operator> (const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long long rhs) {
4342
        return operator<(cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs), lhs);
4343
}
4344
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4345
constexpr inline bool operator<=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long long rhs) {
4346
        return operator<(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs)) || operator==(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
4347
}
4348
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4349
constexpr inline bool operator>=(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& lhs, long long rhs) {
4350
        return !operator<(lhs, cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>(rhs));
4351
}
4352

4353
// standard library functions for floating point
4354

4355
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4356
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> frexp(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& x, int* exp) {
765✔
4357
        *exp = x.scale();
765✔
4358
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> fraction(x);
765✔
4359
        fraction.setexponent(0);
765✔
4360
        return fraction;
765✔
4361
}
4362

4363
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4364
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> ldexp(const cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& x, int exp) {
765✔
4365
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> result(x);
765✔
4366
        int xexp = x.scale();
765✔
4367
        result.setexponent(xexp + exp);  // TODO: this does not work for subnormals
765✔
4368
        return result;
765✔
4369
}
4370

4371
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4372
constexpr inline cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> 
4373
fma(cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> x,
3✔
4374
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> y,
4375
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> z) {
4376
        cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> fused{ 0 };
3✔
4377
        constexpr unsigned FBITS = cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>::fbits;
3✔
4378
        constexpr unsigned EXTRA_FBITS = FBITS+2;
3✔
4379
        constexpr unsigned EXTENDED_PRECISION = nbits + EXTRA_FBITS;
3✔
4380
        // the C++ fma spec indicates that the x*y+z is evaluated in 'infinite' precision
4381
        // with only a single rounding event. The minimum finite precision that would behave like this
4382
        // is the precision where the product x*y does not need to be rounded, which will
4383
        // need at least 2*(fbits+1) mantissa bits to capture all bits that can be
4384
        // generated by the product.
4385
        cfloat<EXTENDED_PRECISION, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> preciseX(x), preciseY(y), preciseZ(z);
3✔
4386
//        ReportValue(preciseX, "extended precision x");
4387
//        ReportValue(preciseY, "extended precision y");
4388
//        ReportValue(preciseZ, "extended precision z");
4389
        cfloat<EXTENDED_PRECISION, es, bt, hasSubnormals, hasMaxExpValues, isSaturating> product = preciseX * preciseY;
3✔
4390
//        ReportValue(product, "extended precision p");
4391
        fused = product + preciseZ;
3✔
4392
        return fused;
3✔
4393
}
4394

4395
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4396
cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>&
4397
minpos(cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& c) {
4398
        return c.minpos();
4399
}
4400
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4401
cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>&
4402
maxpos(cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& c) {
4403
        return c.maxpos();
4404
}
4405
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4406
cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>&
4407
minneg(cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& c) {
4408
        return c.minneg();
4409
}
4410
template<unsigned nbits, unsigned es, typename bt, bool hasSubnormals, bool hasMaxExpValues, bool isSaturating>
4411
cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>&
4412
maxneg(cfloat<nbits, es, bt, hasSubnormals, hasMaxExpValues, isSaturating>& c) {
4413
        return c.maxneg();
4414
}
4415

4416
}} // namespace sw::universal
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc