• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37919

29 Sep 2024 09:41AM UTC coverage: 86.232% (-0.3%) from 86.484%
#37919

push

local

web-flow
fix rawbigints OOB issues (#55917)

Fixes issues introduced in #50691 and found in #55906:
* use `@inbounds` and `@boundscheck` macros in rawbigints, for catching
OOB with `--check-bounds=yes`
* fix OOB in `truncate`

12 of 13 new or added lines in 1 file covered. (92.31%)

1287 existing lines in 41 files now uncovered.

77245 of 89578 relevant lines covered (86.23%)

15686161.83 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.76
/base/float.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
const IEEEFloat = Union{Float16, Float32, Float64}
4

5
## floating point traits ##
6

7
"""
8
    Inf16
9

10
Positive infinity of type [`Float16`](@ref).
11
"""
12
const Inf16 = bitcast(Float16, 0x7c00)
13
"""
14
    NaN16
15

16
A not-a-number value of type [`Float16`](@ref).
17

18
See also: [`NaN`](@ref).
19
"""
20
const NaN16 = bitcast(Float16, 0x7e00)
21
"""
22
    Inf32
23

24
Positive infinity of type [`Float32`](@ref).
25
"""
26
const Inf32 = bitcast(Float32, 0x7f800000)
27
"""
28
    NaN32
29

30
A not-a-number value of type [`Float32`](@ref).
31

32
See also: [`NaN`](@ref).
33
"""
34
const NaN32 = bitcast(Float32, 0x7fc00000)
35
const Inf64 = bitcast(Float64, 0x7ff0000000000000)
36
const NaN64 = bitcast(Float64, 0x7ff8000000000000)
37

38
const Inf = Inf64
39
"""
40
    Inf, Inf64
41

42
Positive infinity of type [`Float64`](@ref).
43

44
See also: [`isfinite`](@ref), [`typemax`](@ref), [`NaN`](@ref), [`Inf32`](@ref).
45

46
# Examples
47
```jldoctest
48
julia> π/0
49
Inf
50

51
julia> +1.0 / -0.0
52
-Inf
53

54
julia> ℯ^-Inf
55
0.0
56
```
57
"""
58
Inf, Inf64
59

60
const NaN = NaN64
61
"""
62
    NaN, NaN64
63

64
A not-a-number value of type [`Float64`](@ref).
65

66
See also: [`isnan`](@ref), [`missing`](@ref), [`NaN32`](@ref), [`Inf`](@ref).
67

68
# Examples
69
```jldoctest
70
julia> 0/0
71
NaN
72

73
julia> Inf - Inf
74
NaN
75

76
julia> NaN == NaN, isequal(NaN, NaN), isnan(NaN)
77
(false, true, true)
78
```
79

80
!!! note
81
    Always use [`isnan`](@ref) or [`isequal`](@ref) for checking for `NaN`.
82
    Using `x === NaN` may give unexpected results:
83
    ```julia-repl
84
    julia> reinterpret(UInt32, NaN32)
85
    0x7fc00000
86

87
    julia> NaN32p1 = reinterpret(Float32, 0x7fc00001)
88
    NaN32
89

90
    julia> NaN32p1 === NaN32, isequal(NaN32p1, NaN32), isnan(NaN32p1)
91
    (false, true, true)
92
    ```
93
"""
94
NaN, NaN64
95

96
# bit patterns
97
# Raw bit-pattern views of the IEEE floats: asking for the abstract `Unsigned` or `Signed`
# selects the integer type that has the same width as the float.
for (F, U, S) in ((Float64, UInt64, Int64), (Float32, UInt32, Int32), (Float16, UInt16, Int16))
    @eval begin
        reinterpret(::Type{Unsigned}, x::$F) = reinterpret($U, x)
        reinterpret(::Type{Signed}, x::$F) = reinterpret($S, x)
    end
end
596,826✔
103

104
# Per-format bit patterns, one row per float type:
#   sign bit, exponent field, exponent field of 1.0, exponent field of 0.5, significand field.
# Each literal keeps the width of its format (UInt64 / UInt32 / UInt16).
for (F, sgn, expo, unit, half, frac) in (
    (Float64, 0x8000_0000_0000_0000, 0x7ff0_0000_0000_0000, 0x3ff0_0000_0000_0000,
     0x3fe0_0000_0000_0000, 0x000f_ffff_ffff_ffff),
    (Float32, 0x8000_0000, 0x7f80_0000, 0x3f80_0000, 0x3f00_0000, 0x007f_ffff),
    (Float16, 0x8000, 0x7c00, 0x3c00, 0x3800, 0x03ff),
)
    @eval begin
        sign_mask(::Type{$F}) = $sgn
        exponent_mask(::Type{$F}) = $expo
        exponent_one(::Type{$F}) = $unit
        exponent_half(::Type{$F}) = $half
        significand_mask(::Type{$F}) = $frac
    end
end
×
121

122
# Stored significand (fraction) bits of `x`, returned as an unsigned integer.
function mantissa(x::T) where {T}
    bits = reinterpret(Unsigned, x)
    return bits & significand_mask(T)
end
3,618,808✔
123

124
for T in (Float16, Float32, Float64)
    # Derive every layout constant from the masks once, then bake the values into methods.
    nsig = trailing_ones(significand_mask(T))
    nexp = sizeof(T)*8 - nsig - 1
    bias = Int(exponent_one(T) >> nsig)
    rawmax = Int(exponent_mask(T) >> nsig)
    @eval begin
        significand_bits(::Type{$T}) = $nsig
        exponent_bits(::Type{$T}) = $nexp
        exponent_bias(::Type{$T}) = $bias
        # maximum float exponent
        exponent_max(::Type{$T}) = $(rawmax - bias - 1)
        # maximum float exponent without bias
        exponent_raw_max(::Type{$T}) = $rawmax
    end
end
133

134
"""
135
    exponent_max(T)
136

137
Maximum [`exponent`](@ref) value for a floating point number of type `T`.
138

139
# Examples
140
```jldoctest
141
julia> Base.exponent_max(Float64)
142
1023
143
```
144

145
Note, `exponent_max(T) + 1` is a possible value of the exponent field
146
with bias, which might be used as sentinel value for `Inf` or `NaN`.
147
"""
148
function exponent_max end
149

150
"""
151
    exponent_raw_max(T)
152

153
Maximum value of the [`exponent`](@ref) field for a floating point number of type `T` without bias,
154
i.e. the maximum integer value representable by [`exponent_bits(T)`](@ref) bits.
155
"""
156
function exponent_raw_max end
157

158
"""
159
IEEE 754 definition of the minimum exponent.
160
"""
161
function ieee754_exponent_min(::Type{T}) where {T<:IEEEFloat}
    # The minimum exponent is expressed through the maximum one: emin = 1 - emax.
    emin = 1 - exponent_max(T)
    return Int(emin)::Int
end
19,002,114✔
162

163
# For the built-in IEEE types the minimum exponent is exactly the IEEE 754 one.
for F in (Float16, Float32, Float64)
    @eval exponent_min(::Type{$F}) = ieee754_exponent_min($F)
end
×
166

167
# Assemble a raw bit pattern for float type `F` from its three fields, most significant
# first: sign bit, then exponent field, then significand field.
function ieee754_representation(
    ::Type{F}, sign_bit::Bool, exponent_field::Integer, significand_field::Integer
) where {F<:IEEEFloat}
    U = uinttype(F)
    bits::U = sign_bit
    # Make room for the exponent field and drop it in.
    bits = (bits << exponent_bits(F)) | exponent_field
    # Make room for the significand field.
    bits <<= significand_bits(F)
    # The updating assignment is deliberately the last expression, so the function's value
    # is the value of this assignment expression, exactly as before.
    bits |= significand_field
end
177

178
# ±floatmax(F): one below the all-ones exponent field, with an all-ones significand
ieee754_representation(::Type{F}, sign_bit::Bool, ::Val{:omega}) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, exponent_raw_max(F) - 1, significand_mask(F))

# NaN or an infinity: all-ones exponent field, caller-supplied significand
ieee754_representation(
    ::Type{F}, sign_bit::Bool, significand_field::Integer, ::Val{:nan}
) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, exponent_raw_max(F), significand_field)

# NaN with default payload: only the top significand bit is set
ieee754_representation(::Type{F}, sign_bit::Bool, ::Val{:nan}) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, one(uinttype(F)) << (significand_bits(F) - 1), Val(:nan))

# Infinity: the NaN/infinity encoding with an all-zero significand
ieee754_representation(::Type{F}, sign_bit::Bool, ::Val{:inf}) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, false, Val(:nan))

# Subnormal or zero: all-zero exponent field, caller-supplied significand
ieee754_representation(
    ::Type{F}, sign_bit::Bool, significand_field::Integer, ::Val{:subnormal}
) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, false, significand_field)

# Zero: the subnormal encoding with an all-zero significand
ieee754_representation(::Type{F}, sign_bit::Bool, ::Val{:zero}) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, false, Val(:subnormal))
219

220
"""
221
    uabs(x::Integer)
222

223
Return the absolute value of `x`, possibly returning a different type should the
224
operation be susceptible to overflow. This typically arises when `x` is a two's complement
225
signed integer, so that `abs(typemin(x)) == typemin(x) < 0`, in which case the result of
226
`uabs(x)` will be an unsigned integer of the same size.
227
"""
228
function uabs(x::Integer)
    return abs(x)
end
function uabs(x::BitSigned)
    # `abs` may wrap for `typemin`; viewing the result as unsigned gives the true magnitude.
    magnitude = abs(x)
    return unsigned(magnitude)
end
4,677,588✔
230

231
## conversions to floating-point ##
232

233
# TODO: deprecate in 2.0
234
function Float16(x::Integer)
    # Generic integers are converted in two hops: first to Float32, then down to Float16.
    single = convert(Float32, x)::Float32
    return convert(Float16, single)
end
×
235

236
# Constructors from the machine integers up to 64 bits, plus the matching promotion rules
# (an integer combined with a float promotes to that float type).
# Signed sources go through `sitofp` ...
for F in (Float16, Float32, Float64), I in (Int8, Int16, Int32, Int64)
    @eval begin
        (::Type{$F})(x::$I) = sitofp($F, x)
        promote_rule(::Type{$F}, ::Type{$I}) = $F
    end
end
# ... while `Bool` and unsigned sources go through `uitofp`.
for F in (Float16, Float32, Float64), I in (Bool, UInt8, UInt16, UInt32, UInt64)
    @eval begin
        (::Type{$F})(x::$I) = uitofp($F, x)
        promote_rule(::Type{$F}, ::Type{$I}) = $F
    end
end
250

251
function Bool(x::Real)
    # Only the values zero and one are representable as a Bool.
    x == 0 && return false
    x == 1 && return true
    throw(InexactError(:Bool, Bool, x))
end
16,417,092✔
252

253
# A 128-bit integer combined with an IEEE float promotes to that float type.
for F in (Float64, Float32, Float16), I in (UInt128, Int128)
    @eval promote_rule(::Type{$F}, ::Type{$I}) = $F
end
×
259

260
function Float64(x::UInt128)
    # Split `x` into a low and a high chunk, plant each chunk in the significand bits of a
    # power-of-two constant, subtract that constant again to obtain the chunk as a Float64,
    # and finally add the two pieces.
    if x < UInt128(1) << 104
        # Small enough to fit in two 52-bit significands.
        lo_magic, hi_magic = 0x1p52, 0x1p104
        lo_chunk = (x % UInt64) & Base.significand_mask(Float64)
        hi_chunk = (x >> 52) % UInt64
    else
        # Large enough that the low bits only affect rounding: they are packed (OR-ed)
        # into the low chunk instead of being kept individually.
        lo_magic, hi_magic = 0x1p76, 0x1p128
        lo_chunk = ((x >> 12) % UInt64) >> 12 | (x % UInt64) & 0xFFFFFF
        hi_chunk = (x >> 76) % UInt64
    end
    lo = reinterpret(Float64, reinterpret(UInt64, lo_magic) | lo_chunk) - lo_magic
    hi = reinterpret(Float64, reinterpret(UInt64, hi_magic) | hi_chunk) - hi_magic
    return lo + hi
end
279

280
function Float64(x::Int128)
    # Sign of `x`, already moved to the sign-bit position of a Float64.
    sign = ((x >> 127) % UInt64) << 63
    # Magnitude as an unsigned value (this is what `uabs` does for an `Int128`).
    mag = unsigned(abs(x))
    # Convert the magnitude in two chunks: each chunk is planted in the significand bits of
    # a power-of-two constant, which is then subtracted again.
    if mag < UInt128(1) << 104
        # Small enough to fit in two 52-bit significands.
        lo_magic, hi_magic = 0x1p52, 0x1p104
        lo_chunk = (mag % UInt64) & Base.significand_mask(Float64)
        hi_chunk = (mag >> 52) % UInt64
    else
        # Large enough that the low bits only affect rounding: pack them into the low chunk.
        lo_magic, hi_magic = 0x1p76, 0x1p128
        lo_chunk = ((mag >> 12) % UInt64) >> 12 | (mag % UInt64) & 0xFFFFFF
        hi_chunk = (mag >> 76) % UInt64
    end
    lo = reinterpret(Float64, reinterpret(UInt64, lo_magic) | lo_chunk) - lo_magic
    hi = reinterpret(Float64, reinterpret(UInt64, hi_magic) | hi_chunk) - hi_magic
    # Re-attach the sign on the bit level.
    return reinterpret(Float64, sign | reinterpret(UInt64, lo + hi))
end
301

302
function Float32(x::UInt128)
    x == 0 && return 0f0
    nbits = top_set_bit(x) # ndigits0z(x,2)
    frac = if nbits <= 24
        # Everything fits: left-align below the implicit bit and mask that bit away.
        ((x % UInt32) << (24 - nbits)) & 0x007f_ffff
    else
        kept = ((x >> (nbits - 25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit
        # round, ties up (extra leading bit in case of next exponent)
        rounded = (kept + one(UInt32)) >> 1
        # fix last bit to round to even
        rounded & ~UInt32(trailing_zeros(x) == (nbits - 25))
    end
    expo = ((nbits + 126) % UInt32) << 23
    return reinterpret(Float32, expo + frac)
end
315

316
function Float32(x::Int128)
    x == 0 && return 0f0
    sgn = ((x >>> 96) % UInt32) & 0x8000_0000 # sign bit
    mag = abs(x) % UInt128
    nbits = top_set_bit(mag) # ndigits0z(x,2)
    frac = if nbits <= 24
        # Everything fits: left-align below the implicit bit and mask that bit away.
        ((mag % UInt32) << (24 - nbits)) & 0x007f_ffff
    else
        kept = ((mag >> (nbits - 25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit
        # round, ties up (extra leading bit in case of next exponent)
        rounded = (kept + one(UInt32)) >> 1
        # fix last bit to round to even
        rounded & ~UInt32(trailing_zeros(mag) == (nbits - 25))
    end
    expo = ((nbits + 126) % UInt32) << 23
    return reinterpret(Float32, sgn | expo + frac)
end
331

332
# TODO: optimize
333
# 128-bit integers reach Float16 by way of Float64.
for I in (UInt128, Int128)
    @eval Float16(x::$I) = convert(Float16, Float64(x))
end
38✔
335

336
# Conversions between the three IEEE widths. For each (narrow, wide) pair the narrowing
# constructor uses the `fptrunc` intrinsic and the widening constructor uses `fpext`.
for (Narrow, Wide) in ((Float16, Float32), (Float16, Float64), (Float32, Float64))
    @eval begin
        (::Type{$Narrow})(x::$Wide) = fptrunc($Narrow, x)
        (::Type{$Wide})(x::$Narrow) = fpext($Wide, x)
    end
end
4,506,554✔
343

344
# `AbstractFloat(x)` picks `Float64` for every built-in integer type.
# NOTE: for the 64- and 128-bit integer types this conversion is LOSSY.
for I in (Bool, Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128)
    @eval AbstractFloat(x::$I) = Float64(x)
end
2,058✔
355

356
function Bool(x::Float16)
    # Only the values zero and one are representable as a Bool.
    x == 0 && return false
    x == 1 && return true
    throw(InexactError(:Bool, Bool, x))
end
5✔
357

358
"""
359
    float(x)
360

361
Convert a number or array to a floating point data type.
362

363
See also: [`complex`](@ref), [`oftype`](@ref), [`convert`](@ref).
364

365
# Examples
366
```jldoctest
367
julia> float(1:1000)
368
1.0:1.0:1000.0
369

370
julia> float(typemax(Int32))
371
2.147483647e9
372
```
373
"""
374
function float(x)
    # Delegate to the `AbstractFloat` constructor, which picks the concrete float type.
    return AbstractFloat(x)
end
51,537,700✔
375

376
"""
377
    float(T::Type)
378

379
Return an appropriate type to represent a value of type `T` as a floating point value.
380
Equivalent to `typeof(float(zero(T)))`.
381

382
# Examples
383
```jldoctest
384
julia> float(Complex{Int})
385
ComplexF64 (alias for Complex{Float64})
386

387
julia> float(Int)
388
Float64
389
```
390
"""
391
function float(::Type{T}) where {T<:Number}
    # Convert a sample value (zero) and report the type that comes out.
    return typeof(float(zero(T)))
end
function float(::Type{T}) where {T<:AbstractFloat}
    # Float types are already their own floating-point type.
    return T
end
function float(::Type{Union{}}, slurp...)
    return Union{}(0.0)
end
×
394

395
"""
396
    unsafe_trunc(T, x)
397

398
Return the nearest integral value of type `T` whose absolute value is
399
less than or equal to the absolute value of `x`. If the value is not representable by `T`,
400
an arbitrary value will be returned.
401
See also [`trunc`](@ref).
402

403
# Examples
404
```jldoctest
405
julia> unsafe_trunc(Int, -2.2)
406
-2
407

408
julia> unsafe_trunc(Int, NaN)
409
-9223372036854775808
410
```
411
"""
412
function unsafe_trunc end
413

414
# Machine integers up to 64 bits map straight onto the float-to-integer intrinsics:
# `fptosi` for the signed targets ...
for Ti in (Int8, Int16, Int32, Int64)
    @eval unsafe_trunc(::Type{$Ti}, x::IEEEFloat) = fptosi($Ti, x)
end
# ... and `fptoui` for the unsigned targets.
for Ti in (UInt8, UInt16, UInt32, UInt64)
    @eval unsafe_trunc(::Type{$Ti}, x::IEEEFloat) = fptoui($Ti, x)
end
424

425
function unsafe_trunc(::Type{UInt128}, x::Float64)
    bits = reinterpret(UInt64, x)
    # Exponent field minus 1075: the shift that scales the integer significand below.
    shift = Int(bits >> 52) & 0x07ff - 1075
    # Significand bits with the implicit leading bit made explicit.
    sig = (bits & 0x000f_ffff_ffff_ffff) | 0x0010_0000_0000_0000
    return shift <= 0 ? UInt128(sig >> -shift) : UInt128(sig) << shift
end
function unsafe_trunc(::Type{Int128}, x::Float64)
    # Truncate as unsigned, then give the result the sign of `x`.
    magnitude = unsafe_trunc(UInt128, x) % Int128
    return copysign(magnitude, x)
end
438

439
function unsafe_trunc(::Type{UInt128}, x::Float32)
    bits = reinterpret(UInt32, x)
    # Exponent field minus 150: the shift that scales the integer significand below.
    shift = Int(bits >> 23) & 0x00ff - 150
    # Significand bits with the implicit leading bit made explicit.
    sig = (bits & 0x007f_ffff) | 0x0080_0000
    return shift <= 0 ? UInt128(sig >> -shift) : UInt128(sig) << shift
end
function unsafe_trunc(::Type{Int128}, x::Float32)
    # Truncate as unsigned, then give the result the sign of `x`.
    magnitude = unsafe_trunc(UInt128, x) % Int128
    return copysign(magnitude, x)
end

# Float16 is widened to Float32 first and then reuses the methods above.
function unsafe_trunc(::Type{UInt128}, x::Float16)
    return unsafe_trunc(UInt128, Float32(x))
end
function unsafe_trunc(::Type{Int128}, x::Float16)
    return unsafe_trunc(Int128, Float32(x))
end
17✔
455

456
# matches convert methods
457
# also determines trunc, floor, ceil
458
# Rounding to an abstract integer type picks the native-width concrete type:
# `Int` for `Signed` and `Integer`, `UInt` for `Unsigned`.
for (Abstract, Concrete) in ((Signed, Int), (Unsigned, UInt), (Integer, Int))
    @eval round(::Type{$Abstract}, x::IEEEFloat, r::RoundingMode) = round($Concrete, x, r)
end
3,113✔
461

462
# Rounding to an integral float value: each rounding mode maps to one intrinsic.
for (Mode, intrinsic) in (
    (RoundingMode{:ToZero}, :trunc_llvm),
    (RoundingMode{:Down}, :floor_llvm),
    (RoundingMode{:Up}, :ceil_llvm),
    (RoundingMode{:Nearest}, :rint_llvm),
)
    @eval round(x::IEEEFloat, ::$Mode) = $intrinsic(x)
end
12,368,383✔
466

467
# Does rounding mode `r` ask for a result that is not below `x`?
function rounds_up(x, ::RoundingMode{:Down})
    return false
end
function rounds_up(x, ::RoundingMode{:Up})
    return true
end
function rounds_up(x, ::RoundingMode{:ToZero})
    return signbit(x)
end
function rounds_up(x, ::RoundingMode{:FromZero})
    return !signbit(x)
end

# Convert `x_integer` to `T`, then step by one float if the conversion landed on the wrong
# side of `x` for the directed rounding mode `r`.
function _round_convert(::Type{T}, x_integer, x, r::Union{RoundingMode{:ToZero}, RoundingMode{:FromZero}, RoundingMode{:Up}, RoundingMode{:Down}}) where {T<:AbstractFloat}
    approx = convert(T, x_integer)
    if rounds_up(x, r)
        # The result must not be below `x`.
        return approx < x ? nextfloat(approx) : approx
    end
    # The result must not be above `x`.
    return approx > x ? prevfloat(approx) : approx
end
479

480
## floating point promotions ##
481
# Mixing two IEEE widths promotes to the wider one.
for (Wide, Narrow) in ((Float32, Float16), (Float64, Float16), (Float64, Float32))
    @eval promote_rule(::Type{$Wide}, ::Type{$Narrow}) = $Wide
end
×
484

485
# Widening moves one step up the IEEE width ladder.
for (Narrow, Wide) in ((Float16, Float32), (Float32, Float64))
    @eval widen(::Type{$Narrow}) = $Wide
end
×
487

488
## floating point arithmetic ##
489
# Negation and the four same-type binary operations are thin wrappers over intrinsics.
function -(x::IEEEFloat)
    return neg_float(x)
end

for (op, intrinsic) in ((:+, :add_float), (:-, :sub_float), (:*, :mul_float), (:/, :div_float))
    @eval ($op)(x::T, y::T) where {T<:IEEEFloat} = $intrinsic(x, y)
end

function muladd(x::T, y::T, z::T) where {T<:IEEEFloat}
    return muladd_float(x, y, z)
end
824,806,367✔
497

498
# TODO: faster floating point div?
499
# TODO: faster floating point fld?
500
# TODO: faster floating point mod?
501

502
# Exponent field of `x`, shifted down to the low bits. Note that this is the raw field
# value: no bias is subtracted here.
function unbiased_exponent(x::T) where {T<:IEEEFloat}
    field = reinterpret(Unsigned, x) & exponent_mask(T)
    return field >> significand_bits(T)
end
505

506
# Significand of `x` with the implicit leading bit made explicit. Subnormal inputs keep
# their stored bits unchanged; the name says that Inf/NaN inputs are not considered.
function explicit_mantissa_noinfnan(x::T) where {T<:IEEEFloat}
    frac = mantissa(x)
    issubnormal(x) && return frac
    # One past the all-ones significand mask is exactly the implicit-bit position.
    implicit_bit = significand_mask(T) + uinttype(T)(1)
    return frac | implicit_bit
end
511

512
# Build a float of type `floattype(U)` from an integer significand `number` and an exponent
# value `ep`, renormalizing `number` first.
function _to_float(number::U, ep) where {U<:Unsigned}
    F = floattype(U)
    S = signed(U)
    expo = unsafe_trunc(S, ep)
    # Shift that leaves exactly `exponent_bits(F)` leading zeros in `number`; the exponent
    # is adjusted by the same amount so the represented value does not change.
    shift::S = unsafe_trunc(S, Core.Intrinsics.ctlz_int(number) - U(exponent_bits(F)))
    number <<= shift
    expo -= shift
    bits = if expo >= 0
        # Significand bits, plus `expo + 1` placed in the exponent field.
        (number & significand_mask(F)) |
            (((expo + S(1)) << significand_bits(F)) & exponent_mask(F))
    else
        # Exponent ran out: shift the significand down and leave the exponent field zero.
        (number >> -expo) & significand_mask(F)
    end
    return reinterpret(F, bits)
end
528

529
@assume_effects :terminates_locally :nothrow function rem_internal(x::T, y::T) where {T<:IEEEFloat}
    # `rem` calls this with the absolute values of finite, nonzero arguments, so the raw
    # bit patterns can be compared directly.
    xbits = reinterpret(Unsigned, x)
    ybits = reinterpret(Unsigned, y)
    xbits < ybits && return x
    xbits == ybits && return zero(T)

    ex = unbiased_exponent(x)
    ey = unbiased_exponent(y)
    # Most common case where |y| is "very normal" and |x/y| < 2^EXPONENT_WIDTH
    if ey > (significand_bits(T)) && (ex - ey) <= (exponent_bits(T))
        sx = explicit_mantissa_noinfnan(x)
        sy = explicit_mantissa_noinfnan(y)
        r = urem_int((sx << (ex - ey)), sy)
        iszero(r) && return zero(T)
        return _to_float(r, ey - uinttype(T)(1))
    end
    # Both are subnormals
    if ex == 0 && ey == 0
        return reinterpret(T, urem_int(xbits, ybits) & significand_mask(T))
    end

    sx = explicit_mantissa_noinfnan(x)
    ex -= uinttype(T)(1)
    sy = explicit_mantissa_noinfnan(y)
    # Leading zero bits of y's significand; recomputed below when y has a zero exponent field.
    lead_zeros = uinttype(T)(exponent_bits(T))
    if ey > 0
        ey -= uinttype(T)(1)
    else
        sy = mantissa(y)
        lead_zeros = Core.Intrinsics.ctlz_int(sy)
    end

    trail_zeros = Core.Intrinsics.cttz_int(sy)
    # Zero bits on both sides of y's significand: the shift step used by the loop below.
    spare_bits = lead_zeros + trail_zeros

    # Exponent difference that still has to be worked off.
    remaining = ex - ey
    # Shift y's significand right as far as its trailing zeros (or `remaining`) allow.
    rshift = min(remaining, trail_zeros)
    sy >>= rshift
    remaining -= rshift
    ey += rshift
    # Shift x's significand left by at most `exponent_bits(T)` (or `remaining`).
    lshift = min(remaining, uinttype(T)(exponent_bits(T)))
    sx <<= lshift
    remaining -= lshift

    sx = urem_int(sx, sy)
    iszero(sx) && return zero(T)
    iszero(remaining) && return _to_float(sx, ey)

    # Work off the rest of the exponent difference `spare_bits` at a time, reducing
    # modulo y's significand after every shift.
    while remaining > spare_bits
        remaining -= spare_bits
        sx <<= spare_bits
        sx = urem_int(sx, sy)
    end
    sx <<= remaining
    sx = urem_int(sx, sy)
    return _to_float(sx, ey)
end
593

594
function rem(x::T, y::T) where {T<:IEEEFloat}
    # Regular case: both operands finite and nonzero. Work on magnitudes, restore x's sign.
    regular = isfinite(x) && !iszero(x) && isfinite(y) && !iszero(y)
    regular && return copysign(rem_internal(abs(x), abs(y)), x)
    # Infinite dividend, NaN divisor or zero divisor give NaN (y can still be Inf here).
    (isinf(x) || isnan(y) || iszero(y)) && return T(NaN)
    # Everything else returns x unchanged.
    return x
end
603

604
function mod(x::T, y::T) where {T<:AbstractFloat}
    r = rem(x, y)
    # A zero remainder takes the sign of the divisor.
    r == 0 && return copysign(r, y)
    # If remainder and divisor disagree about being positive, shift by one divisor.
    (r > 0) ⊻ (y > 0) && return r + y
    return r
end
614

615
## floating point comparisons ##
616
# Same-type comparisons are thin wrappers over the comparison intrinsics.
for (op, intrinsic) in ((:(==), :eq_float), (:(!=), :ne_float), (:<, :lt_float), (:<=, :le_float))
    @eval ($op)(x::T, y::T) where {T<:IEEEFloat} = $intrinsic(x, y)
end

function isequal(x::T, y::T) where {T<:IEEEFloat}
    return fpiseq(x, y)
end
3,466,460✔
622

623
# interpret as sign-magnitude integer
624
@inline function _fpint(x)
    I = inttype(typeof(x))
    raw = reinterpret(I, x)
    # Patterns with the sign bit set have all their other bits flipped, so that the
    # resulting integers order the same way as the floats they came from.
    flipped = raw ⊻ typemax(I)
    return ifelse(raw < zero(I), flipped, raw)
end
629

630
@inline function isless(a::T, b::T) where T<:IEEEFloat
    if isnan(a) || isnan(b)
        # With a NaN involved, `a` is less exactly when `a` itself is not the NaN.
        return !isnan(a)
    end
    # Otherwise compare the sign-magnitude integer views.
    return _fpint(a) < _fpint(b)
end
635

636
# Exact Float (Tf) vs Integer (Ti) comparisons
637
# Assumes:
638
# - typemax(Ti) == 2^n-1
639
# - typemax(Ti) can't be exactly represented by Tf:
640
#   => Tf(typemax(Ti)) == 2^n or Inf
641
# - typemin(Ti) can be exactly represented by Tf
642
#
643
# 1. convert y::Ti to float fy::Tf
644
# 2. perform Tf comparison x vs fy
645
# 3. if x == fy, check if (1) resulted in rounding:
646
#  a. convert fy back to Ti and compare with original y
647
#  b. unsafe_trunc has undefined behaviour if fy == Tf(typemax(Ti))
648
#     (but consequently x == fy > y)
649
for Ti in (Int64, UInt64, Int128, UInt128), Tf in (Float32, Float64)
    # typemax(Ti) converted to Tf. When the integer converts to exactly this value, the
    # conversion back with `unsafe_trunc` must not be trusted, so it is special-cased.
    top = Tf(typemax(Ti))
    @eval begin
        # All tests below use the non-short-circuiting `&` and `|`.
        function ==(x::$Tf, y::$Ti)
            fy = ($Tf)(y)
            (x == fy) & (fy != $top) & (y == unsafe_trunc($Ti, fy))
        end
        ==(y::$Ti, x::$Tf) = x == y

        function <(x::$Ti, y::$Tf)
            fx = ($Tf)(x)
            (fx < y) | ((fx == y) & ((fx == $top) | (x < unsafe_trunc($Ti, fx))))
        end
        function <=(x::$Ti, y::$Tf)
            fx = ($Tf)(x)
            (fx < y) | ((fx == y) & ((fx == $top) | (x <= unsafe_trunc($Ti, fx))))
        end

        function <(x::$Tf, y::$Ti)
            fy = ($Tf)(y)
            (x < fy) | ((x == fy) & (fy < $top) & (unsafe_trunc($Ti, fy) < y))
        end
        function <=(x::$Tf, y::$Ti)
            fy = ($Tf)(y)
            (x < fy) | ((x == fy) & (fy < $top) & (unsafe_trunc($Ti, fy) <= y))
        end
    end
end
678
# Mixed comparisons that are carried out after converting both operands to a wider float
# type `W`. Rows are (float operand type, integer operand type, W).
for op in (:(==), :<, :<=)
    for (F, I, W) in (
        (Float16, Union{Int128,UInt128,Int64,UInt64}, Float64),
        (Union{Float16,Float32}, Union{Int32,UInt32}, Float64),
        (Float16, Union{Int16,UInt16}, Float32),
    )
        @eval begin
            ($op)(x::$F, y::$I) = ($op)($W(x), $W(y))
            ($op)(x::$I, y::$F) = ($op)($W(x), $W(y))
        end
    end
end
690

691

692
# Absolute value of an IEEE float, via the `abs_float` intrinsic.
function abs(x::IEEEFloat)
    return abs_float(x)
end
159,280,889✔
693

694
"""
695
    isnan(f) -> Bool
696

697
Test whether a number value is a NaN, an indeterminate value which is neither an infinity
698
nor a finite number ("not a number").
699

700
See also: [`iszero`](@ref), [`isone`](@ref), [`isinf`](@ref), [`ismissing`](@ref).
701
"""
702
function isnan(x::AbstractFloat)
    # A float is reported as NaN exactly when it compares unequal to itself.
    return (x != x)::Bool
end
function isnan(x::Number)
    # Generic numbers are never reported as NaN.
    return false
end
×
704

705
# `x - x` is NaN exactly when `x` is NaN or infinite.
function isfinite(x::AbstractFloat)
    return !isnan(x - x)
end
# Generic reals: finite when the denominator of the `decompose` triple is nonzero.
function isfinite(x::Real)
    return decompose(x)[3] != 0
end
function isfinite(x::Integer)
    return true
end
×
708

709
"""
710
    isinf(f) -> Bool
711

712
Test whether a number is infinite.
713

714
See also: [`Inf`](@ref), [`iszero`](@ref), [`isfinite`](@ref), [`isnan`](@ref).
715
"""
716
function isinf(x::Real)
    # Infinite means: not NaN and not finite (both tests are always evaluated).
    return !isnan(x) & !isfinite(x)
end
function isinf(x::IEEEFloat)
    # Compare the magnitude against positive infinity of the same type.
    return abs(x) === oftype(x, Inf)
end
40,766,836✔
718

719
const hx_NaN = hash_uint64(reinterpret(UInt64, NaN))
720
function hash(x::Float64, h::UInt)
    # see comments on trunc and hash(Real, UInt)
    if typemin(Int64) <= x < typemax(Int64)
        # In Int64 range: integer-valued floats hash like the integer itself.
        as_signed = fptosi(Int64, x)
        isequal(as_signed, x) && return hash(as_signed, h)
    elseif typemin(UInt64) <= x < typemax(UInt64)
        # Same for values that only fit in UInt64.
        as_unsigned = fptoui(UInt64, x)
        isequal(as_unsigned, x) && return hash(as_unsigned, h)
    elseif isnan(x)
        return hx_NaN ⊻ h # NaN does not have a stable bit pattern
    end
    # Everything else is hashed from its bit pattern.
    return hash_uint64(bitcast(UInt64, x)) - 3h
end
737

738
# A Float32 hashes like the Float64 of the same value.
function hash(x::Float32, h::UInt)
    return hash(Float64(x), h)
end
6,666✔
739

740
function hash(x::Float16, h::UInt)
    # see comments on trunc and hash(Real, UInt)
    if isfinite(x) # all finite Float16 fit in Int64
        # Integer-valued floats hash like the integer itself.
        as_int = fptosi(Int64, x)
        isequal(as_int, x) && return hash(as_int, h)
    elseif isnan(x)
        return hx_NaN ⊻ h # NaN does not have a stable bit pattern
    end
    # Everything else is hashed from the bit pattern of the equivalent Float64.
    return hash_uint64(bitcast(UInt64, Float64(x))) - 3h
end
752

753
## generic hashing for rational values ##
754
function hash(x::Real, h::UInt)
    # decompose x as num*2^pow/den
    num, pow, den = decompose(x)

    # handle special values: NaN, signed zero, signed infinity
    if num == 0
        den == 0 && return hash(NaN, h)
        return hash(ifelse(den > 0, 0.0, -0.0), h)
    end
    den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h)

    # normalize decomposition: positive denominator, powers of two moved into `pow`
    if den < 0
        num = -num
        den = -den
    end
    tz_num = trailing_zeros(num)
    num >>= tz_num
    tz_den = trailing_zeros(den)
    den >>= tz_den
    pow += tz_num - tz_den
    # If the real can be represented as an Int64, UInt64, or Float64, hash as those types.
    # To be an Integer the denominator must be 1 and the power must be non-negative.
    if den == 1
        # left = ceil(log2(num*2^pow))
        left = top_set_bit(abs(num)) + pow
        # 2^-1074 is the minimum Float64 so if the power is smaller, not a Float64
        if -1074 <= pow
            if 0 <= pow # if pow is non-negative, it is an integer
                left <= 63 && return hash(Int64(num) << Int(pow), h)
                left <= 64 && !signbit(num) && return hash(UInt64(num) << Int(pow), h)
            end # typemin(Int64) handled by Float64 case
            # 2^1024 is the maximum Float64 so if the power is greater, not a Float64
            # Float64s only have 53 mantissa bits (including implicit bit)
            left <= 1024 && left - pow <= 53 && return hash(ldexp(Float64(num), pow), h)
        end
    else
        h = hash_integer(den, h)
    end
    # handle generic rational values
    h = hash_integer(pow, h)
    h = hash_integer(num, h)
    return h
end
796

797
#=
798
`decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`.
799

800
The decompose function is the point where rational-valued numeric types that support
801
hashing hook into the hashing protocol. `decompose(x)` should return three integer
802
values `num, pow, den`, such that the value of `x` is mathematically equal to
803

804
    num*2^pow/den
805

806
The decomposition need not be canonical in the sense that it just needs to be *some*
807
way to express `x` in this form, not any particular way – with the restriction that
808
`num` and `den` may not share any odd common factors. They may, however, have powers
809
of two in common – the generic hashing code will normalize those as necessary.
810

811
Special values:
812

813
 - `x` is zero: `num` should be zero and `den` should have the same sign as `x`
814
 - `x` is infinite: `den` should be zero and `num` should have the same sign as `x`
815
 - `x` is not a number: `num` and `den` should both be zero
816
=#
817

818
# An integer is its own numerator: x == x * 2^0 / 1.
function decompose(x::Integer)
    return (x, 0, 1)
end
1,154✔
819

820
# Decompose a `Float16` into `(num, pow, den)` such that `x == num * 2^pow / den`.
# `den` is `±1` and carries the sign of `x`. NaN yields `(0, 0, 0)` and `±Inf`
# yields `(±1, 0, 0)`.
function decompose(x::Float16)::NTuple{3,Int}
    isnan(x) && return 0, 0, 0
    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
    bits = reinterpret(UInt16, x)
    frac = (bits & 0x03ff) % Int16          # stored significand bits
    expo = ((bits & 0x7c00) >> 10) % Int    # biased exponent field
    frac |= Int16(expo != 0) << 10          # implicit leading bit for a nonzero exponent field
    den = ifelse(signbit(x), -1, 1)
    # 25 = exponent bias (15) + stored significand width (10); a zero exponent
    # field (zero/subnormal) is scaled as if the field were 1.
    return frac, expo - 25 + (expo == 0), den
end
830

831
# Decompose a `Float32` into `(num, pow, den)` such that `x == num * 2^pow / den`.
# `den` is `±1` and carries the sign of `x`. NaN yields `(0, 0, 0)` and `±Inf`
# yields `(±1, 0, 0)`.
function decompose(x::Float32)::NTuple{3,Int}
    isnan(x) && return 0, 0, 0
    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
    bits = reinterpret(UInt32, x)
    frac = (bits & 0x007fffff) % Int32          # stored significand bits
    expo = ((bits & 0x7f800000) >> 23) % Int    # biased exponent field
    frac |= Int32(expo != 0) << 23              # implicit leading bit for a nonzero exponent field
    den = ifelse(signbit(x), -1, 1)
    # 150 = exponent bias (127) + stored significand width (23); a zero exponent
    # field (zero/subnormal) is scaled as if the field were 1.
    return frac, expo - 150 + (expo == 0), den
end
841

842
# Decompose a `Float64` into `(num, pow, den)` such that `x == num * 2^pow / den`.
# `den` is `±1` and carries the sign of `x`. NaN yields `(0, 0, 0)` and `±Inf`
# yields `(±1, 0, 0)`.
function decompose(x::Float64)::Tuple{Int64, Int, Int}
    isnan(x) && return 0, 0, 0
    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
    bits = reinterpret(UInt64, x)
    frac = (bits & 0x000fffffffffffff) % Int64          # stored significand bits
    expo = ((bits & 0x7ff0000000000000) >> 52) % Int    # biased exponent field
    frac |= Int64(expo != 0) << 52                      # implicit leading bit for a nonzero exponent field
    den = ifelse(signbit(x), -1, 1)
    # 1075 = exponent bias (1023) + stored significand width (52); a zero exponent
    # field (zero/subnormal) is scaled as if the field were 1.
    return frac, expo - 1075 + (expo == 0), den
end
852

853

854
"""
855
    precision(num::AbstractFloat; base::Integer=2)
856
    precision(T::Type; base::Integer=2)
857

858
Get the precision of a floating point number, as defined by the effective number of bits in
859
the significand, or the precision of a floating-point type `T` (its current default, if
860
`T` is a variable-precision type like [`BigFloat`](@ref)).
861

862
If `base` is specified, then it returns the maximum corresponding
863
number of significand digits in that base.
864

865
!!! compat "Julia 1.8"
866
    The `base` keyword requires at least Julia 1.8.
867
"""
868
function precision end  # declaration only: creates the generic function without adding a method
869

870
# Precision in bits (base 2) of each IEEE type; `_precision` converts these to other bases.
_precision_with_base_2(::Type{Float16}) = 11
_precision_with_base_2(::Type{Float32}) = 24
_precision_with_base_2(::Type{Float64}) = 53
×
873
# Precision of `x` expressed as a number of significand digits in `base`.
# `x` is whatever `_precision_with_base_2` accepts. Throws a `DomainError`
# when `base < 2`.
function _precision(x, base::Integer)
    base > 1 || throw(DomainError(base, "`base` cannot be less than 2."))
    bits = _precision_with_base_2(x)
    # Base 2 needs no conversion; skipping the division keeps the result exact.
    base == 2 && return Int(bits)
    return floor(Int, bits / log2(base))
end
878
function precision(::Type{T}; base::Integer=2) where {T<:AbstractFloat}
    # Type-level entry point: forward to the base-aware helper.
    return _precision(T, base)
end
5,911,496✔
879
function precision(::T; base::Integer=2) where {T<:AbstractFloat}
    # Value-level entry point: only the type of the argument matters.
    return precision(T; base=base)
end
283✔
880

881

882
"""
883
    nextfloat(x::AbstractFloat, n::Integer)
884

885
The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n`
886
applications of [`prevfloat`](@ref) if `n < 0`.
887
"""
888
function nextfloat(f::IEEEFloat, d::Integer)
    F = typeof(f)
    fumax = reinterpret(Unsigned, F(Inf))   # bit pattern of +Inf: the largest magnitude
    U = typeof(fumax)

    isnan(f) && return f
    fi = reinterpret(Signed, f)
    fneg = fi < 0                           # sign bit of `f`
    fu = unsigned(fi & typemax(fi))         # magnitude bits with the sign cleared

    dneg = d < 0
    da = uabs(d)
    if da > typemax(U)
        # The step count does not fit the bit width: saturate at the infinity
        # in the direction of travel.
        fneg = dneg
        fu = fumax
    else
        du = da % U
        if fneg ⊻ dneg
            # Stepping toward zero.
            if du > fu
                # Crossed zero: continue on the other side, clamped at infinity.
                fu = min(fumax, du - fu)
                fneg = !fneg
            else
                fu = fu - du
            end
        else
            # Stepping away from zero: clamp at infinity instead of wrapping.
            if fumax - fu < du
                fu = fumax
            else
                fu = fu + du
            end
        end
    end
    if fneg
        fu |= sign_mask(F)
    end
    return reinterpret(F, fu)
end
925

926
"""
927
    nextfloat(x::AbstractFloat)
928

929
Return the smallest floating point number `y` of the same type as `x` such that `x < y`. If no
930
such `y` exists (e.g. if `x` is `Inf` or `NaN`), then return `x`.
931

932
See also: [`prevfloat`](@ref), [`eps`](@ref), [`issubnormal`](@ref).
933
"""
934
function nextfloat(x::AbstractFloat)
    # A single step upward through the two-argument method.
    return nextfloat(x, 1)
end
2,147,483,647✔
935

936
"""
937
    prevfloat(x::AbstractFloat, n::Integer)
938

939
The result of `n` iterative applications of `prevfloat` to `x` if `n >= 0`, or `-n`
940
applications of [`nextfloat`](@ref) if `n < 0`.
941
"""
942
function prevfloat(x::AbstractFloat, d::Integer)
    # Moving down by `d` is moving up by `-d`.
    return nextfloat(x, -d)
end
261✔
943

944
"""
945
    prevfloat(x::AbstractFloat)
946

947
Return the largest floating point number `y` of the same type as `x` such that `y < x`. If no
948
such `y` exists (e.g. if `x` is `-Inf` or `NaN`), then return `x`.
949
"""
950
function prevfloat(x::AbstractFloat)
    # A single step downward through the two-argument `nextfloat`.
    return nextfloat(x, -1)
end
1,087,085✔
951

952
# Generate, for every machine integer type `Ti` and IEEE float type `Tf`:
#   * `round(Ti, x::Tf, RoundToZero)` – truncating conversion with a range check;
#   * `Ti(x::Tf)`                     – exact conversion (range check plus `isinteger`).
# The bounds are computed once here and spliced into the methods as literals.
for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128)
    for Tf in (Float16, Float32, Float64)
        if Ti <: Unsigned || sizeof(Ti) < sizeof(Tf)
            # Here `Tf(typemin(Ti))-1` is exact, so we can compare the lower-bound
            # directly. `Tf(typemax(Ti))+1` is either always exactly representable, or
            # rounded to `Inf` (e.g. when `Ti==UInt128 && Tf==Float32`).
            @eval begin
                function round(::Type{$Ti},x::$Tf,::RoundingMode{:ToZero})
                    if $(Tf(typemin(Ti))-one(Tf)) < x < $(Tf(typemax(Ti))+one(Tf))
                        return unsafe_trunc($Ti,x)
                    else
                        throw(InexactError(:round, $Ti, x, RoundToZero))
                    end
                end
                function (::Type{$Ti})(x::$Tf)
                    # When typemax(Ti) is not representable by Tf but typemax(Ti) + 1 is,
                    # then < Tf(typemax(Ti) + 1) is stricter than <= Tf(typemax(Ti)). Using
                    # the former causes us to throw on UInt64(Float64(typemax(UInt64))+1)
                    if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))+one(Tf))) && isinteger(x)
                        return unsafe_trunc($Ti,x)
                    else
                        throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x))
                    end
                end
            end
        else
            # Here `eps(Tf(typemin(Ti))) > 1`, so the only value which can be truncated to
            # `Tf(typemin(Ti))` is itself. Similarly, `Tf(typemax(Ti))` is inexact and will
            # be rounded up. This assumes that `Tf(typemin(Ti)) > -Inf`, which is true for
            # these types, but not for `Float16` or larger integer types.
            @eval begin
                function round(::Type{$Ti},x::$Tf,::RoundingMode{:ToZero})
                    if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))
                        return unsafe_trunc($Ti,x)
                    else
                        throw(InexactError(:round, $Ti, x, RoundToZero))
                    end
                end
                function (::Type{$Ti})(x::$Tf)
                    if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) && isinteger(x)
                        return unsafe_trunc($Ti,x)
                    else
                        throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x))
                    end
                end
            end
        end
    end
end
1001

1002
"""
1003
    issubnormal(f) -> Bool
1004

1005
Test whether a floating point number is subnormal.
1006

1007
An IEEE floating point number is [subnormal](https://en.wikipedia.org/wiki/Subnormal_number)
1008
when its exponent bits are zero and its significand is not zero.
1009

1010
# Examples
1011
```jldoctest
1012
julia> floatmin(Float32)
1013
1.1754944f-38
1014

1015
julia> issubnormal(1.0f-37)
1016
false
1017

1018
julia> issubnormal(1.0f-38)
1019
true
1020
```
1021
"""
1022
function issubnormal(x::T) where {T<:IEEEFloat}
    bits = reinterpret(Unsigned, x)
    # Subnormal: all exponent bits clear while the significand is nonzero.
    # The sign bit is covered by neither mask, so it does not affect the result.
    zero_exponent = (bits & exponent_mask(T)) == 0
    nonzero_significand = (bits & significand_mask(T)) != 0
    return zero_exponent & nonzero_significand
end
1026

1027
function ispow2(x::AbstractFloat)
    iszero(x) && return false
    # A positive power of two is exactly the case where `frexp` yields one half.
    fraction = frexp(x)[1]
    return fraction == 0.5
end
42✔
1028
function iseven(x::AbstractFloat)
    isinteger(x) || return false
    # Integer-valued floats above `maxintfloat` are reported as even without converting.
    abs(x) > maxintfloat(x) && return true
    return iseven(Integer(x))
end
52✔
1029
function isodd(x::AbstractFloat)
    isinteger(x) || return false
    # Integer-valued floats above `maxintfloat` are never reported as odd.
    abs(x) ≤ maxintfloat(x) || return false
    return isodd(Integer(x))
end
28✔
1030

1031
# Per-type constants. Each value is computed once when this block is evaluated and
# spliced into the method body as a literal via `$(...)`.
@eval begin
    # Extreme values of the IEEE types are the infinities.
    typemin(::Type{Float16}) = $(bitcast(Float16, 0xfc00))
    typemax(::Type{Float16}) = $(Inf16)
    typemin(::Type{Float32}) = $(-Inf32)
    typemax(::Type{Float32}) = $(Inf32)
    typemin(::Type{Float64}) = $(-Inf64)
    typemax(::Type{Float64}) = $(Inf64)
    # Value-based fallbacks: dispatch on the argument's type.
    typemin(x::T) where {T<:Real} = typemin(T)
    typemax(x::T) where {T<:Real} = typemax(T)

    # Smallest positive normal numbers.
    floatmin(::Type{Float16}) = $(bitcast(Float16, 0x0400))
    floatmin(::Type{Float32}) = $(bitcast(Float32, 0x00800000))
    floatmin(::Type{Float64}) = $(bitcast(Float64, 0x0010000000000000))
    # Largest finite numbers.
    floatmax(::Type{Float16}) = $(bitcast(Float16, 0x7bff))
    floatmax(::Type{Float32}) = $(bitcast(Float32, 0x7f7fffff))
    floatmax(::Type{Float64}) = $(bitcast(Float64, 0x7fefffffffffffff))

    # Machine epsilon: gap between 1 and the next representable value.
    eps(::Type{Float16}) = $(bitcast(Float16, 0x1400))
    eps(::Type{Float32}) = $(bitcast(Float32, 0x34000000))
    eps(::Type{Float64}) = $(bitcast(Float64, 0x3cb0000000000000))
    eps() = eps(Float64)
end
1053

1054
function eps(x::AbstractFloat)
    # Non-finite input has no meaningful spacing.
    isfinite(x) || return oftype(x, NaN)
    # Below the normal range the spacing is the smallest positive value.
    abs(x) >= floatmin(x) || return nextfloat(zero(x))
    # Normal range: scale the type's machine epsilon by the exponent of `x`.
    return ldexp(eps(typeof(x)), exponent(x))
end
1,870✔
1055

1056
function eps(x::T) where T<:IEEEFloat
    # For isfinite(x), toggling the LSB will produce either prevfloat(x) or
    # nextfloat(x) but will never change the sign or exponent.
    # For !isfinite(x), this will map Inf to NaN and NaN to NaN or Inf.
    neighbor = reinterpret(T, reinterpret(Unsigned, x) ⊻ true)
    # The absolute difference between these values is eps(x). This is true even
    # for Inf/NaN values.
    return abs(x - neighbor)
end
1065

1066
"""
1067
    floatmin(T = Float64)
1068

1069
Return the smallest positive normal number representable by the floating-point
1070
type `T`.
1071

1072
# Examples
1073
```jldoctest
1074
julia> floatmin(Float16)
1075
Float16(6.104e-5)
1076

1077
julia> floatmin(Float32)
1078
1.1754944f-38
1079

1080
julia> floatmin()
1081
2.2250738585072014e-308
1082
```
1083
"""
1084
function floatmin(x::T) where {T<:AbstractFloat}
    # Only the type of `x` matters; its value is ignored.
    return floatmin(T)
end
1,555,313✔
1085

1086
"""
1087
    floatmax(T = Float64)
1088

1089
Return the largest finite number representable by the floating-point type `T`.
1090

1091
See also: [`typemax`](@ref), [`floatmin`](@ref), [`eps`](@ref).
1092

1093
# Examples
1094
```jldoctest
1095
julia> floatmax(Float16)
1096
Float16(6.55e4)
1097

1098
julia> floatmax(Float32)
1099
3.4028235f38
1100

1101
julia> floatmax()
1102
1.7976931348623157e308
1103

1104
julia> typemax(Float64)
1105
Inf
1106
```
1107
"""
1108
function floatmax(x::T) where {T<:AbstractFloat}
    # Only the type of `x` matters; its value is ignored.
    return floatmax(T)
end
776,724✔
1109

1110
function floatmin()
    # With no argument the type defaults to `Float64`.
    return floatmin(Float64)
end
16✔
1111
function floatmax()
    # With no argument the type defaults to `Float64`.
    return floatmax(Float64)
end
19✔
1112

1113
"""
1114
    eps(::Type{T}) where T<:AbstractFloat
1115
    eps()
1116

1117
Return the *machine epsilon* of the floating point type `T` (`T = Float64` by
1118
default). This is defined as the gap between 1 and the next largest value representable by
1119
`typeof(one(T))`, and is equivalent to `eps(one(T))`.  (Since `eps(T)` is a
1120
bound on the *relative error* of `T`, it is a "dimensionless" quantity like [`one`](@ref).)
1121

1122
# Examples
1123
```jldoctest
1124
julia> eps()
1125
2.220446049250313e-16
1126

1127
julia> eps(Float32)
1128
1.1920929f-7
1129

1130
julia> 1.0 + eps()
1131
1.0000000000000002
1132

1133
julia> 1.0 + eps()/2
1134
1.0
1135
```
1136
"""
1137
eps(::Type{<:AbstractFloat})
1138

1139
"""
1140
    eps(x::AbstractFloat)
1141

1142
Return the *unit in last place* (ulp) of `x`. This is the distance between consecutive
1143
representable floating point values at `x`. In most cases, if the distance on either side
1144
of `x` is different, then the larger of the two is taken, that is
1145

1146
    eps(x) == max(x-prevfloat(x), nextfloat(x)-x)
1147

1148
The exceptions to this rule are the smallest and largest finite values
1149
(e.g. `nextfloat(-Inf)` and `prevfloat(Inf)` for [`Float64`](@ref)), which round to the
1150
smaller of the values.
1151

1152
The rationale for this behavior is that `eps` bounds the floating point rounding
1153
error. Under the default `RoundNearest` rounding mode, if ``y`` is a real number and ``x``
1154
is the nearest floating point number to ``y``, then
1155

1156
```math
1157
|y-x| \\leq \\operatorname{eps}(x)/2.
1158
```
1159

1160
See also: [`nextfloat`](@ref), [`issubnormal`](@ref), [`floatmax`](@ref).
1161

1162
# Examples
1163
```jldoctest
1164
julia> eps(1.0)
1165
2.220446049250313e-16
1166

1167
julia> eps(prevfloat(2.0))
1168
2.220446049250313e-16
1169

1170
julia> eps(2.0)
1171
4.440892098500626e-16
1172

1173
julia> x = prevfloat(Inf)      # largest finite Float64
1174
1.7976931348623157e308
1175

1176
julia> x + eps(x)/2            # rounds up
1177
Inf
1178

1179
julia> x + prevfloat(eps(x)/2) # rounds down
1180
1.7976931348623157e308
1181
```
1182
"""
1183
eps(::AbstractFloat)
1184

1185

1186
## byte order swaps for arbitrary-endianness serialization/deserialization ##
1187
function bswap(x::IEEEFloat)
    # Delegate to the `bswap_int` intrinsic, applied directly to the float.
    return bswap_int(x)
end
7✔
1188

1189
# integer size of float: the unsigned / signed integer type with the same bit width
uinttype(::Type{Float64}) = UInt64
uinttype(::Type{Float32}) = UInt32
uinttype(::Type{Float16}) = UInt16
inttype(::Type{Float64}) = Int64
inttype(::Type{Float32}) = Int32
inttype(::Type{Float16}) = Int16
# float size of integer: the IEEE float type with the same bit width
floattype(::Type{UInt64}) = Float64
floattype(::Type{UInt32}) = Float32
floattype(::Type{UInt16}) = Float16
floattype(::Type{Int64}) = Float64
floattype(::Type{Int32}) = Float32
floattype(::Type{Int16}) = Float16
×
1203

1204

1205
## Array operations on floating point numbers ##
1206

1207
function float(A::AbstractArray{<:AbstractFloat})
    # Elements are already floating point: hand back the same array, not a copy.
    return A
end
2✔
1208

1209
# Convert an array to the floating-point element type that `float` gives for a
# zero of `T`. Abstract element types are rejected with an error, since the
# target type is derived from `T` alone.
function float(A::AbstractArray{T}) where T
    isconcretetype(T) ||
        error("`float` not defined on abstractly-typed arrays; please convert to a more specific type")
    F = typeof(float(zero(T)))
    return convert(AbstractArray{F}, A)
end
1215

1216
function float(r::StepRange)
    # Rebuild the range from floating-point start, step and last element.
    lo = float(r.start)
    stride = float(r.step)
    hi = float(last(r))
    return lo:stride:hi
end
49✔
1217
function float(r::UnitRange)
    # Rebuild the range from floating-point endpoints.
    lo = float(r.start)
    hi = float(last(r))
    return lo:hi
end
49✔
1218
# Convert a `StepRangeLen` to a floating-point element type, keeping its length
# and offset.
function float(r::StepRangeLen{T}) where {T}
    F = typeof(float(T(r.ref)))   # element type of the converted range
    return StepRangeLen{F}(float(r.ref), float(r.step), length(r), r.offset)
end
1220
# Convert a `LinRange` by converting its endpoints; the length is unchanged.
function float(r::LinRange)
    return LinRange(float(r.start), float(r.stop), length(r))
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc