• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / 1512

22 Apr 2026 06:21AM UTC coverage: 77.882% (-0.08%) from 77.962%
1512

push

buildkite

web-flow
Fix JuliaTaskDispatcher deadlock with std::future callers (#61575)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

65448 of 84035 relevant lines covered (77.88%)

24445346.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.8
/base/float.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
const IEEEFloat = Union{Float16, Float32, Float64}
4

5
import Core: Float16, Float32, Float64, AbstractFloat
6
import Core: Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128
7

8
## floating point traits ##
9

10
"""
    Inf16

Positive infinity, represented as a [`Float16`](@ref).
"""
const Inf16 = bitcast(Float16, 0x7c00)
"""
    NaN16

A not-a-number value, represented as a [`Float16`](@ref).

See also: [`NaN`](@ref).
"""
const NaN16 = bitcast(Float16, 0x7e00)
"""
    Inf32

Positive infinity, represented as a [`Float32`](@ref).
"""
const Inf32 = bitcast(Float32, 0x7f800000)
"""
    NaN32

A not-a-number value, represented as a [`Float32`](@ref).

See also: [`NaN`](@ref).
"""
const NaN32 = bitcast(Float32, 0x7fc00000)
38
const Inf64 = bitcast(Float64, 0x7ff0000000000000)
const NaN64 = bitcast(Float64, 0x7ff8000000000000)

const Inf = Inf64
"""
    Inf, Inf64

Positive infinity, represented as a [`Float64`](@ref).

See also: [`isfinite`](@ref), [`typemax`](@ref), [`NaN`](@ref), [`Inf32`](@ref).

# Examples
```jldoctest
julia> π/0
Inf

julia> +1.0 / -0.0
-Inf

julia> ℯ^-Inf
0.0
```
"""
Inf, Inf64
62

63
const NaN = NaN64
"""
    NaN, NaN64

A not-a-number value, represented as a [`Float64`](@ref).

See also: [`isnan`](@ref), [`missing`](@ref), [`NaN32`](@ref), [`Inf`](@ref).

# Examples
```jldoctest
julia> 0/0
NaN

julia> Inf - Inf
NaN

julia> NaN == NaN, isequal(NaN, NaN), isnan(NaN)
(false, true, true)
```

!!! note
    To test for `NaN`, always use [`isnan`](@ref) or [`isequal`](@ref).
    A comparison such as `x === NaN` may give unexpected results:
    ```jldoctest
    julia> reinterpret(UInt32, NaN32)
    0x7fc00000

    julia> NaN32p1 = reinterpret(Float32, 0x7fc00001)
    NaN32

    julia> NaN32p1 === NaN32, isequal(NaN32p1, NaN32), isnan(NaN32p1)
    (false, true, true)
    ```
"""
NaN, NaN64
98

99
# bit patterns: view a float as the unsigned / signed integer of the same width
for (F, U, S) in ((Float64, UInt64, Int64), (Float32, UInt32, Int32), (Float16, UInt16, Int16))
    @eval begin
        reinterpret(::Type{Unsigned}, x::$F) = reinterpret($U, x)
        reinterpret(::Type{Signed}, x::$F) = reinterpret($S, x)
    end
end
1,758,489✔
106

107
# Bit-field masks and reference exponent patterns, grouped by field. Each literal has
# the same width as the corresponding float type.

# the sign bit
sign_mask(::Type{Float64}) = 0x8000_0000_0000_0000
sign_mask(::Type{Float32}) = 0x8000_0000
sign_mask(::Type{Float16}) = 0x8000

# all bits of the exponent field
exponent_mask(::Type{Float64}) = 0x7ff0_0000_0000_0000
exponent_mask(::Type{Float32}) = 0x7f80_0000
exponent_mask(::Type{Float16}) = 0x7c00

# bit pattern of 1.0
exponent_one(::Type{Float64}) = 0x3ff0_0000_0000_0000
exponent_one(::Type{Float32}) = 0x3f80_0000
exponent_one(::Type{Float16}) = 0x3c00

# bit pattern of 0.5
exponent_half(::Type{Float64}) = 0x3fe0_0000_0000_0000
exponent_half(::Type{Float32}) = 0x3f00_0000
exponent_half(::Type{Float16}) = 0x3800

# all bits of the significand field
significand_mask(::Type{Float64}) = 0x000f_ffff_ffff_ffff
significand_mask(::Type{Float32}) = 0x007f_ffff
significand_mask(::Type{Float16}) = 0x03ff
×
124

125
# Stored significand bits of `x` (sign and exponent fields masked off).
function mantissa(x::T) where {T}
    bits = reinterpret(Unsigned, x)
    return bits & significand_mask(T)
end
7,704✔
126

127
# Derive the integer-valued layout traits of each IEEE type from its bit masks.
for F in (Float16, Float32, Float64)
    nsig = trailing_ones(significand_mask(F))
    bias = Int(exponent_one(F) >> nsig)
    rawmax = Int(exponent_mask(F) >> nsig)
    @eval begin
        significand_bits(::Type{$F}) = $(nsig)
        exponent_bits(::Type{$F}) = $(sizeof(F)*8 - nsig - 1)
        exponent_bias(::Type{$F}) = $(bias)
        # maximum float exponent
        exponent_max(::Type{$F}) = $(rawmax - bias - 1)
        # maximum float exponent without bias
        exponent_raw_max(::Type{$F}) = $(rawmax)
    end
end
139

140
"""
    exponent_max(T)

Return the largest [`exponent`](@ref) value that a floating point number of type `T`
can have.

# Examples
```jldoctest
julia> Base.exponent_max(Float64)
1023
```

Note that `exponent_max(T) + 1` is still a possible value of the biased exponent
field; it might be used as a sentinel value for `Inf` or `NaN`.
"""
function exponent_max end
155

156
"""
    exponent_raw_max(T)

Return the maximum value of the [`exponent`](@ref) field of a floating point number of
type `T` without bias, i.e. the largest integer representable by
[`exponent_bits(T)`](@ref) bits.
"""
function exponent_raw_max end
163

164
"""
    ieee754_exponent_min(T)

IEEE 754 definition of the minimum exponent of `T`, computed as `1 - exponent_max(T)`.
"""
function ieee754_exponent_min(::Type{T}) where {T<:IEEEFloat}
    emin = 1 - exponent_max(T)
    return Int(emin)::Int
end
2,883✔
168

169
# For every IEEE type the minimum exponent is the IEEE 754 one.
for F in (Float16, Float32, Float64)
    @eval exponent_min(::Type{$F}) = ieee754_exponent_min($F)
end
1,440✔
172

173
# Assemble the bit pattern of an `F` from its three fields:
# sign bit, then the exponent field, then the significand field.
function ieee754_representation(
    ::Type{F}, sign_bit::Bool, exponent_field::Integer, significand_field::Integer
) where {F<:IEEEFloat}
    U = uinttype(F)
    bits::U = sign_bit
    bits = (bits << exponent_bits(F)) | exponent_field
    bits <<= significand_bits(F)
    bits |= significand_field
end

# ±floatmax(T)
ieee754_representation(::Type{F}, sign_bit::Bool, ::Val{:omega}) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, exponent_raw_max(F) - 1, significand_mask(F))

# NaN or an infinity
ieee754_representation(
    ::Type{F}, sign_bit::Bool, significand_field::Integer, ::Val{:nan}
) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, exponent_raw_max(F), significand_field)

# NaN with default payload (only the top significand bit set)
ieee754_representation(::Type{F}, sign_bit::Bool, ::Val{:nan}) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, one(uinttype(F)) << (significand_bits(F) - 1), Val(:nan))

# Infinity
ieee754_representation(::Type{F}, sign_bit::Bool, ::Val{:inf}) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, false, Val(:nan))

# Subnormal or zero
ieee754_representation(
    ::Type{F}, sign_bit::Bool, significand_field::Integer, ::Val{:subnormal}
) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, false, significand_field)

# Zero
ieee754_representation(::Type{F}, sign_bit::Bool, ::Val{:zero}) where {F<:IEEEFloat} =
    ieee754_representation(F, sign_bit, false, Val(:subnormal))
225

226
"""
    Base.uabs(x::Integer)

Return the absolute value of `x`, switching to a different result type when the
operation could otherwise overflow. This typically matters for two's complement signed
integers, where `abs(typemin(x)) == typemin(x) < 0`; for those, `uabs(x)` returns an
unsigned integer of the same size.
"""
function uabs(x::Integer)
    return abs(x)
end
function uabs(x::BitSigned)
    return unsigned(abs(x))
end
12,685,494✔
236

237
## conversions to floating-point ##
238

239
# TODO: deprecate in 2.0
# Generic integer fallback: convert through `Float32`.
function Float16(x::Integer)
    wide = convert(Float32, x)::Float32
    return convert(Float16, wide)
end
×
241

242
# Constructors and promotion rules between every IEEE float type and the machine
# integers up to 64 bits: signed sources use `sitofp`, unsigned ones (and `Bool`) `uitofp`.
for F in (Float16, Float32, Float64)
    for (inttypes, tofp) in (
        ((Int8, Int16, Int32, Int64), :sitofp),
        ((Bool, UInt8, UInt16, UInt32, UInt64), :uitofp),
    )
        for I in inttypes
            @eval begin
                (::Type{$F})(x::($I)) = ($tofp)($F, x)
                promote_rule(::Type{$F}, ::Type{$I}) = $F
            end
        end
    end
end
256

257
# Mixing a float with a 128-bit integer promotes to the float type.
for F in (Float64, Float32, Float16), I in (UInt128, Int128)
    @eval promote_rule(::Type{$F}, ::Type{$I}) = $F
end
×
263

264
# Convert by splitting `x` into a low and a high chunk, embedding each chunk in the
# significand of a power-of-two float, subtracting that power of two again, and adding
# the two partial values.
function Float64(x::UInt128)
    if x < UInt128(1) << 104 # Can fit it in two 52 bits mantissas
        lo_scale, hi_scale = 0x1p52, 0x1p104
        lo_bits = (x % UInt64) & Base.significand_mask(Float64)
        hi_bits = ((x >> 52) % UInt64)
    else # Large enough that low bits only affect rounding, pack low bits
        lo_scale, hi_scale = 0x1p76, 0x1p128
        lo_bits = ((x >> 12) % UInt64) >> 12 | (x % UInt64) & 0xFFFFFF
        hi_bits = ((x >> 76) % UInt64)
    end
    lo = reinterpret(Float64, reinterpret(UInt64, lo_scale) | lo_bits) - lo_scale
    hi = reinterpret(Float64, reinterpret(UInt64, hi_scale) | hi_bits) - hi_scale
    return lo + hi
end
283

284
# Same two-chunk scheme as the `UInt128` method, applied to the magnitude of `x`;
# the sign bit is OR-ed back into the result at the end.
function Float64(x::Int128)
    sign_bit = ((x >> 127) % UInt64) << 63
    mag = uabs(x)
    if mag < UInt128(1) << 104 # Can fit it in two 52 bits mantissas
        lo_scale, hi_scale = 0x1p52, 0x1p104
        lo_bits = (mag % UInt64) & Base.significand_mask(Float64)
        hi_bits = ((mag >> 52) % UInt64)
    else # Large enough that low bits only affect rounding, pack low bits
        lo_scale, hi_scale = 0x1p76, 0x1p128
        lo_bits = ((mag >> 12) % UInt64) >> 12 | (mag % UInt64) & 0xFFFFFF
        hi_bits = ((mag >> 76) % UInt64)
    end
    lo = reinterpret(Float64, reinterpret(UInt64, lo_scale) | lo_bits) - lo_scale
    hi = reinterpret(Float64, reinterpret(UInt64, hi_scale) | hi_bits) - hi_scale
    return reinterpret(Float64, sign_bit | reinterpret(UInt64, lo + hi))
end
305

306
# Build the `Float32` bit pattern directly: the position of the highest set bit gives
# the exponent, the bits below it give the significand (rounded to nearest, ties to even).
function Float32(x::UInt128)
    if x == 0
        return 0f0
    end
    nbits = top_set_bit(x) # ndigits0z(x,2)
    if nbits > 24
        frac = ((x >> (nbits-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit
        frac = (frac+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent)
        frac &= ~UInt32(trailing_zeros(x) == (nbits-25)) # fix last bit to round to even
    else
        frac = ((x % UInt32) << (24-nbits)) & 0x007f_ffff
    end
    expo = ((nbits+126) % UInt32) << 23
    return reinterpret(Float32, expo + frac)
end
319

320
# Same construction as the `UInt128` method, applied to the magnitude of `x`, with the
# sign bit taken from the top bit of the original value.
function Float32(x::Int128)
    if x == 0
        return 0f0
    end
    sign = ((x >>> 96) % UInt32) & 0x8000_0000 # sign bit
    mag = abs(x) % UInt128
    nbits = top_set_bit(mag) # ndigits0z(x,2)
    if nbits > 24
        frac = ((mag >> (nbits-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit
        frac = (frac+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent)
        frac &= ~UInt32(trailing_zeros(mag) == (nbits-25)) # fix last bit to round to even
    else
        frac = ((mag % UInt32) << (24-nbits)) & 0x007f_ffff
    end
    expo = ((nbits+126) % UInt32) << 23
    return reinterpret(Float32, sign | expo + frac)
end
335

336
# TODO: optimize
# 128-bit integers are converted through `Float64`.
function Float16(x::UInt128)
    return convert(Float16, Float64(x))
end
function Float16(x::Int128)
    return convert(Float16, Float64(x))
end
108✔
339

340
# Conversions between the IEEE float types: narrowing uses `fptrunc`, widening `fpext`.
for (Narrow, Wide) in ((Float16, Float32), (Float16, Float64), (Float32, Float64))
    @eval begin
        (::Type{$Narrow})(x::$Wide) = fptrunc($Narrow, x)
        (::Type{$Wide})(x::$Narrow) = fpext($Wide, x)
    end
end
8,570,639✔
347

348
# `AbstractFloat` of any machine integer (or `Bool`) is a `Float64`.
# The conversion is LOSSY for Int64, Int128, UInt64 and UInt128.
for I in (Bool, Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128)
    @eval AbstractFloat(x::$I) = Float64(x)
end
138✔
359

360
# Only the exact values 0 and 1 convert; any other value throws an `InexactError`.
function Bool(x::Float16)
    x == 0 && return false
    x == 1 && return true
    throw(InexactError(:Bool, Bool, x))
end
15✔
361

362
"""
    float(x)

Convert a number or array `x` to a floating point data type.

See also: [`complex`](@ref), [`oftype`](@ref), [`convert`](@ref).

# Examples
```jldoctest
julia> float(typemax(Int32))
2.147483647e9
```
"""
function float(x)
    return AbstractFloat(x)
end
56,910,663✔
376

377
"""
    float(T::Type)

Return a type suitable for representing a value of type `T` as a floating point value.
Equivalent to `typeof(float(zero(T)))`.

# Examples
```jldoctest
julia> float(Complex{Int})
ComplexF64 (alias for Complex{Float64})

julia> float(Int)
Float64
```
"""
function float(::Type{T}) where {T<:Number}
    return typeof(float(zero(T)))
end
function float(::Type{T}) where {T<:AbstractFloat}
    return T
end
function float(::Type{Union{}}, slurp...)
    return Union{}
end
3✔
395

396
"""
    unsafe_trunc(T, x)

Return the nearest integral value of type `T` whose absolute value does not exceed
the absolute value of `x`. If that value is not representable by `T`, an arbitrary
value is returned.
See also [`trunc`](@ref).

# Examples
```jldoctest
julia> unsafe_trunc(Int, -2.2)
-2

julia> unsafe_trunc(Int, NaN) isa Int
true
```
"""
function unsafe_trunc end
414

415
# Truncation to machine integers up to 64 bits maps directly onto an intrinsic:
# `fptosi` for signed targets, `fptoui` for unsigned ones.
for (inttypes, fptoint) in (
    ((Int8, Int16, Int32, Int64), :fptosi),
    ((UInt8, UInt16, UInt32, UInt64), :fptoui),
)
    for Ti in inttypes
        @eval unsafe_trunc(::Type{$Ti}, x::IEEEFloat) = ($fptoint)($Ti, x)
    end
end
425

426
# Truncate by shifting the 53-bit significand (implicit leading bit restored) left or
# right according to the exponent field. The sign bit is not inspected here.
function unsafe_trunc(::Type{UInt128}, x::Float64)
    bits = reinterpret(UInt64,x)
    # use `% Int` instead of `Int(...)` to preserve `:nothrow` (the shifted value
    # fits in 11 bits, but `Int(::UInt64)` would otherwise add a bounds check)
    shift = ((bits >> 52) % Int) & 0x07ff - 1075
    sig = (bits & 0x000f_ffff_ffff_ffff) | 0x0010_0000_0000_0000
    return shift <= 0 ? UInt128(sig >> -shift) : UInt128(sig) << shift
end
438
# Truncate the magnitude through the `UInt128` method, then give it the sign of `x`.
function unsafe_trunc(::Type{Int128}, x::Float64)
    mag = unsafe_trunc(UInt128,x) % Int128
    return copysign(mag, x)
end
441

442
# Truncate by shifting the 24-bit significand (implicit leading bit restored) left or
# right according to the exponent field. The sign bit is not inspected here.
function unsafe_trunc(::Type{UInt128}, x::Float32)
    xu = reinterpret(UInt32,x)
    # use `% Int` instead of `Int(...)` to preserve `:nothrow`, matching the `Float64`
    # method: the shifted value fits in 9 bits, but `Int(::UInt32)` adds a range check
    # on platforms where `Int` is 32 bits wide
    k = ((xu >> 23) % Int) & 0x00ff - 150
    xu = (xu & 0x007f_ffff) | 0x0080_0000
    if k <= 0
        # the value has no bits above the significand: drop the fractional bits
        UInt128(xu >> -k)
    else
        UInt128(xu) << k
    end
end
452
# Truncate the magnitude through the `UInt128` method, then give it the sign of `x`.
function unsafe_trunc(::Type{Int128}, x::Float32)
    mag = unsafe_trunc(UInt128,x) % Int128
    return copysign(mag, x)
end
455

456
# `Float16` goes through the `Float32` methods.
for I in (UInt128, Int128)
    @eval unsafe_trunc(::Type{$I}, x::Float16) = unsafe_trunc($I, Float32(x))
end
30✔
458

459
# matches convert methods
# also determines trunc, floor, ceil
# (abstract integer targets are mapped to the default concrete type)
for (Abstract, Concrete) in ((Signed, Int), (Unsigned, UInt), (Integer, Int))
    @eval round(::Type{$Abstract}, x::IEEEFloat, r::RoundingMode) = round($Concrete, x, r)
end
13,701✔
464

465
# Rounding to an integral float value: each rounding mode has its own intrinsic.
function round(x::IEEEFloat, ::RoundingMode{:ToZero})
    return trunc_llvm(x)
end
function round(x::IEEEFloat, ::RoundingMode{:Down})
    return floor_llvm(x)
end
function round(x::IEEEFloat, ::RoundingMode{:Up})
    return ceil_llvm(x)
end
function round(x::IEEEFloat, ::RoundingMode{:Nearest})
    return rint_llvm(x)
end
25,272,925✔
469

470
# Whether rounding `x` in the given mode moves toward +Inf.
function rounds_up(x, ::RoundingMode{:Down})
    return false
end
function rounds_up(x, ::RoundingMode{:Up})
    return true
end
function rounds_up(x, ::RoundingMode{:ToZero})
    return signbit(x)
end
function rounds_up(x, ::RoundingMode{:FromZero})
    return !signbit(x)
end
66✔
474
# Convert `x_integer` to `T` and, if that conversion landed on the wrong side of `x`
# for the directed rounding mode `r`, step to the neighbouring float.
function _round_convert(::Type{T}, x_integer, x, r::Union{RoundingMode{:ToZero}, RoundingMode{:FromZero}, RoundingMode{:Up}, RoundingMode{:Down}}) where {T<:AbstractFloat}
    approx = convert(T, x_integer)
    if rounds_up(x, r)
        return approx < x ? nextfloat(approx) : approx
    end
    return approx > x ? prevfloat(approx) : approx
end
482

483
## floating point promotions ##
# mixing two IEEE float types promotes to the wider one
for (Wide, Narrow) in ((Float32, Float16), (Float64, Float16), (Float64, Float32))
    @eval promote_rule(::Type{$Wide}, ::Type{$Narrow}) = $Wide
end
117✔
487

488
# the next wider IEEE float type
for (Narrow, Wide) in ((Float16, Float32), (Float32, Float64))
    @eval widen(::Type{$Narrow}) = $Wide
end
×
490

491
## floating point arithmetic ##
# each operator forwards to the intrinsic of the same operation
function -(x::IEEEFloat)
    return neg_float(x)
end

function +(x::T, y::T) where {T<:IEEEFloat}
    return add_float(x, y)
end
function -(x::T, y::T) where {T<:IEEEFloat}
    return sub_float(x, y)
end
function *(x::T, y::T) where {T<:IEEEFloat}
    return mul_float(x, y)
end
function /(x::T, y::T) where {T<:IEEEFloat}
    return div_float(x, y)
end
1,957,873,156✔
498

499
# `x*y + z` via the `muladd_float` intrinsic
function muladd(x::T, y::T, z::T) where {T<:IEEEFloat}
    return muladd_float(x, y, z)
end
36,453,012✔
500

501
# TODO: faster floating point div?
502
# TODO: faster floating point fld?
503
# TODO: faster floating point mod?
504

505
# Extract the exponent field of `x`, shifted down to the low bits.
# NOTE: despite the name, no bias is subtracted here; the raw field value is returned.
function unbiased_exponent(x::T) where {T<:IEEEFloat}
    field = reinterpret(Unsigned, x) & exponent_mask(T)
    return field >> significand_bits(T)
end
508

509
# Significand of `x` with the implicit leading bit made explicit. The bit is added
# for every input that is not subnormal.
function explicit_mantissa_noinfnan(x::T) where {T<:IEEEFloat}
    sig = mantissa(x)
    if !issubnormal(x)
        sig |= significand_mask(T) + uinttype(T)(1)
    end
    return sig
end
514

515
# Assemble a float from an unsigned significand `number` and an exponent value `ep`.
function _to_float(number::U, ep) where {U<:Unsigned}
    F = floattype(U)
    S = signed(U)
    expo = unsafe_trunc(S,ep)
    # shift (possibly negative) that leaves exactly `exponent_bits(F)` zero bits above
    # the leading set bit of `number`
    shift::S = unsafe_trunc(S, Core.Intrinsics.ctlz_int(number) - U(exponent_bits(F)))
    number <<= shift
    expo -= shift
    bits = if expo >= 0
        # keep the stored significand bits and write `expo + 1` into the exponent field
        (number & significand_mask(F)) |
            (((expo + S(1)) << significand_bits(F)) & exponent_mask(F))
    else
        # exponent field stays zero: only a right-shifted significand is stored
        (number >> -expo) & significand_mask(F)
    end
    return reinterpret(F, bits)
end
531

532
# Remainder kernel working on the bit patterns of `x` and `y`. The only caller in this
# file (`rem`) passes `abs(x)` and `abs(y)`, both finite and nonzero.
function rem_internal(x::T, y::T) where {T<:IEEEFloat}
    @_terminates_locally_meta
    U = uinttype(T)
    xbits = reinterpret(Unsigned, x)
    ybits = reinterpret(Unsigned, y)
    # bit pattern of x below that of y: x itself is returned; equal patterns: zero
    xbits < ybits && return x
    xbits == ybits && return zero(T)

    e_x = unbiased_exponent(x)
    e_y = unbiased_exponent(y)
    # Most common case where |y| is "very normal" and |x/y| < 2^EXPONENT_WIDTH
    if e_y > (significand_bits(T)) && (e_x - e_y) <= (exponent_bits(T))
        sig_x = explicit_mantissa_noinfnan(x)
        sig_y = explicit_mantissa_noinfnan(y)
        r = urem_int((sig_x << (e_x - e_y)), sig_y)
        return iszero(r) ? zero(T) : _to_float(r, e_y - U(1))
    end
    # Both are subnormals
    if e_x == 0 && e_y == 0
        return reinterpret(T, urem_int(xbits, ybits) & significand_mask(T))
    end

    sig_x = explicit_mantissa_noinfnan(x)
    e_x -= U(1)
    sig_y = explicit_mantissa_noinfnan(y)
    lead_zeros_y = U(exponent_bits(T))
    if e_y > 0
        e_y -= U(1)
    else
        # exponent field of y is zero: use the stored significand and count its leading zeros
        sig_y = mantissa(y)
        lead_zeros_y = Core.Intrinsics.ctlz_int(sig_y)
    end

    trail_zeros_y = Core.Intrinsics.cttz_int(sig_y)
    spare_zeros = lead_zeros_y + trail_zeros_y

    exp_diff = e_x - e_y
    # Shift sig_y right by its trailing zeros, or by exp_diff if that is smaller
    right_shift = min(exp_diff, trail_zeros_y)
    sig_y >>= right_shift
    exp_diff -= right_shift
    e_y += right_shift
    # Shift sig_x left by exponent_bits(T), or by the remaining exp_diff if that is smaller
    left_shift = min(exp_diff, U(exponent_bits(T)))
    sig_x <<= left_shift
    exp_diff -= left_shift

    sig_x = urem_int(sig_x, sig_y)
    iszero(sig_x) && return zero(T)
    iszero(exp_diff) && return _to_float(sig_x, e_y)

    # consume the remaining exponent difference in chunks of `spare_zeros` bits
    while exp_diff > spare_zeros
        exp_diff -= spare_zeros
        sig_x <<= spare_zeros
        sig_x = urem_int(sig_x, sig_y)
    end
    sig_x <<= exp_diff
    sig_x = urem_int(sig_x, sig_y)
    return _to_float(sig_x, e_y)
end
597

598
# Remainder with the sign of `x`. Finite nonzero operands go to `rem_internal`;
# the special values are handled here.
function rem(x::T, y::T) where {T<:IEEEFloat}
    if isfinite(x) && !iszero(x) && isfinite(y) && !iszero(y)
        magnitude = rem_internal(abs(x), abs(y))
        return copysign(magnitude, x)
    end
    if isinf(x) || isnan(y) || iszero(y)  # y can still be Inf
        return T(NaN)
    end
    return x
end
607

608
# Modulus: like `rem`, but a nonzero result takes the sign of `y`, and a zero result
# is given the sign of `y` explicitly.
function mod(x::T, y::T) where T<:AbstractFloat
    if isinf(y) && isfinite(x)
        return x
    end
    r = rem(x,y)
    if r == 0
        return copysign(r,y)
    end
    # remainder and divisor on opposite sides of zero: shift by one period
    opposite = (r > 0) ⊻ (y > 0)
    return opposite ? r+y : r
end
621

622
## floating point comparisons ##
# each comparison forwards to the intrinsic of the same name
function ==(x::T, y::T) where {T<:IEEEFloat}
    return eq_float(x, y)
end
function <(x::T, y::T) where {T<:IEEEFloat}
    return lt_float(x, y)
end
function <=(x::T, y::T) where {T<:IEEEFloat}
    return le_float(x, y)
end

function isequal(x::T, y::T) where {T<:IEEEFloat}
    return fpiseq(x, y)
end
3,609,637✔
628

629
# interpret as sign-magnitude integer: the bits of a non-negative float are used as
# they are, those of a negative float have every bit except the sign bit flipped
function _fpint(x)
    @inline
    I = inttype(typeof(x))
    raw = reinterpret(I, x)
    flipped = raw ⊻ typemax(I)
    return ifelse(raw < zero(I), flipped, raw)
end
636

637
# Ordering for `isless`: if either argument is NaN, `a` is less exactly when `a`
# itself is not NaN; otherwise compare the sign-magnitude integer images.
function isless(a::T, b::T) where T<:IEEEFloat
    @inline
    if isnan(a) || isnan(b)
        return !isnan(a)
    end
    return _fpint(a) < _fpint(b)
end
643

644
# Exact Float (Tf) vs Integer (Ti) comparisons
# Assumes:
# - typemax(Ti) == 2^n-1
# - typemax(Ti) can't be exactly represented by Tf:
#   => Tf(typemax(Ti)) == 2^n or Inf
# - typemin(Ti) can be exactly represented by Tf
#
# 1. convert the integer argument to Tf
# 2. perform the Tf comparison against the converted value
# 3. if the two floats are equal, check whether (1) resulted in rounding:
#  a. convert the float back to Ti and compare with the original integer
#  b. `unsafe_trunc` returns an arbitrary value if the float == Tf(typemax(Ti))
#     (but in that case the float is known to be > the integer)
for Ti in (Int64,UInt64,Int128,UInt128), Tf in (Float32,Float64)
    # float image of typemax(Ti); by the assumptions above it exceeds every Ti value
    fmax = Tf(typemax(Ti))
    @eval begin
        function ==(x::$Tf, y::$Ti)
            fy = ($Tf)(y)
            (x == fy) & (fy != $fmax) & (y == unsafe_trunc($Ti,fy))
        end
        ==(y::$Ti, x::$Tf) = x==y

        function <(x::$Ti, y::$Tf)
            fx = ($Tf)(x)
            (fx < y) | ((fx == y) & ((fx == $fmax) | (x < unsafe_trunc($Ti,fx)) ))
        end
        function <=(x::$Ti, y::$Tf)
            fx = ($Tf)(x)
            (fx < y) | ((fx == y) & ((fx == $fmax) | (x <= unsafe_trunc($Ti,fx)) ))
        end

        function <(x::$Tf, y::$Ti)
            fy = ($Tf)(y)
            (x < fy) | ((x == fy) & (fy < $fmax) & (unsafe_trunc($Ti,fy) < y))
        end
        function <=(x::$Tf, y::$Ti)
            fy = ($Tf)(y)
            (x < fy) | ((x == fy) & (fy < $fmax) & (unsafe_trunc($Ti,fy) <= y))
        end
    end
end
686
# Comparisons of small floats with integers: convert both sides to the float type in
# the third column and compare there.
for op in (:(==), :<, :<=), (F, I, W) in (
    (Float16, Union{Int128,UInt128,Int64,UInt64}, Float64),
    (Union{Float16,Float32}, Union{Int32,UInt32}, Float64),
    (Float16, Union{Int16,UInt16}, Float32),
)
    @eval begin
        ($op)(x::$F, y::$I) = ($op)(($W)(x), ($W)(y))
        ($op)(x::$I, y::$F) = ($op)(($W)(x), ($W)(y))
    end
end
698

699

700
# absolute value via the `abs_float` intrinsic
function abs(x::IEEEFloat)
    return abs_float(x)
end
216,782,377✔
701

702
"""
    isnan(f)::Bool

Test whether a number value is a NaN ("not a number"): an indeterminate value that is
neither an infinity nor a finite number.

See also: [`iszero`](@ref), [`isone`](@ref), [`isinf`](@ref), [`ismissing`](@ref).
"""
function isnan(x::AbstractFloat)
    # a NaN is the only value that compares unequal to itself
    return (x != x)::Bool
end
function isnan(x::Number)
    return false
end
×
712

713
# For floats, `x - x` is NaN exactly when `x` is infinite or NaN.
function isfinite(x::AbstractFloat)
    return !(isnan(x - x)::Bool)
end
# Generic reals: finite when the denominator reported by `decompose` is nonzero.
function isfinite(x::Real)
    return decompose(x)[3] != 0
end
function isfinite(x::Integer)
    return true
end
×
716

717
"""
    isinf(f)::Bool

Test whether a number is infinite.

See also: [`Inf`](@ref), [`iszero`](@ref), [`isfinite`](@ref), [`isnan`](@ref).
"""
function isinf(x::Real)
    # infinite: neither NaN nor finite
    return !isnan(x) & !isfinite(x)
end
function isinf(x::IEEEFloat)
    return abs(x) === oftype(x, Inf)
end
17,326,205✔
726

727
#=
`decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`.

The decompose function is the point where rational-valued numeric types that support
hashing hook into the hashing protocol. `decompose(x)` should return three integer
values `num, pow, den`, such that the value of `x` is mathematically equal to

    num*2^pow/den

The decomposition need not be canonical: it only has to be *some* way to express `x`
in this form, not any particular one – with the restriction that `num` and `den` may
not share any odd common factors. They may, however, have powers of two in common –
the generic hashing code will normalize those as necessary.

Special values:

 - `x` is zero: `num` should be zero and `den` should have the same sign as `x`
 - `x` is infinite: `den` should be zero and `num` should have the same sign as `x`
 - `x` is not a number: `num` and `den` should both be zero
=#

# an integer is its own numerator
decompose(x::Integer) = (x, 0, 1)
5,007✔
749

750
# `Float16` decomposition: significand as numerator, adjusted exponent as the power
# of two, and the sign carried by the denominator.
function decompose(x::Float16)::NTuple{3,Int}
    if isnan(x)
        return 0, 0, 0
    elseif isinf(x)
        return ifelse(x < 0, -1, 1), 0, 0
    end
    bits = reinterpret(UInt16, x)
    frac = (bits & 0x03ff) % Int16
    expo = ((bits & 0x7c00) >> 10) % Int
    frac |= Int16(expo != 0) << 10   # implicit leading bit when the exponent field is nonzero
    den = ifelse(signbit(x), -1, 1)
    return frac, expo - 25 + (expo == 0), den
end
760

761
# `Float32` decomposition: significand as numerator, adjusted exponent as the power
# of two, and the sign carried by the denominator.
function decompose(x::Float32)::NTuple{3,Int}
    if isnan(x)
        return 0, 0, 0
    elseif isinf(x)
        return ifelse(x < 0, -1, 1), 0, 0
    end
    bits = reinterpret(UInt32, x)
    frac = (bits & 0x007fffff) % Int32
    expo = ((bits & 0x7f800000) >> 23) % Int
    frac |= Int32(expo != 0) << 23   # implicit leading bit when the exponent field is nonzero
    den = ifelse(signbit(x), -1, 1)
    return frac, expo - 150 + (expo == 0), den
end
771

772
# `Float64` decomposition: significand as numerator, adjusted exponent as the power
# of two, and the sign carried by the denominator.
function decompose(x::Float64)::Tuple{Int64, Int, Int}
    if isnan(x)
        return 0, 0, 0
    elseif isinf(x)
        return ifelse(x < 0, -1, 1), 0, 0
    end
    bits = reinterpret(UInt64, x)
    frac = (bits & 0x000fffffffffffff) % Int64
    expo = ((bits & 0x7ff0000000000000) >> 52) % Int
    frac |= Int64(expo != 0) << 52   # implicit leading bit when the exponent field is nonzero
    den = ifelse(signbit(x), -1, 1)
    return frac, expo - 1075 + (expo == 0), den
end
782

783

784
"""
    precision(num::AbstractFloat; base::Integer=2)
    precision(T::Type; base::Integer=2)

Get the precision of a floating point number, defined as the effective number of bits
in its significand, or the precision of a floating-point type `T` (its current default,
if `T` is a variable-precision type like [`BigFloat`](@ref)).

If `base` is specified, the result is the maximum corresponding number of significand
digits in that base.

!!! compat "Julia 1.8"
    The `base` keyword requires at least Julia 1.8.
"""
function precision end
799

800
# significand width in bits of each IEEE type
function _precision_with_base_2(::Type{Float16})
    return 11
end
function _precision_with_base_2(::Type{Float32})
    return 24
end
function _precision_with_base_2(::Type{Float64})
    return 53
end
×
803
# Precision of `x` in `base`: the base-2 precision, rescaled by `log2(base)` and
# rounded down for any other base. Throws a `DomainError` for `base < 2`.
function _precision(x, base::Integer)
    if !(base > 1)
        throw(DomainError(base, "`base` cannot be less than 2."))
    end
    nbits = _precision_with_base_2(x)
    if base == 2
        return Int(nbits)
    end
    return floor(Int, nbits / log2(base))
end
808
# type form: delegate to `_precision`
function precision(::Type{T}; base::Integer=2) where {T<:AbstractFloat}
    return _precision(T, base)
end
# value form: forward to the method for the value's type
function precision(::T; base::Integer=2) where {T<:AbstractFloat}
    return precision(T; base)
end
909✔
810

811

812
# Move `f` by `da` representable values: toward negative values when `dneg` is
# true, toward positive values otherwise. The float is handled in sign-magnitude
# form (`fneg`, `fu`); the magnitude saturates at the bit pattern of `Inf`, and a
# NaN input is returned unchanged.
function _nextfloat(f::IEEEFloat, dneg::Bool, da::Integer)
12✔
813
    # da must be > 0
    # NOTE(review): `da` is the step magnitude and `dneg` its sign; callers in this
    # file pass `uabs(d)`, which is presumably 0 for `d == 0` — confirm whether
    # `>= 0` is what is meant above.
814
    F = typeof(f)
39✔
815
    # Bit pattern of +Inf: the largest magnitude the result may take.
    fumax = reinterpret(Unsigned, F(Inf))
39✔
816
    U = typeof(fumax)
39✔
817

818
    isnan(f) && return f
3,602,164,514✔
819
    # Split `f` into a sign flag and an unsigned magnitude (top bit cleared).
    fi = reinterpret(Signed, f)
3,602,164,514✔
820
    fneg = fi < 0
3,602,164,514✔
821
    fu = unsigned(fi & typemax(fi))
3,602,164,514✔
822

823
    if da > typemax(U)
3,602,164,262✔
824
        # The step does not fit in `U`: the result is infinite, signed like the step.
        fneg = dneg
9✔
825
        fu = fumax
9✔
826
    else
827
        du = da % U
1,143✔
828
        if fneg ⊻ dneg
3,602,164,502✔
829
            # Sign of `f` and step direction differ: the magnitude shrinks toward zero.
            if du > fu
1,330,505✔
830
                # The step passes zero: continue on the other side, clamped at Inf.
                fu = min(fumax, du - fu)
351✔
831
                fneg = !fneg
351✔
832
            else
833
                fu = fu - du
1,330,154✔
834
            end
835
        else
836
            # Sign of `f` and step direction agree: the magnitude grows, saturating
            # at Inf (the comparison is arranged so `fu + du` is never formed when
            # it would exceed `fumax`).
            if fumax - fu < du
3,600,833,997✔
837
                fu = fumax
×
838
            else
839
                fu = fu + du
3,600,833,844✔
840
            end
841
        end
842
    end
843
    # Put the sign back and reinterpret the bits as a float of the original type.
    if fneg
3,602,164,514✔
844
        fu |= sign_mask(F)
1,039,554✔
845
    end
846
    reinterpret(F, fu)
3,602,164,514✔
847
end
848

849
"""
850
    nextfloat(x::AbstractFloat, n::Integer)
851

852
The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n`
853
applications of [`prevfloat`](@ref) if `n < 0`.
854
"""
855
function nextfloat(f::AbstractFloat, d::Integer)
    # Hand the step to `_nextfloat` as a direction flag plus an unsigned magnitude.
    toward_negative = isnegative(d)
    magnitude = uabs(d)
    return _nextfloat(f, toward_negative, magnitude)
end
6,442,450,941✔
856

857
"""
858
    nextfloat(x::AbstractFloat)
859

860
Return the smallest floating point number `y` of the same type as `x` such that `x < y`.
861
If no such `y` exists (e.g. if `x` is `Inf` or `NaN`), then return `x`.
862

863
See also: [`prevfloat`](@ref), [`eps`](@ref), [`issubnormal`](@ref).
864
"""
865
function nextfloat(x::AbstractFloat)
    # A single step upward is the two-argument form with a step of 1.
    return nextfloat(x, 1)
end
6,442,450,941✔
866

867
"""
868
    prevfloat(x::AbstractFloat, n::Integer)
869

870
The result of `n` iterative applications of `prevfloat` to `x` if `n >= 0`, or `-n`
871
applications of [`nextfloat`](@ref) if `n < 0`.
872
"""
873
function prevfloat(x::AbstractFloat, d::Integer)
    # Same stepping routine as `nextfloat`, with the direction flag set for positive `d`.
    toward_negative = ispositive(d)
    magnitude = uabs(d)
    return _nextfloat(x, toward_negative, magnitude)
end
792✔
874

875
"""
876
    prevfloat(x::AbstractFloat)
877

878
Return the largest floating point number `y` of the same type as `x` such that `y < x`.
879
If no such `y` exists (e.g. if `x` is `-Inf` or `NaN`), then return `x`.
880
"""
881
function prevfloat(x::AbstractFloat)
    # A single step downward is `nextfloat` with a step of -1.
    return nextfloat(x, -1)
end
1,244,630✔
882

883
for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128)
884
    for Tf in (Float16, Float32, Float64)
885
        if Ti <: Unsigned || sizeof(Ti) < sizeof(Tf)
886
            # Here `Tf(typemin(Ti))-1` is exact, so we can compare the lower-bound
887
            # directly. `Tf(typemax(Ti))+1` is either always exactly representable, or
888
            # rounded to `Inf` (e.g. when `Ti==UInt128 && Tf==Float32`).
889
            @eval begin
890
                function round(::Type{$Ti},x::$Tf,::RoundingMode{:ToZero})
891
                    if $(Tf(typemin(Ti))-one(Tf)) < x < $(Tf(typemax(Ti))+one(Tf))
3,339✔
892
                        return unsafe_trunc($Ti,x)
3,339✔
893
                    else
894
                        throw(InexactError(:round, $Ti, x, RoundToZero))
×
895
                    end
896
                end
897
                function (::Type{$Ti})(x::$Tf)
898
                    # When typemax(Ti) is not representable by Tf but typemax(Ti) + 1 is,
899
                    # then < Tf(typemax(Ti) + 1) is stricter than <= Tf(typemax(Ti)). Using
900
                    # the former causes us to throw on UInt64(Float64(typemax(UInt64))+1)
901
                    if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))+one(Tf))) && isinteger(x)
75,454✔
902
                        return unsafe_trunc($Ti,x)
75,082✔
903
                    else
904
                        throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x))
372✔
905
                    end
906
                end
907
            end
908
        else
909
            # Here `eps(Tf(typemin(Ti))) > 1`, so the only value which can be truncated to
910
            # `Tf(typemin(Ti))` is itself. Similarly, `Tf(typemax(Ti))` is inexact and will
911
            # be rounded up. This assumes that `Tf(typemin(Ti)) > -Inf`, which is true for
912
            # these types, but not for `Float16` or larger integer types.
913
            @eval begin
914
                function round(::Type{$Ti},x::$Tf,::RoundingMode{:ToZero})
915
                    if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))
31,139,157✔
916
                        return unsafe_trunc($Ti,x)
31,139,157✔
917
                    else
918
                        throw(InexactError(:round, $Ti, x, RoundToZero))
×
919
                    end
920
                end
921
                function (::Type{$Ti})(x::$Tf)
148✔
922
                    if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) && isinteger(x)
30,138,391✔
923
                        return unsafe_trunc($Ti,x)
30,138,031✔
924
                    else
925
                        throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x))
345✔
926
                    end
927
                end
928
            end
929
        end
930
    end
931
end
932

933
"""
934
    issubnormal(f)::Bool
935

936
Test whether a floating point number is subnormal.
937

938
An IEEE floating point number is [subnormal](https://en.wikipedia.org/wiki/Subnormal_number)
939
when its exponent bits are zero and its significand is not zero.
940

941
# Examples
942
```jldoctest
943
julia> floatmin(Float32)
944
1.1754944f-38
945

946
julia> issubnormal(1.0f-37)
947
false
948

949
julia> issubnormal(1.0f-38)
950
true
951
```
952
"""
953
function issubnormal(x::T) where {T<:IEEEFloat}
954
    # Work on the raw bit pattern of `x`.
    y = reinterpret(Unsigned, x)
9,004,185✔
955
    # Subnormal means: exponent field all zero AND significand field nonzero.
    # The two tests are combined with the non-short-circuiting `&`.
    (y & exponent_mask(T) == 0) & (y & significand_mask(T) != 0)
9,004,185✔
956
end
957

958
# True when `x` is nonzero and the fractional part reported by `frexp` is 0.5.
function ispow2(x::AbstractFloat)
    iszero(x) && return false
    frac, _ = frexp(x)
    return frac == 0.5
end
126✔
959
function iseven(x::AbstractFloat)
    # Non-integral values (including Inf and NaN) are never even.
    isinteger(x) || return false
    # Above `maxintfloat(x)` the answer is `true` without converting to an integer.
    abs(x) > maxintfloat(x) && return true
    return iseven(Integer(x))
end
156✔
960
function isodd(x::AbstractFloat)
    # Non-integral values (including Inf and NaN) are never odd.
    isinteger(x) || return false
    # Above `maxintfloat(x)` the answer is `false` without converting to an integer.
    abs(x) ≤ maxintfloat(x) || return false
    return isodd(Integer(x))
end
84✔
961

962
@eval begin
963
    typemin(::Type{Float16}) = $(bitcast(Float16, 0xfc00))
×
964
    typemax(::Type{Float16}) = $(Inf16)
×
965
    typemin(::Type{Float32}) = $(-Inf32)
×
966
    typemax(::Type{Float32}) = $(Inf32)
×
967
    typemin(::Type{Float64}) = $(-Inf64)
×
968
    typemax(::Type{Float64}) = $(Inf64)
×
969
    typemin(x::T) where {T<:Real} = typemin(T)
15,294✔
970
    typemax(x::T) where {T<:Real} = typemax(T)
1,573,359✔
971

972
    floatmin(::Type{Float16}) = $(bitcast(Float16, 0x0400))
×
973
    floatmin(::Type{Float32}) = $(bitcast(Float32, 0x00800000))
×
974
    floatmin(::Type{Float64}) = $(bitcast(Float64, 0x0010000000000000))
×
975
    floatmax(::Type{Float16}) = $(bitcast(Float16, 0x7bff))
×
976
    floatmax(::Type{Float32}) = $(bitcast(Float32, 0x7f7fffff))
×
977
    floatmax(::Type{Float64}) = $(bitcast(Float64, 0x7fefffffffffffff))
×
978

979
    eps(::Type{Float16}) = $(bitcast(Float16, 0x1400))
×
980
    eps(::Type{Float32}) = $(bitcast(Float32, 0x34000000))
×
981
    eps(::Type{Float64}) = $(bitcast(Float64, 0x3cb0000000000000))
×
982
    eps() = eps(Float64)
600✔
983
end
984

985
# Generic `eps(x)` for any `AbstractFloat`.
function eps(x::AbstractFloat)
    if !isfinite(x)
        # Inf and NaN have no meaningful spacing.
        return oftype(x, NaN)
    elseif abs(x) >= floatmin(x)
        # Normal range: scale the type's machine epsilon by the exponent of `x`.
        return ldexp(eps(typeof(x)), exponent(x))
    else
        # Below `floatmin`: the spacing is the smallest positive value.
        return nextfloat(zero(x))
    end
end
18,480✔
986

987
function eps(x::T) where T<:IEEEFloat
988
    # For isfinite(x), toggling the LSB will produce either prevfloat(x) or
989
    # nextfloat(x) but will never change the sign or exponent.
990
    # For !isfinite(x), this will map Inf to NaN and NaN to NaN or Inf.
991
    y = reinterpret(T, reinterpret(Unsigned, x) ⊻ true)
93,994✔
992
    # The absolute difference between these values is eps(x). This is true even
993
    # for Inf/NaN values.
994
    return abs(x - y)
93,994✔
995
end
996

997
"""
998
    floatmin(T = Float64)
999

1000
Return the smallest positive normal number representable by the floating-point
1001
type `T`.
1002

1003
See also: [`typemin`](@ref), [`maxintfloat`](@ref), [`floatmax`](@ref), [`eps`](@ref).
1004

1005
# Examples
1006
```jldoctest
1007
julia> floatmin(Float16)
1008
Float16(6.104e-5)
1009

1010
julia> floatmin(Float32)
1011
1.1754944f-38
1012

1013
julia> floatmin()
1014
2.2250738585072014e-308
1015
```
1016
"""
1017
function floatmin(x::T) where {T<:AbstractFloat}
    # Value form: only the type of the argument matters.
    return floatmin(T)
end
5,684,333✔
1018

1019
"""
1020
    floatmax(T = Float64)
1021

1022
Return the largest finite number representable by the floating-point type `T`.
1023

1024
See also: [`typemax`](@ref), [`maxintfloat`](@ref), [`floatmin`](@ref), [`eps`](@ref).
1025

1026
# Examples
1027
```jldoctest
1028
julia> floatmax(Float16)
1029
Float16(6.55e4)
1030

1031
julia> floatmax(Float32)
1032
3.4028235f38
1033

1034
julia> floatmax()
1035
1.7976931348623157e308
1036

1037
julia> typemax(Float64)
1038
Inf
1039
```
1040
"""
1041
function floatmax(x::T) where {T<:AbstractFloat}
    # Value form: only the type of the argument matters.
    return floatmax(T)
end
2,832,934✔
1042

1043
floatmin() = floatmin(Float64)
×
1044
floatmax() = floatmax(Float64)
×
1045

1046
"""
1047
    eps(::Type{T}) where T<:AbstractFloat
1048
    eps()
1049

1050
Return the *machine epsilon* of the floating point type `T` (`T = Float64` by
1051
default). This is defined as the gap between 1 and the next largest value representable by
1052
`typeof(one(T))`, and is equivalent to `eps(one(T))`.  (Since `eps(T)` is a
1053
bound on the *relative error* of `T`, it is a "dimensionless" quantity like [`one`](@ref).)
1054

1055
# Examples
1056
```jldoctest
1057
julia> eps()
1058
2.220446049250313e-16
1059

1060
julia> eps(Float32)
1061
1.1920929f-7
1062

1063
julia> 1.0 + eps()
1064
1.0000000000000002
1065

1066
julia> 1.0 + eps()/2
1067
1.0
1068
```
1069

1070
More generally, for any floating-point numeric type, `eps` corresponds to an
1071
upper bound on the distance to the nearest floating-point complex value: if ``\\text{fl}(x)`` is the closest
1072
floating-point value to a number ``x`` (e.g. an arbitrary real number), then ``\\text{fl}(x)``
1073
satisfies ``|x - \\text{fl}(x)| ≤ \\text{eps}(x)/2``, not including overflow cases.
1074
This allows the definition of `eps` to be extended to complex numbers,
1075
for which ``\\text{fl}(a + ib) = \\text{fl}(a) + i \\text{fl}(b)``.
1076
"""
1077
eps(::Type{<:AbstractFloat})
1078

1079
"""
1080
    eps(x::AbstractFloat)
1081

1082
Return the *unit in last place* (ulp) of `x`. This is the distance between consecutive
1083
representable floating point values at `x`. In most cases, if the distance on either side
1084
of `x` is different, then the larger of the two is taken, that is
1085

1086
    eps(x) == max(x-prevfloat(x), nextfloat(x)-x)
1087

1088
The exceptions to this rule are the smallest and largest finite values
1089
(e.g. `nextfloat(-Inf)` and `prevfloat(Inf)` for [`Float64`](@ref)), which round to the
1090
smaller of the values.
1091

1092
The rationale for this behavior is that `eps` bounds the floating point rounding
1093
error. Under the default `RoundNearest` rounding mode, if ``y`` is a real number and ``x``
1094
is the nearest floating point number to ``y``, then
1095

1096
```math
1097
|y-x| \\leq \\operatorname{eps}(x)/2.
1098
```
1099

1100
See also: [`nextfloat`](@ref), [`issubnormal`](@ref), [`floatmax`](@ref).
1101

1102
# Examples
1103
```jldoctest
1104
julia> eps(1.0)
1105
2.220446049250313e-16
1106

1107
julia> eps(prevfloat(2.0))
1108
2.220446049250313e-16
1109

1110
julia> eps(2.0)
1111
4.440892098500626e-16
1112

1113
julia> x = prevfloat(Inf)      # largest finite Float64
1114
1.7976931348623157e308
1115

1116
julia> x + eps(x)/2            # rounds up
1117
Inf
1118

1119
julia> x + prevfloat(eps(x)/2) # rounds down
1120
1.7976931348623157e308
1121
```
1122
"""
1123
eps(::AbstractFloat)
1124

1125

1126
## byte order swaps for arbitrary-endianness serialization/deserialization ##
1127
bswap(x::IEEEFloat) = bswap_int(x)
12✔
1128

1129
# integer size of float
1130
uinttype(::Type{Float64}) = UInt64
565,457✔
1131
uinttype(::Type{Float32}) = UInt32
2,906✔
1132
uinttype(::Type{Float16}) = UInt16
×
1133
inttype(::Type{Float64}) = Int64
504✔
1134
inttype(::Type{Float32}) = Int32
×
1135
inttype(::Type{Float16}) = Int16
×
1136
# float size of integer
1137
floattype(::Type{UInt64}) = Float64
3,851✔
1138
floattype(::Type{UInt32}) = Float32
×
1139
floattype(::Type{UInt16}) = Float16
×
1140
floattype(::Type{Int64}) = Float64
×
1141
floattype(::Type{Int32}) = Float32
×
1142
floattype(::Type{Int16}) = Float16
×
1143

1144

1145
## Array operations on floating point numbers ##
1146
"""
1147
    float(A::AbstractArray)
1148

1149
Return an array containing the floating-point analog of each entry in array `A`.
1150

1151
Equivalent to `float.(A)`, except that the return value may share memory with all or
1152
part of `A` in accordance with the behavior of `convert(T, A)` given output type `T`.
1153

1154
# Examples
1155
```jldoctest
1156
julia> float(1:1000)
1157
1.0:1.0:1000.0
1158
```
1159
"""
1160
function float(A::AbstractArray{<:AbstractFloat})
    # Elements are already floating point: hand back the same array, no copy.
    return A
end
15✔
1161

1162
# Generic fallback: convert `A` to an array whose element type is the
# floating-point counterpart of `T`, obtained as `typeof(float(zero(T)))`.
# Raises an error when `T` is not a concrete type.
function float(A::AbstractArray{T}) where T
921✔
1163
    if !isconcretetype(T)
1,026✔
1164
        error("`float` not defined on abstractly-typed arrays; please convert to a more specific type")
×
1165
    end
1166
    # The target element type is derived from a sample value, `zero(T)`.
    convert(AbstractArray{typeof(float(zero(T)))}, A)
1,041✔
1167
end
1168

1169
function float(r::StepRange)
    # Convert start, step and last element individually, then rebuild the range.
    lo = float(r.start)
    stride = float(r.step)
    hi = float(last(r))
    return lo:stride:hi
end
147✔
1170
function float(r::UnitRange)
    # Convert both endpoints, then rebuild the range from them.
    lo = float(r.start)
    hi = float(last(r))
    return lo:hi
end
147✔
1171
float(r::StepRangeLen{T}) where {T} =
12✔
1172
    StepRangeLen{typeof(float(T(r.ref)))}(float(r.ref), float(r.step), length(r), r.offset)
1173
function float(r::LinRange)
×
1174
    LinRange(float(r.start), float(r.stop), length(r))
×
1175
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc