• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37632

26 Sep 2023 06:44AM UTC coverage: 86.999% (-0.9%) from 87.914%
#37632

push

local

web-flow
inference: make `throw` block deoptimization concrete-eval friendly (#49235)

The deoptimization can sometimes destroy the effects analysis and
disable [semi-]concrete evaluation that is otherwise possible. This is
because the deoptimization was designed with the type domain
profitability in mind (#35982), and hasn't been adequately considering
the effects domain.

This commit makes the deoptimization aware of the effects domain more
and enables the `throw` block deoptimization only when the effects
already known to be ineligible for concrete-evaluation.

In our current effect system, `ALWAYS_FALSE`/`false` means that the
effect can not be refined to `ALWAYS_TRUE`/`true` anymore (unless given
user annotation later). Therefore we can enable the `throw` block
deoptimization without hindering the chance of concrete-evaluation when
any of the following conditions are met:
- `effects.consistent === ALWAYS_FALSE`
- `effects.effect_free === ALWAYS_FALSE`
- `effects.terminates === false`
- `effects.nonoverlayed === false`

Here are some numbers:

| Metric | master | this commit | #35982 reverted (set
`unoptimize_throw_blocks=false`) |

|-------------------------|-----------|-------------|--------------------------------------------|
| Base (seconds) | 15.579300 | 15.206645 | 15.296319 |
| Stdlibs (seconds) | 17.919013 | 17.667094 | 17.738128 |
| Total (seconds) | 33.499279 | 32.874737 | 33.035448 |
| Precompilation (seconds) | 49.967516 | 49.421121 | 49.999998 |
| First time `plot(rand(10,3))` [^1] | `2.476678 seconds (11.74 M
allocations)` | `2.430355 seconds (11.77 M allocations)` | `2.514874
seconds (11.64 M allocations)` |
| First time `solve(prob, QNDF())(5.0)` [^2] | `4.469492 seconds (15.32
M allocations)` | `4.499217 seconds (15.41 M allocations)` | `4.470772
seconds (15.38 M allocations)` |

[^1]: With disabling precompilation of Plots.jl.
[^2]: With disabling precompilation of OrdinaryDiffEq.

These numbers ma... (continued)

7 of 7 new or added lines in 1 file covered. (100.0%)

73407 of 84377 relevant lines covered (87.0%)

11275130.05 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.13
/stdlib/LinearAlgebra/src/triangular.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
## Triangular
4

5
# could be renamed to Triangular when that name has been fully deprecated
6
abstract type AbstractTriangular{T} <: AbstractMatrix{T} end
7

8
# First loop through all methods that don't need special care for upper/lower and unit diagonal
9
for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTriangular)
10
    @eval begin
11
        struct $t{T,S<:AbstractMatrix{T}} <: AbstractTriangular{T}
12
            data::S
13

14
            function $t{T,S}(data) where {T,S<:AbstractMatrix{T}}
61,960✔
15
                require_one_based_indexing(data)
61,960✔
16
                checksquare(data)
61,961✔
17
                new{T,S}(data)
61,959✔
18
            end
19
        end
20
        $t(A::$t) = A
1,725✔
21
        $t{T}(A::$t{T}) where {T} = A
56✔
22
        function $t(A::AbstractMatrix)
61,926✔
23
            return $t{eltype(A), typeof(A)}(A)
61,926✔
24
        end
25
        function $t{T}(A::AbstractMatrix) where T
28✔
26
            $t(convert(AbstractMatrix{T}, A))
28✔
27
        end
28

29
        function $t{T}(A::$t) where T
874✔
30
            Anew = convert(AbstractMatrix{T}, A.data)
874✔
31
            $t(Anew)
634✔
32
        end
33
        Matrix(A::$t{T}) where {T} = Matrix{T}(A)
31,241✔
34

35
        AbstractMatrix{T}(A::$t) where {T} = $t{T}(A)
846✔
36

37
        size(A::$t) = size(A.data)
359,351✔
38

39
        # For A<:AbstractTriangular, similar(A[, neweltype]) should yield a matrix with the same
40
        # triangular type and underlying storage type as A. The following method covers these cases.
41
        similar(A::$t, ::Type{T}) where {T} = $t(similar(parent(A), T))
1,268✔
42
        # On the other hand, similar(A, [neweltype,] shape...) should yield a matrix of the underlying
43
        # storage type of A (not wrapped in a triangular type). The following method covers these cases.
44
        similar(A::$t, ::Type{T}, dims::Dims{N}) where {T,N} = similar(parent(A), T, dims)
16,032✔
45

46
        copy(A::$t) = $t(copy(A.data))
3,940✔
47

48
        real(A::$t{<:Real}) = A
81✔
49
        real(A::$t{<:Complex}) = (B = real(A.data); $t(B))
710✔
50
    end
51
end
52

53
similar(A::UpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
×
54
    UpperTriangular(similar(parent(parent(A)), T))
55
similar(A::UnitUpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
×
56
    UnitUpperTriangular(similar(parent(parent(A)), T))
57
similar(A::LowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
×
58
    LowerTriangular(similar(parent(parent(A)), T))
59
similar(A::UnitLowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
×
60
    UnitLowerTriangular(similar(parent(parent(A)), T))
61

62

63
"""
64
    LowerTriangular(A::AbstractMatrix)
65

66
Construct a `LowerTriangular` view of the matrix `A`.
67

68
# Examples
69
```jldoctest
70
julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
71
3×3 Matrix{Float64}:
72
 1.0  2.0  3.0
73
 4.0  5.0  6.0
74
 7.0  8.0  9.0
75

76
julia> LowerTriangular(A)
77
3×3 LowerTriangular{Float64, Matrix{Float64}}:
78
 1.0   ⋅    ⋅
79
 4.0  5.0   ⋅
80
 7.0  8.0  9.0
81
```
82
"""
83
LowerTriangular
84
"""
85
    UpperTriangular(A::AbstractMatrix)
86

87
Construct an `UpperTriangular` view of the matrix `A`.
88

89
# Examples
90
```jldoctest
91
julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
92
3×3 Matrix{Float64}:
93
 1.0  2.0  3.0
94
 4.0  5.0  6.0
95
 7.0  8.0  9.0
96

97
julia> UpperTriangular(A)
98
3×3 UpperTriangular{Float64, Matrix{Float64}}:
99
 1.0  2.0  3.0
100
  ⋅   5.0  6.0
101
  ⋅    ⋅   9.0
102
```
103
"""
104
UpperTriangular
105
"""
106
    UnitLowerTriangular(A::AbstractMatrix)
107

108
Construct a `UnitLowerTriangular` view of the matrix `A`.
109
Such a view has the [`oneunit`](@ref) of the [`eltype`](@ref)
110
of `A` on its diagonal.
111

112
# Examples
113
```jldoctest
114
julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
115
3×3 Matrix{Float64}:
116
 1.0  2.0  3.0
117
 4.0  5.0  6.0
118
 7.0  8.0  9.0
119

120
julia> UnitLowerTriangular(A)
121
3×3 UnitLowerTriangular{Float64, Matrix{Float64}}:
122
 1.0   ⋅    ⋅
123
 4.0  1.0   ⋅
124
 7.0  8.0  1.0
125
```
126
"""
127
UnitLowerTriangular
128
"""
129
    UnitUpperTriangular(A::AbstractMatrix)
130

131
Construct an `UnitUpperTriangular` view of the matrix `A`.
132
Such a view has the [`oneunit`](@ref) of the [`eltype`](@ref)
133
of `A` on its diagonal.
134

135
# Examples
136
```jldoctest
137
julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
138
3×3 Matrix{Float64}:
139
 1.0  2.0  3.0
140
 4.0  5.0  6.0
141
 7.0  8.0  9.0
142

143
julia> UnitUpperTriangular(A)
144
3×3 UnitUpperTriangular{Float64, Matrix{Float64}}:
145
 1.0  2.0  3.0
146
  ⋅   1.0  6.0
147
  ⋅    ⋅   1.0
148
```
149
"""
150
UnitUpperTriangular
151

152
const UpperOrUnitUpperTriangular{T,S} = Union{UpperTriangular{T,S}, UnitUpperTriangular{T,S}}
153
const LowerOrUnitLowerTriangular{T,S} = Union{LowerTriangular{T,S}, UnitLowerTriangular{T,S}}
154
const UpperOrLowerTriangular{T,S} = Union{UpperOrUnitUpperTriangular{T,S}, LowerOrUnitLowerTriangular{T,S}}
155

156
imag(A::UpperTriangular) = UpperTriangular(imag(A.data))
7✔
157
imag(A::LowerTriangular) = LowerTriangular(imag(A.data))
7✔
158
imag(A::UnitLowerTriangular) = LowerTriangular(tril!(imag(A.data),-1))
7✔
159
imag(A::UnitUpperTriangular) = UpperTriangular(triu!(imag(A.data),1))
7✔
160

161
Array(A::AbstractTriangular) = Matrix(A)
1,279✔
162
parent(A::UpperOrLowerTriangular) = A.data
115,420✔
163

164
# then handle all methods that requires specific handling of upper/lower and unit diagonal
165

166
function Matrix{T}(A::LowerTriangular) where T
8,522✔
167
    B = Matrix{T}(undef, size(A, 1), size(A, 1))
8,522✔
168
    copyto!(B, A.data)
8,795✔
169
    tril!(B)
8,522✔
170
    B
8,522✔
171
end
172
function Matrix{T}(A::UnitLowerTriangular) where T
7,377✔
173
    B = Matrix{T}(undef, size(A, 1), size(A, 1))
7,377✔
174
    copyto!(B, A.data)
7,390✔
175
    tril!(B)
7,377✔
176
    for i = 1:size(B,1)
14,754✔
177
        B[i,i] = oneunit(T)
65,908✔
178
    end
124,439✔
179
    B
7,377✔
180
end
181
function Matrix{T}(A::UpperTriangular) where T
8,689✔
182
    B = Matrix{T}(undef, size(A, 1), size(A, 1))
8,689✔
183
    copyto!(B, A.data)
8,967✔
184
    triu!(B)
8,689✔
185
    B
8,689✔
186
end
187
function Matrix{T}(A::UnitUpperTriangular) where T
7,360✔
188
    B = Matrix{T}(undef, size(A, 1), size(A, 1))
7,360✔
189
    copyto!(B, A.data)
7,373✔
190
    triu!(B)
7,360✔
191
    for i = 1:size(B,1)
14,720✔
192
        B[i,i] = oneunit(T)
65,821✔
193
    end
124,282✔
194
    B
7,360✔
195
end
196

197
function full!(A::LowerTriangular)
49✔
198
    B = A.data
49✔
199
    tril!(B)
49✔
200
    B
49✔
201
end
202
function full!(A::UnitLowerTriangular)
49✔
203
    B = A.data
49✔
204
    tril!(B)
49✔
205
    for i = 1:size(A,1)
98✔
206
        B[i,i] = oneunit(eltype(B))
441✔
207
    end
833✔
208
    B
49✔
209
end
210
function full!(A::UpperTriangular)
49✔
211
    B = A.data
49✔
212
    triu!(B)
49✔
213
    B
49✔
214
end
215
function full!(A::UnitUpperTriangular)
49✔
216
    B = A.data
49✔
217
    triu!(B)
49✔
218
    for i = 1:size(A,1)
98✔
219
        B[i,i] = oneunit(eltype(B))
441✔
220
    end
833✔
221
    B
49✔
222
end
223

224
Base.isassigned(A::UnitLowerTriangular, i::Int, j::Int) =
40,118✔
225
    i > j ? isassigned(A.data, i, j) : true
226
Base.isassigned(A::LowerTriangular, i::Int, j::Int) =
50✔
227
    i >= j ? isassigned(A.data, i, j) : true
228
Base.isassigned(A::UnitUpperTriangular, i::Int, j::Int) =
40,018✔
229
    i < j ? isassigned(A.data, i, j) : true
230
Base.isassigned(A::UpperTriangular, i::Int, j::Int) =
2,706✔
231
    i <= j ? isassigned(A.data, i, j) : true
232

233
Base.isstored(A::UnitLowerTriangular, i::Int, j::Int) =
×
234
    i > j ? Base.isstored(A.data, i, j) : false
235
Base.isstored(A::LowerTriangular, i::Int, j::Int) =
×
236
    i >= j ? Base.isstored(A.data, i, j) : false
237
Base.isstored(A::UnitUpperTriangular, i::Int, j::Int) =
×
238
    i < j ? Base.isstored(A.data, i, j) : false
239
Base.isstored(A::UpperTriangular, i::Int, j::Int) =
×
240
    i <= j ? Base.isstored(A.data, i, j) : false
241

242
getindex(A::UnitLowerTriangular{T}, i::Integer, j::Integer) where {T} =
1,627,247✔
243
    i > j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T))
244
getindex(A::LowerTriangular, i::Integer, j::Integer) =
2,420,260✔
245
    i >= j ? A.data[i,j] : zero(A.data[j,i])
246
getindex(A::UnitUpperTriangular{T}, i::Integer, j::Integer) where {T} =
1,636,279✔
247
    i < j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T))
248
getindex(A::UpperTriangular, i::Integer, j::Integer) =
3,183,627✔
249
    i <= j ? A.data[i,j] : zero(A.data[j,i])
250

251
function setindex!(A::UpperTriangular, x, i::Integer, j::Integer)
47,303✔
252
    if i > j
47,303✔
253
        iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " *
1,066✔
254
            "($i, $j) of an UpperTriangular matrix to a nonzero value ($x)"))
255
    else
256
        A.data[i,j] = x
46,492✔
257
    end
258
    return A
47,048✔
259
end
260

261
function setindex!(A::UnitUpperTriangular, x, i::Integer, j::Integer)
2,855✔
262
    if i > j
2,855✔
263
        iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " *
1,204✔
264
            "($i, $j) of a UnitUpperTriangular matrix to a nonzero value ($x)"))
265
    elseif i == j
1,903✔
266
        x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
251✔
267
            "of a UnitUpperTriangular matrix to a non-unit value ($x)"))
268
    else
269
        A.data[i,j] = x
1,652✔
270
    end
271
    return A
2,540✔
272
end
273

274
function setindex!(A::LowerTriangular, x, i::Integer, j::Integer)
3,037✔
275
    if i < j
3,037✔
276
        iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " *
1,064✔
277
            "($i, $j) of a LowerTriangular matrix to a nonzero value ($x)"))
278
    else
279
        A.data[i,j] = x
2,227✔
280
    end
281
    return A
2,783✔
282
end
283

284
function setindex!(A::UnitLowerTriangular, x, i::Integer, j::Integer)
2,855✔
285
    if i < j
2,855✔
286
        iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " *
1,204✔
287
            "($i, $j) of a UnitLowerTriangular matrix to a nonzero value ($x)"))
288
    elseif i == j
1,903✔
289
        x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
251✔
290
            "of a UnitLowerTriangular matrix to a non-unit value ($x)"))
291
    else
292
        A.data[i,j] = x
1,652✔
293
    end
294
    return A
2,540✔
295
end
296

297

298
## structured matrix methods ##
299
function Base.replace_in_print_matrix(A::Union{UpperTriangular,UnitUpperTriangular},
21,362✔
300
                                      i::Integer, j::Integer, s::AbstractString)
301
    return i <= j ? s : Base.replace_with_centered_mark(s)
21,362✔
302
end
303
function Base.replace_in_print_matrix(A::Union{LowerTriangular,UnitLowerTriangular},
20,084✔
304
                                      i::Integer, j::Integer, s::AbstractString)
305
    return i >= j ? s : Base.replace_with_centered_mark(s)
20,084✔
306
end
307

308
function istril(A::Union{LowerTriangular,UnitLowerTriangular}, k::Integer=0)
102✔
309
    k >= 0 && return true
102✔
310
    return _istril(A, k)
×
311
end
312
function istriu(A::Union{UpperTriangular,UnitUpperTriangular}, k::Integer=0)
961✔
313
    k <= 0 && return true
961✔
314
    return _istriu(A, k)
×
315
end
316
istril(A::Adjoint, k::Integer=0) = istriu(A.parent, -k)
6,602✔
317
istril(A::Transpose, k::Integer=0) = istriu(A.parent, -k)
1,487✔
318
istriu(A::Adjoint, k::Integer=0) = istril(A.parent, -k)
6,613✔
319
istriu(A::Transpose, k::Integer=0) = istril(A.parent, -k)
1,487✔
320

321
function tril!(A::UpperTriangular{T}, k::Integer=0) where {T}
35✔
322
    n = size(A,1)
35✔
323
    if k < 0
35✔
324
        fill!(A.data, zero(T))
1,134✔
325
        return A
14✔
326
    elseif k == 0
21✔
327
        for j in 1:n, i in 1:j-1
63✔
328
            A.data[i,j] = zero(T)
252✔
329
        end
308✔
330
        return A
7✔
331
    else
332
        return UpperTriangular(tril!(A.data,k))
14✔
333
    end
334
end
335
triu!(A::UpperTriangular, k::Integer=0) = UpperTriangular(triu!(A.data, k))
186✔
336

337
function tril!(A::UnitUpperTriangular{T}, k::Integer=0) where {T}
35✔
338
    n = size(A,1)
35✔
339
    if k < 0
35✔
340
        fill!(A.data, zero(T))
1,134✔
341
        return UpperTriangular(A.data)
14✔
342
    elseif k == 0
21✔
343
        fill!(A.data, zero(T))
567✔
344
        for i in diagind(A)
14✔
345
            A.data[i] = oneunit(T)
63✔
346
        end
119✔
347
        return UpperTriangular(A.data)
7✔
348
    else
349
        for i in diagind(A)
28✔
350
            A.data[i] = oneunit(T)
126✔
351
        end
238✔
352
        return UpperTriangular(tril!(A.data,k))
14✔
353
    end
354
end
355

356
function triu!(A::UnitUpperTriangular, k::Integer=0)
35✔
357
    for i in diagind(A)
70✔
358
        A.data[i] = oneunit(eltype(A))
315✔
359
    end
595✔
360
    return triu!(UpperTriangular(A.data), k)
35✔
361
end
362

363
function triu!(A::LowerTriangular{T}, k::Integer=0) where {T}
35✔
364
    n = size(A,1)
35✔
365
    if k > 0
35✔
366
        fill!(A.data, zero(T))
1,134✔
367
        return A
14✔
368
    elseif k == 0
21✔
369
        for j in 1:n, i in j+1:n
63✔
370
            A.data[i,j] = zero(T)
252✔
371
        end
308✔
372
        return A
7✔
373
    else
374
        return LowerTriangular(triu!(A.data, k))
14✔
375
    end
376
end
377

378
tril!(A::LowerTriangular, k::Integer=0) = LowerTriangular(tril!(A.data, k))
70✔
379

380
function triu!(A::UnitLowerTriangular{T}, k::Integer=0) where T
35✔
381
    n = size(A,1)
35✔
382
    if k > 0
35✔
383
        fill!(A.data, zero(T))
1,134✔
384
        return LowerTriangular(A.data)
14✔
385
    elseif k == 0
21✔
386
        fill!(A.data, zero(T))
567✔
387
        for i in diagind(A)
14✔
388
            A.data[i] = oneunit(T)
63✔
389
        end
119✔
390
        return LowerTriangular(A.data)
7✔
391
    else
392
        for i in diagind(A)
28✔
393
            A.data[i] = oneunit(T)
126✔
394
        end
238✔
395
        return LowerTriangular(triu!(A.data, k))
14✔
396
    end
397
end
398

399
function tril!(A::UnitLowerTriangular, k::Integer=0)
35✔
400
    for i in diagind(A)
70✔
401
        A.data[i] = oneunit(eltype(A))
315✔
402
    end
595✔
403
    return tril!(LowerTriangular(A.data), k)
35✔
404
end
405

406
adjoint(A::LowerTriangular) = UpperTriangular(adjoint(A.data))
3,056✔
407
adjoint(A::UpperTriangular) = LowerTriangular(adjoint(A.data))
3,607✔
408
adjoint(A::UnitLowerTriangular) = UnitUpperTriangular(adjoint(A.data))
2,925✔
409
adjoint(A::UnitUpperTriangular) = UnitLowerTriangular(adjoint(A.data))
2,815✔
410
transpose(A::LowerTriangular) = UpperTriangular(transpose(A.data))
2,825✔
411
transpose(A::UpperTriangular) = LowerTriangular(transpose(A.data))
3,124✔
412
transpose(A::UnitLowerTriangular) = UnitUpperTriangular(transpose(A.data))
3,151✔
413
transpose(A::UnitUpperTriangular) = UnitLowerTriangular(transpose(A.data))
2,774✔
414

415
transpose!(A::LowerTriangular) = UpperTriangular(copytri!(A.data, 'L', false, true))
21✔
416
transpose!(A::UnitLowerTriangular) = UnitUpperTriangular(copytri!(A.data, 'L', false, true))
21✔
417
transpose!(A::UpperTriangular) = LowerTriangular(copytri!(A.data, 'U', false, true))
21✔
418
transpose!(A::UnitUpperTriangular) = UnitLowerTriangular(copytri!(A.data, 'U', false, true))
21✔
419
adjoint!(A::LowerTriangular) = UpperTriangular(copytri!(A.data, 'L' , true, true))
21✔
420
adjoint!(A::UnitLowerTriangular) = UnitUpperTriangular(copytri!(A.data, 'L' , true, true))
21✔
421
adjoint!(A::UpperTriangular) = LowerTriangular(copytri!(A.data, 'U' , true, true))
21✔
422
adjoint!(A::UnitUpperTriangular) = UnitLowerTriangular(copytri!(A.data, 'U' , true, true))
21✔
423

424
diag(A::LowerTriangular) = diag(A.data)
34✔
425
diag(A::UnitLowerTriangular) = fill(oneunit(eltype(A)), size(A,1))
159✔
426
diag(A::UpperTriangular) = diag(A.data)
164✔
427
diag(A::UnitUpperTriangular) = fill(oneunit(eltype(A)), size(A,1))
321✔
428

429
# Unary operations
430
-(A::LowerTriangular) = LowerTriangular(-A.data)
9✔
431
-(A::UpperTriangular) = UpperTriangular(-A.data)
168✔
432
function -(A::UnitLowerTriangular)
9✔
433
    Anew = -A.data
9✔
434
    for i = 1:size(A, 1)
18✔
435
        Anew[i, i] = -A[i, i]
126✔
436
    end
129✔
437
    LowerTriangular(Anew)
9✔
438
end
439
function -(A::UnitUpperTriangular)
9✔
440
    Anew = -A.data
9✔
441
    for i = 1:size(A, 1)
18✔
442
        Anew[i, i] = -A[i, i]
126✔
443
    end
129✔
444
    UpperTriangular(Anew)
9✔
445
end
446

447
tr(A::LowerTriangular) = tr(A.data)
7✔
448
tr(A::UnitLowerTriangular) = size(A, 1) * oneunit(eltype(A))
7✔
449
tr(A::UpperTriangular) = tr(A.data)
7✔
450
tr(A::UnitUpperTriangular) = size(A, 1) * oneunit(eltype(A))
7✔
451

452
# copy and scale
453
function copyto!(A::T, B::T) where {T<:Union{UpperTriangular,UnitUpperTriangular}}
1,651✔
454
    n = size(B,1)
1,651✔
455
    for j = 1:n
3,302✔
456
        for i = 1:(isa(B, UnitUpperTriangular) ? j-1 : j)
14,574✔
457
            @inbounds A[i,j] = B[i,j]
25,722✔
458
        end
44,171✔
459
    end
12,951✔
460
    return A
1,651✔
461
end
462
function copyto!(A::T, B::T) where {T<:Union{LowerTriangular,UnitLowerTriangular}}
56✔
463
    n = size(B,1)
56✔
464
    for j = 1:n
112✔
465
        for i = (isa(B, UnitLowerTriangular) ? j+1 : j):n
952✔
466
            @inbounds A[i,j] = B[i,j]
2,149✔
467
        end
3,836✔
468
    end
924✔
469
    return A
56✔
470
end
471

472
# Define `mul!` for (Unit){Upper,Lower}Triangular matrices times a number.
473
# be permissive here and require compatibility later in _triscale!
474
@inline mul!(A::AbstractTriangular, B::AbstractTriangular, C::Number, alpha::Number, beta::Number) =
568✔
475
    _triscale!(A, B, C, MulAddMul(alpha, beta))
476
@inline mul!(A::AbstractTriangular, B::Number, C::AbstractTriangular, alpha::Number, beta::Number) =
14✔
477
    _triscale!(A, B, C, MulAddMul(alpha, beta))
478

479
function _triscale!(A::UpperTriangular, B::UpperTriangular, c::Number, _add)
711✔
480
    n = checksquare(B)
711✔
481
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
711✔
482
    for j = 1:n
1,422✔
483
        for i = 1:j
5,526✔
484
            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
8,263✔
485
        end
13,763✔
486
    end
4,815✔
487
    return A
711✔
488
end
489
function _triscale!(A::UpperTriangular, c::Number, B::UpperTriangular, _add)
39✔
490
    n = checksquare(B)
39✔
491
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
39✔
492
    for j = 1:n
78✔
493
        for i = 1:j
766✔
494
            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
2,075✔
495
        end
3,767✔
496
    end
727✔
497
    return A
39✔
498
end
499
function _triscale!(A::UpperOrUnitUpperTriangular, B::UnitUpperTriangular, c::Number, _add)
9✔
500
    n = checksquare(B)
9✔
501
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
9✔
502
    for j = 1:n
18✔
503
        @inbounds _modify!(_add, c, A, (j,j))
69✔
504
        for i = 1:(j - 1)
129✔
505
            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
258✔
506
        end
456✔
507
    end
129✔
508
    return A
9✔
509
end
510
function _triscale!(A::UpperOrUnitUpperTriangular, c::Number, B::UnitUpperTriangular, _add)
7✔
511
    n = checksquare(B)
7✔
512
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
7✔
513
    for j = 1:n
14✔
514
        @inbounds _modify!(_add, c, A, (j,j))
63✔
515
        for i = 1:(j - 1)
119✔
516
            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
252✔
517
        end
448✔
518
    end
119✔
519
    return A
7✔
520
end
521
function _triscale!(A::LowerTriangular, B::LowerTriangular, c::Number, _add)
41✔
522
    n = checksquare(B)
41✔
523
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
41✔
524
    for j = 1:n
82✔
525
        for i = j:n
778✔
526
            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
2,087✔
527
        end
3,785✔
528
    end
737✔
529
    return A
41✔
530
end
531
function _triscale!(A::LowerTriangular, c::Number, B::LowerTriangular, _add)
39✔
532
    n = checksquare(B)
39✔
533
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
39✔
534
    for j = 1:n
78✔
535
        for i = j:n
766✔
536
            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
2,075✔
537
        end
3,767✔
538
    end
727✔
539
    return A
39✔
540
end
541
function _triscale!(A::LowerOrUnitLowerTriangular, B::UnitLowerTriangular, c::Number, _add)
9✔
542
    n = checksquare(B)
9✔
543
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
9✔
544
    for j = 1:n
18✔
545
        @inbounds _modify!(_add, c, A, (j,j))
69✔
546
        for i = (j + 1):n
129✔
547
            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
258✔
548
        end
456✔
549
    end
129✔
550
    return A
9✔
551
end
552
function _triscale!(A::LowerOrUnitLowerTriangular, c::Number, B::UnitLowerTriangular, _add)
7✔
553
    n = checksquare(B)
7✔
554
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
7✔
555
    for j = 1:n
14✔
556
        @inbounds _modify!(_add, c, A, (j,j))
63✔
557
        for i = (j + 1):n
119✔
558
            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
252✔
559
        end
448✔
560
    end
119✔
561
    return A
7✔
562
end
563

564
rmul!(A::UpperOrLowerTriangular, c::Number) = @inline _triscale!(A, A, c, MulAddMul())
202✔
565
lmul!(c::Number, A::UpperOrLowerTriangular) = @inline _triscale!(A, c, A, MulAddMul())
78✔
566

567
function dot(x::AbstractVector, A::UpperTriangular, y::AbstractVector)
42✔
568
    require_one_based_indexing(x, y)
42✔
569
    m = size(A, 1)
42✔
570
    (length(x) == m == length(y)) || throw(DimensionMismatch())
42✔
571
    if iszero(m)
42✔
572
        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
×
573
    end
574
    x₁ = x[1]
42✔
575
    r = dot(x₁, A[1,1], y[1])
42✔
576
    @inbounds for j in 2:m
84✔
577
        yj = y[j]
336✔
578
        if !iszero(yj)
336✔
579
            temp = adjoint(A[1,j]) * x₁
336✔
580
            @simd for i in 2:j
336✔
581
                temp += adjoint(A[i,j]) * x[i]
1,512✔
582
            end
×
583
            r += dot(temp, yj)
336✔
584
        end
585
    end
630✔
586
    return r
42✔
587
end
588
function dot(x::AbstractVector, A::UnitUpperTriangular, y::AbstractVector)
42✔
589
    require_one_based_indexing(x, y)
42✔
590
    m = size(A, 1)
42✔
591
    (length(x) == m == length(y)) || throw(DimensionMismatch())
42✔
592
    if iszero(m)
42✔
593
        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
×
594
    end
595
    x₁ = first(x)
42✔
596
    r = dot(x₁, y[1])
42✔
597
    @inbounds for j in 2:m
84✔
598
        yj = y[j]
336✔
599
        if !iszero(yj)
336✔
600
            temp = adjoint(A[1,j]) * x₁
336✔
601
            @simd for i in 2:j-1
378✔
602
                temp += adjoint(A[i,j]) * x[i]
1,176✔
603
            end
×
604
            r += dot(temp, yj)
420✔
605
            r += dot(x[j], yj)
336✔
606
        end
607
    end
630✔
608
    return r
42✔
609
end
610
function dot(x::AbstractVector, A::LowerTriangular, y::AbstractVector)
42✔
611
    require_one_based_indexing(x, y)
42✔
612
    m = size(A, 1)
42✔
613
    (length(x) == m == length(y)) || throw(DimensionMismatch())
42✔
614
    if iszero(m)
42✔
615
        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
×
616
    end
617
    r = zero(typeof(dot(first(x), first(A), first(y))))
42✔
618
    @inbounds for j in 1:m
84✔
619
        yj = y[j]
378✔
620
        if !iszero(yj)
378✔
621
            temp = adjoint(A[j,j]) * x[j]
378✔
622
            @simd for i in j+1:m
420✔
623
                temp += adjoint(A[i,j]) * x[i]
1,512✔
624
            end
×
625
            r += dot(temp, yj)
378✔
626
        end
627
    end
714✔
628
    return r
42✔
629
end
630
function dot(x::AbstractVector, A::UnitLowerTriangular, y::AbstractVector)
42✔
631
    require_one_based_indexing(x, y)
42✔
632
    m = size(A, 1)
42✔
633
    (length(x) == m == length(y)) || throw(DimensionMismatch())
42✔
634
    if iszero(m)
42✔
635
        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
×
636
    end
637
    r = zero(typeof(dot(first(x), first(y))))
42✔
638
    @inbounds for j in 1:m
84✔
639
        yj = y[j]
378✔
640
        if !iszero(yj)
378✔
641
            temp = x[j]
378✔
642
            @simd for i in j+1:m
420✔
643
                temp += adjoint(A[i,j]) * x[i]
1,876✔
644
            end
×
645
            r += dot(temp, yj)
491✔
646
        end
647
    end
714✔
648
    return r
42✔
649
end
650

651
fillstored!(A::LowerTriangular, x)     = (fillband!(A.data, x, 1-size(A,1), 0); A)
2✔
652
fillstored!(A::UnitLowerTriangular, x) = (fillband!(A.data, x, 1-size(A,1), -1); A)
2✔
653
fillstored!(A::UpperTriangular, x)     = (fillband!(A.data, x, 0, size(A,2)-1); A)
2✔
654
fillstored!(A::UnitUpperTriangular, x) = (fillband!(A.data, x, 1, size(A,2)-1); A)
×
655

656
# Binary operations
657
+(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(A.data + B.data)
175✔
658
+(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(A.data + B.data)
63✔
659
+(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(A.data + triu(B.data, 1) + I)
50✔
660
+(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(A.data + tril(B.data, -1) + I)
50✔
661
+(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(triu(A.data, 1) + B.data + I)
50✔
662
+(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(tril(A.data, -1) + B.data + I)
49✔
663
+(A::UnitUpperTriangular, B::UnitUpperTriangular) = UpperTriangular(triu(A.data, 1) + triu(B.data, 1) + 2I)
49✔
664
+(A::UnitLowerTriangular, B::UnitLowerTriangular) = LowerTriangular(tril(A.data, -1) + tril(B.data, -1) + 2I)
49✔
665
+(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) + copyto!(similar(parent(B)), B)
406✔
666

667
-(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(A.data - B.data)
221✔
668
-(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(A.data - B.data)
212✔
669
-(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(A.data - triu(B.data, 1) - I)
50✔
670
-(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(A.data - tril(B.data, -1) - I)
49✔
671
-(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(triu(A.data, 1) - B.data + I)
50✔
672
-(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(tril(A.data, -1) - B.data + I)
49✔
673
-(A::UnitUpperTriangular, B::UnitUpperTriangular) = UpperTriangular(triu(A.data, 1) - triu(B.data, 1))
57✔
674
-(A::UnitLowerTriangular, B::UnitLowerTriangular) = LowerTriangular(tril(A.data, -1) - tril(B.data, -1))
56✔
675
-(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) - copyto!(similar(parent(B)), B)
396✔
676

677
######################
678
# BlasFloat routines #
679
######################
680

681
# which triangle to use of the underlying data
682
uplo_char(::UpperOrUnitUpperTriangular) = 'U'
16,104✔
683
uplo_char(::LowerOrUnitLowerTriangular) = 'L'
9,380✔
684
uplo_char(::UpperOrUnitUpperTriangular{<:Any,<:AdjOrTrans}) = 'L'
10,026✔
685
uplo_char(::LowerOrUnitLowerTriangular{<:Any,<:AdjOrTrans}) = 'U'
10,000✔
686
uplo_char(::UpperOrUnitUpperTriangular{<:Any,<:Adjoint{<:Any,<:Transpose}}) = 'U'
84✔
687
uplo_char(::LowerOrUnitLowerTriangular{<:Any,<:Adjoint{<:Any,<:Transpose}}) = 'L'
84✔
688
uplo_char(::UpperOrUnitUpperTriangular{<:Any,<:Transpose{<:Any,<:Adjoint}}) = 'U'
2✔
689
uplo_char(::LowerOrUnitLowerTriangular{<:Any,<:Transpose{<:Any,<:Adjoint}}) = 'L'
2✔
690

691
isunit_char(::UpperTriangular) = 'N'
18,472✔
692
isunit_char(::UnitUpperTriangular) = 'U'
7,744✔
693
isunit_char(::LowerTriangular) = 'N'
11,803✔
694
isunit_char(::UnitLowerTriangular) = 'U'
7,663✔
695

696
lmul!(A::Tridiagonal, B::AbstractTriangular) = A*full!(B)
168✔
697
mul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) = _trimul!(C, A, B)
2,374✔
698
mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) = _trimul!(C, A, B)
5,261✔
699
mul!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = _trimul!(C, A, B)
5,630✔
700
mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractTriangular) = _trimul!(C, A, B)
11,564✔
701

702
# generic fallback for AbstractTriangular matrices outside of the four subtypes provided here
703
_trimul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) =
×
704
    lmul!(A, copyto!(C, B))
705
_trimul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) =
×
706
    lmul!(A, inplace_adj_or_trans(B)(C, _unwrap_at(B)))
707
_trimul!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) =
×
708
    rmul!(inplace_adj_or_trans(A)(C, _unwrap_at(A)), B)
709
_trimul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractTriangular) =
×
710
    lmul!(A, copyto!(C, B))
711
# redirect for UpperOrLowerTriangular
712
_trimul!(C::AbstractVecOrMat, A::UpperOrLowerTriangular, B::AbstractVector) =
2,926✔
713
    generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B)
714
_trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::AbstractMatrix) =
5,549✔
715
    generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B)
716
_trimul!(C::AbstractMatrix, A::AbstractMatrix, B::UpperOrLowerTriangular) =
6,182✔
717
    generic_mattrimul!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B)))
718
_trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::UpperOrLowerTriangular) =
11,612✔
719
    generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B)
720
# disambiguation with AbstractTriangular
721
_trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::AbstractTriangular) =
×
722
    generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B)
723
_trimul!(C::AbstractMatrix, A::AbstractTriangular, B::UpperOrLowerTriangular) =
×
724
    generic_mattrimul!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B)))
725

726
lmul!(A::AbstractTriangular, B::AbstractVecOrMat) = @inline _trimul!(B, A, B)
792✔
727
rmul!(A::AbstractMatrix, B::AbstractTriangular)   = @inline _trimul!(A, A, B)
552✔
728

729

730
for TC in (:AbstractVector, :AbstractMatrix)
731
    @eval @inline function mul!(C::$TC, A::AbstractTriangular, B::AbstractVector, alpha::Number, beta::Number)
×
732
        if isone(alpha) && iszero(beta)
×
733
            return mul!(C, A, B)
×
734
        else
735
            return generic_matvecmul!(C, 'N', A, B, MulAddMul(alpha, beta))
×
736
        end
737
    end
738
end
739
for (TA, TB) in ((:AbstractTriangular, :AbstractMatrix),
740
                    (:AbstractMatrix, :AbstractTriangular),
741
                    (:AbstractTriangular, :AbstractTriangular)
742
                )
743
    @eval @inline function mul!(C::AbstractMatrix, A::$TA, B::$TB, alpha::Number, beta::Number)
160✔
744
        if isone(alpha) && iszero(beta)
160✔
745
            return mul!(C, A, B)
144✔
746
        else
747
            return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
16✔
748
        end
749
    end
750
end
751

752
ldiv!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) = _ldiv!(C, A, B)
12,098✔
753
# generic fallback for AbstractTriangular, directs to 2-arg [l/r]div!
754
_ldiv!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) =
×
755
    ldiv!(A, inplace_adj_or_trans(B)(C, _unwrap_at(B)))
756
_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) =
×
757
    rdiv!(inplace_adj_or_trans(A)(C, _unwrap_at(A)), B)
758
# redirect for UpperOrLowerTriangular to generic_*div!
759
_ldiv!(C::AbstractVecOrMat, A::UpperOrLowerTriangular, B::AbstractVecOrMat) =
15,373✔
760
    generic_trimatdiv!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B)
761
_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UpperOrLowerTriangular) =
7,774✔
762
    generic_mattridiv!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B)))
763

764
ldiv!(A::AbstractTriangular, B::AbstractVecOrMat) = @inline _ldiv!(B, A, B)
3,275✔
765
rdiv!(A::AbstractMatrix, B::AbstractTriangular)   = @inline _rdiv!(A, A, B)
1,928✔
766

767
# preserve triangular structure in in-place multiplication/division
768
for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular),
769
                        (:UpperTriangular, :UpperTriangular, :UnitUpperTriangular),
770
                        (:UpperTriangular, :UnitUpperTriangular, :UpperTriangular),
771
                        (:UnitUpperTriangular, :UnitUpperTriangular, :UnitUpperTriangular),
772
                        (:LowerTriangular, :LowerTriangular, :LowerTriangular),
773
                        (:LowerTriangular, :LowerTriangular, :UnitLowerTriangular),
774
                        (:LowerTriangular, :UnitLowerTriangular, :LowerTriangular),
775
                        (:UnitLowerTriangular, :UnitLowerTriangular, :UnitLowerTriangular))
776
    @eval begin
777
        function _trimul!(C::$cty, A::$aty, B::$bty)
1,332✔
778
            _trimul!(parent(C), A, B)
1,332✔
779
            return C
1,332✔
780
        end
781
        function _ldiv!(C::$cty, A::$aty, B::$bty)
72✔
782
            _ldiv!(parent(C), A, B)
72✔
783
            return C
72✔
784
        end
785
        function _rdiv!(C::$cty, A::$aty, B::$bty)
1,737✔
786
            _rdiv!(parent(C), A, B)
1,737✔
787
            return C
1,737✔
788
        end
789
    end
790
end
791

792
for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
793
                            (:UnitLowerTriangular, 'L', 'U'),
794
                            (:UpperTriangular, 'U', 'N'),
795
                            (:UnitUpperTriangular, 'U', 'U'))
796
    @eval begin
797
        # Matrix inverse
798
        inv!(A::$t{T,S}) where {T<:BlasFloat,S<:StridedMatrix} =
16✔
799
            $t{T,S}(LAPACK.trtri!($uploc, $isunitc, A.data))
800

801
        function inv(A::$t{T}) where {T}
533✔
802
            S = typeof(inv(oneunit(T)))
533✔
803
            if S <: BlasFloat || S === T # i.e. A is unitless
533✔
804
                $t(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A))))
464✔
805
            else
806
                J = (one(T)*I)(size(A, 1))
650✔
807
                $t(ldiv!(similar(A, S, size(A)), A, J))
69✔
808
            end
809
        end
810

811
        # Error bounds for triangular solve
812
        errorbounds(A::$t{T,<:StridedMatrix}, X::StridedVecOrMat{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
160✔
813
            LAPACK.trrfs!($uploc, 'N', $isunitc, A.data, B, X)
814

815
        # Condition numbers
816
        function cond(A::$t{<:BlasFloat,<:StridedMatrix}, p::Real=2)
144✔
817
            checksquare(A)
144✔
818
            if p == 1
144✔
819
                return inv(LAPACK.trcon!('O', $uploc, $isunitc, A.data))
64✔
820
            elseif p == Inf
80✔
821
                return inv(LAPACK.trcon!('I', $uploc, $isunitc, A.data))
64✔
822
            else # use fallback
823
                return cond(copyto!(similar(parent(A)), A), p)
16✔
824
            end
825
        end
826
    end
827
end
828

829
# multiplication
830
generic_trimatmul!(c::StridedVector{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, b::AbstractVector{T}) where {T<:BlasFloat} =
744✔
831
    BLAS.trmv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, c === b ? c : copyto!(c, b))
832
generic_trimatmul!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractMatrix{T}) where {T<:BlasFloat} =
6,084✔
833
    BLAS.trmm!('L', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, C === B ? C : copyto!(C, B))
834
generic_mattrimul!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::AbstractMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} =
1,720✔
835
    BLAS.trmm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A))
836
# division
837
generic_trimatdiv!(C::StridedVecOrMat{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
4,829✔
838
    LAPACK.trtrs!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : copyto!(C, B))
839
generic_mattridiv!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::AbstractMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} =
2,992✔
840
    BLAS.trsm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A))
841

842
errorbounds(A::AbstractTriangular{T}, X::AbstractVecOrMat{T}, B::AbstractVecOrMat{T}) where {T<:Union{BigFloat,Complex{BigFloat}}} =
×
843
    error("not implemented yet! Please submit a pull request.")
844
function errorbounds(A::AbstractTriangular{TA}, X::AbstractVecOrMat{TX}, B::AbstractVecOrMat{TB}) where {TA<:Number,TX<:Number,TB<:Number}
128✔
845
    TAXB = promote_type(TA, TB, TX, Float32)
128✔
846
    errorbounds(convert(AbstractMatrix{TAXB}, A), convert(AbstractArray{TAXB}, X), convert(AbstractArray{TAXB}, B))
128✔
847
end
848

849
# Eigensystems
850
## Notice that trecv works for quasi-triangular matrices and therefore the lower sub diagonal must be zeroed before calling the subroutine
851
function eigvecs(A::UpperTriangular{<:BlasFloat,<:StridedMatrix})
5✔
852
    LAPACK.trevc!('R', 'A', BlasInt[], triu!(A.data))
5✔
853
end
854
function eigvecs(A::UnitUpperTriangular{<:BlasFloat,<:StridedMatrix})
5✔
855
    for i = 1:size(A, 1)
10✔
856
        A.data[i,i] = 1
45✔
857
    end
85✔
858
    LAPACK.trevc!('R', 'A', BlasInt[], triu!(A.data))
5✔
859
end
860
function eigvecs(A::LowerTriangular{<:BlasFloat,<:StridedMatrix})
5✔
861
    LAPACK.trevc!('L', 'A', BlasInt[], copy(tril!(A.data)'))
5✔
862
end
863
function eigvecs(A::UnitLowerTriangular{<:BlasFloat,<:StridedMatrix})
5✔
864
    for i = 1:size(A, 1)
10✔
865
        A.data[i,i] = 1
45✔
866
    end
85✔
867
    LAPACK.trevc!('L', 'A', BlasInt[], copy(tril!(A.data)'))
5✔
868
end
869

870
####################
871
# Generic routines #
872
####################
873

874
for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
875
                   (LowerTriangular, UnitLowerTriangular))
876
    @eval begin
877
        (*)(A::$t, x::Number) = $t(A.data*x)
36✔
878

879
        function (*)(A::$unitt, x::Number)
20✔
880
            B = A.data*x
20✔
881
            for i = 1:size(A, 1)
40✔
882
                B[i,i] = x
180✔
883
            end
340✔
884
            $t(B)
20✔
885
        end
886

887
        (*)(x::Number, A::$t) = $t(x*A.data)
272✔
888

889
        function (*)(x::Number, A::$unitt)
36✔
890
            B = x*A.data
36✔
891
            for i = 1:size(A, 1)
72✔
892
                B[i,i] = x
324✔
893
            end
612✔
894
            $t(B)
36✔
895
        end
896

897
        (/)(A::$t, x::Number) = $t(A.data/x)
60✔
898

899
        function (/)(A::$unitt, x::Number)
14✔
900
            B = A.data/x
14✔
901
            invx = inv(x)
14✔
902
            for i = 1:size(A, 1)
28✔
903
                B[i,i] = invx
126✔
904
            end
238✔
905
            $t(B)
14✔
906
        end
907

908
        (\)(x::Number, A::$t) = $t(x\A.data)
14✔
909

910
        function (\)(x::Number, A::$unitt)
14✔
911
            B = x\A.data
14✔
912
            invx = inv(x)
14✔
913
            for i = 1:size(A, 1)
28✔
914
                B[i,i] = invx
126✔
915
            end
238✔
916
            $t(B)
14✔
917
        end
918
    end
919
end
920

921
## Generic triangular multiplication
922
function generic_trimatmul!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractVecOrMat)
13,109✔
923
    require_one_based_indexing(C, A, B)
13,109✔
924
    m, n = size(B, 1), size(B, 2)
13,109✔
925
    N = size(A, 1)
13,109✔
926
    if m != N
13,109✔
927
        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
2,472✔
928
    end
929
    mc, nc = size(C, 1), size(C, 2)
10,637✔
930
    if mc != N || nc != n
21,274✔
931
        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
×
932
    end
933
    oA = oneunit(eltype(A))
10,637✔
934
    unit = isunitc == 'U'
10,637✔
935
    @inbounds if uploc == 'U'
10,637✔
936
        if tfun === identity
5,396✔
937
            for j in 1:n
4,878✔
938
                for i in 1:m
34,294✔
939
                    Cij = (unit ? oA : A[i,i]) * B[i,j]
232,057✔
940
                    for k in i + 1:m
291,731✔
941
                        Cij += A[i,k] * B[k,j]
840,448✔
942
                    end
1,105,596✔
943
                    C[i,j] = Cij
154,439✔
944
                end
291,731✔
945
            end
31,855✔
946
        else # tfun in (transpose, adjoint)
947
            for j in 1:n
5,914✔
948
                for i in m:-1:1
46,576✔
949
                    Cij = (unit ? oA : tfun(A[i,i])) * B[i,j]
312,664✔
950
                    for k in 1:i - 1
393,864✔
951
                        Cij += tfun(A[k,i]) * B[k,j]
1,158,748✔
952
                    end
1,478,758✔
953
                    C[i,j] = Cij
208,576✔
954
                end
393,864✔
955
            end
46,058✔
956
        end
957
    else # uploc == 'L'
958
        if tfun === identity
5,241✔
959
            for j in 1:n
4,374✔
960
                for i in m:-1:1
32,692✔
961
                    Cij = (unit ? oA : A[i,i]) * B[i,j]
219,253✔
962
                    for k in 1:i - 1
275,018✔
963
                        Cij += A[i,k] * B[k,j]
787,461✔
964
                    end
1,036,004✔
965
                    C[i,j] = Cij
145,682✔
966
                end
275,018✔
967
            end
16,346✔
968
        else # tfun in (transpose, adjoint)
969
            for j in 1:n
6,108✔
970
                for i in 1:m
47,468✔
971
                    Cij = (unit ? oA : tfun(A[i,i])) * B[i,j]
316,746✔
972
                    for k in i + 1:m
396,998✔
973
                        Cij += tfun(A[k,i]) * B[k,j]
1,164,408✔
974
                    end
1,480,096✔
975
                    C[i,j] = Cij
210,366✔
976
                end
396,998✔
977
            end
23,734✔
978
        end
979
    end
980
    return C
10,637✔
981
end
982
# conjugate cases
983
function generic_trimatmul!(C::AbstractVecOrMat, uploc, isunitc, ::Function, xA::AdjOrTrans, B::AbstractVecOrMat)
8✔
984
    A = parent(xA)
8✔
985
    require_one_based_indexing(C, A, B)
8✔
986
    m, n = size(B, 1), size(B, 2)
8✔
987
    N = size(A, 1)
8✔
988
    if m != N
8✔
989
        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
×
990
    end
991
    mc, nc = size(C, 1), size(C, 2)
8✔
992
    if mc != N || nc != n
16✔
993
        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
×
994
    end
995
    oA = oneunit(eltype(A))
8✔
996
    unit = isunitc == 'U'
8✔
997
    @inbounds if uploc == 'U'
8✔
998
        for j in 1:n
8✔
999
            for i in 1:m
8✔
1000
                Cij = (unit ? oA : conj(A[i,i])) * B[i,j]
20✔
1001
                for k in i + 1:m
16✔
1002
                    Cij += conj(A[i,k]) * B[k,j]
12✔
1003
                end
18✔
1004
                C[i,j] = Cij
10✔
1005
            end
16✔
1006
        end
4✔
1007
    else # uploc == 'L'
1008
        for j in 1:n
8✔
1009
            for i in m:-1:1
8✔
1010
                Cij = (unit ? oA : conj(A[i,i])) * B[i,j]
20✔
1011
                for k in 1:i - 1
16✔
1012
                    Cij += conj(A[i,k]) * B[k,j]
12✔
1013
                end
18✔
1014
                C[i,j] = Cij
10✔
1015
            end
16✔
1016
        end
4✔
1017
    end
1018
    return C
8✔
1019
end
1020

1021
function generic_mattrimul!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractMatrix)
4,462✔
1022
    require_one_based_indexing(C, A, B)
4,462✔
1023
    m, n = size(A, 1), size(A, 2)
4,462✔
1024
    N = size(B, 1)
4,462✔
1025
    if n != N
4,462✔
1026
        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
2,472✔
1027
    end
1028
    mc, nc = size(C, 1), size(C, 2)
1,990✔
1029
    if mc != m || nc != N
3,980✔
1030
        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
×
1031
    end
1032
    oB = oneunit(eltype(B))
1,990✔
1033
    unit = isunitc == 'U'
1,990✔
1034
    @inbounds if uploc == 'U'
1,990✔
1035
        if tfun === identity
940✔
1036
            for i in 1:m
844✔
1037
                for j in n:-1:1
7,156✔
1038
                    Cij = A[i,j] * (unit ? oB : B[j,j])
49,334✔
1039
                    for k in 1:j - 1
60,990✔
1040
                        Cij += A[i,k] * B[k,j]
135,574✔
1041
                    end
237,722✔
1042
                    C[i,j] = Cij
32,284✔
1043
                end
60,990✔
1044
            end
6,734✔
1045
        else # tfun in (transpose, adjoint)
1046
            for i in 1:m
1,036✔
1047
                for j in 1:n
8,892✔
1048
                    Cij = A[i,j] * (unit ? oB : tfun(B[j,j]))
59,076✔
1049
                    for k in j + 1:n
73,422✔
1050
                        Cij += A[i,k] * tfun(B[j,k])
155,736✔
1051
                    end
271,584✔
1052
                    C[i,j] = Cij
38,934✔
1053
                end
73,422✔
1054
            end
8,796✔
1055
        end
1056
    else # uploc == 'L'
1057
        if tfun === identity
1,050✔
1058
            for i in 1:m
848✔
1059
                for j in 1:n
7,184✔
1060
                    Cij = A[i,j] * (unit ? oB : B[j,j])
47,302✔
1061
                    for k in j + 1:n
59,944✔
1062
                        Cij += A[i,k] * B[k,j]
132,672✔
1063
                    end
228,768✔
1064
                    C[i,j] = Cij
31,768✔
1065
                end
59,944✔
1066
            end
3,592✔
1067
        else # tfun in (transpose, adjoint)
1068
            for i in 1:m
1,252✔
1069
                for j in n:-1:1
9,972✔
1070
                    Cij = A[i,j] * (unit ? oB : tfun(B[j,j]))
61,776✔
1071
                    for k in 1:j - 1
78,282✔
1072
                        Cij += A[i,k] * tfun(B[j,k])
166,536✔
1073
                    end
280,224✔
1074
                    C[i,j] = Cij
41,634✔
1075
                end
78,282✔
1076
            end
4,986✔
1077
        end
1078
    end
1079
    return C
1,990✔
1080
end
1081
# conjugate cases
1082
function generic_mattrimul!(C::AbstractMatrix, uploc, isunitc, ::Function, A::AbstractMatrix, xB::AdjOrTrans)
×
1083
    B = parent(xB)
×
1084
    require_one_based_indexing(C, A, B)
×
1085
    m, n = size(A, 1), size(A, 2)
×
1086
    N = size(B, 1)
×
1087
    if n != N
×
1088
        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
×
1089
    end
1090
    mc, nc = size(C, 1), size(C, 2)
×
1091
    if mc != m || nc != N
×
1092
        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
×
1093
    end
1094
    oB = oneunit(eltype(B))
×
1095
    unit = isunitc == 'U'
×
1096
    @inbounds if uploc == 'U'
×
1097
        for i in 1:m
×
1098
            for j in n:-1:1
×
1099
                Cij = A[i,j] * (unit ? oB : conj(B[j,j]))
×
1100
                for k in 1:j - 1
×
1101
                    Cij += A[i,k] * conj(B[k,j])
×
1102
                end
×
1103
                C[i,j] = Cij
×
1104
            end
×
1105
        end
×
1106
    else # uploc == 'L'
1107
        for i in 1:m
×
1108
            for j in 1:n
×
1109
                Cij = A[i,j] * (unit ? oB : conj(B[j,j]))
×
1110
                for k in j + 1:n
×
1111
                    Cij += A[i,k] * conj(B[k,j])
×
1112
                end
×
1113
                C[i,j] = Cij
×
1114
            end
×
1115
        end
×
1116
    end
1117
    return C
×
1118
end
1119

1120
#Generic solver using naive substitution
1121

1122
@inline _ustrip(a) = oneunit(a) \ a
284,264✔
1123
@inline _ustrip(a::Union{AbstractFloat,Integer,Complex,Rational}) = a
2,084,805✔
1124

1125
# manually hoisting b[j] significantly improves performance as of Dec 2015
1126
# manually eliding bounds checking significantly improves performance as of Dec 2015
1127
# replacing repeated references to A.data[j,j] with [Ajj = A.data[j,j] and references to Ajj]
1128
# does not significantly impact performance as of Dec 2015
1129
# in the transpose and conjugate transpose naive substitution variants,
1130
# accumulating in z rather than b[j,k] significantly improves performance as of Dec 2015
1131
function generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractVecOrMat)
10,149✔
1132
    require_one_based_indexing(C, A, B)
10,149✔
1133
    mA, nA = size(A)
10,149✔
1134
    m, n = size(B, 1), size(B,2)
10,149✔
1135
    if nA != m
10,149✔
1136
        throw(DimensionMismatch("second dimension of left hand side A, $nA, and first dimension of right hand side B, $m, must be equal"))
228✔
1137
    end
1138
    if size(C) != size(B)
19,842✔
1139
        throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))"))
×
1140
    end
1141
    oA = oneunit(eltype(A))
9,921✔
1142
    @inbounds if uploc == 'U'
9,921✔
1143
        if isunitc == 'N'
5,165✔
1144
            if tfun === identity
4,360✔
1145
                for k in 1:n
4,953✔
1146
                    amm = A[m,m]
19,111✔
1147
                    iszero(amm) && throw(SingularException(m))
19,111✔
1148
                    Cm = C[m,k] = amm \ B[m,k]
22,793✔
1149
                    # fill C-column
1150
                    for i in m-1:-1:1
38,145✔
1151
                        C[i,k] = oA \ B[i,k] - _ustrip(A[i,m]) * Cm
188,652✔
1152
                    end
302,424✔
1153
                    for j in m-1:-1:1
38,145✔
1154
                        ajj = A[j,j]
160,748✔
1155
                        iszero(ajj) && throw(SingularException(j))
160,748✔
1156
                        Cj = C[j,k] = _ustrip(ajj) \ C[j,k]
160,748✔
1157
                        for i in j-1:-1:1
302,424✔
1158
                            C[i,k] -= _ustrip(A[i,j]) * Cj
685,978✔
1159
                        end
1,230,190✔
1160
                    end
302,424✔
1161
                end
35,703✔
1162
            else # tfun in (adjoint, transpose)
1163
                for k in 1:n
3,750✔
1164
                    for j in 1:m
26,500✔
1165
                        ajj = A[j,j]
121,832✔
1166
                        iszero(ajj) && throw(SingularException(j))
121,832✔
1167
                        Bj = B[j,k]
133,400✔
1168
                        for i in 1:j-1
230,414✔
1169
                            Bj -= tfun(A[i,j]) * C[i,k]
620,484✔
1170
                        end
895,134✔
1171
                        C[j,k] = tfun(ajj) \ Bj
150,867✔
1172
                    end
230,414✔
1173
                end
27,064✔
1174
            end
1175
        else # isunitc == 'U'
1176
            if tfun === identity
805✔
1177
                for k in 1:n
1,138✔
1178
                    Cm = C[m,k] = oA \ B[m,k]
6,887✔
1179
                    # fill C-column
1180
                    for i in m-1:-1:1
9,836✔
1181
                        C[i,k] = oA \ B[i,k] - _ustrip(A[i,m]) * Cm
53,564✔
1182
                    end
76,048✔
1183
                    for j in m-1:-1:1
9,836✔
1184
                        Cj = C[j,k]
40,483✔
1185
                        for i in 1:j-1
76,048✔
1186
                            C[i,k] -= _ustrip(A[i,j]) * Cj
146,900✔
1187
                        end
258,235✔
1188
                    end
76,048✔
1189
                end
4,918✔
1190
            else # tfun in (adjoint, transpose)
1191
                for k in 1:n
472✔
1192
                    for j in 1:m
3,032✔
1193
                        Bj = B[j,k]
13,932✔
1194
                        for i in 1:j-1
25,772✔
1195
                            Bj -= tfun(A[i,j]) * C[i,k]
68,408✔
1196
                        end
97,024✔
1197
                        C[j,k] = oA \ Bj
17,254✔
1198
                    end
25,772✔
1199
                end
6,643✔
1200
            end
1201
        end
1202
    else # uploc == 'L'
1203
        if isunitc == 'N'
4,756✔
1204
            if tfun === identity
3,391✔
1205
                for k in 1:n
3,690✔
1206
                    a11 = A[1,1]
15,215✔
1207
                    iszero(a11) && throw(SingularException(1))
15,215✔
1208
                    C1 = C[1,k] = a11 \ B[1,k]
19,222✔
1209
                    # fill C-column
1210
                    for i in 2:m
30,354✔
1211
                        C[i,k] = oA \ B[i,k] - _ustrip(A[i,1]) * C1
152,318✔
1212
                    end
233,805✔
1213
                    for j in 2:m
30,354✔
1214
                        ajj = A[j,j]
124,491✔
1215
                        iszero(ajj) && throw(SingularException(j))
124,491✔
1216
                        Cj = C[j,k] = _ustrip(ajj) \ C[j,k]
124,491✔
1217
                        for i in j+1:m
233,805✔
1218
                            C[i,k] -= _ustrip(A[i,j]) * Cj
450,948✔
1219
                        end
792,522✔
1220
                    end
233,805✔
1221
                end
28,545✔
1222
            else # tfun in (adjoint, transpose)
1223
                for k in 1:n
3,084✔
1224
                    for j in m:-1:1
23,982✔
1225
                        ajj = A[j,j]
110,002✔
1226
                        iszero(ajj) && throw(SingularException(j))
110,002✔
1227
                        Bj = B[j,k]
121,595✔
1228
                        for i in j+1:m
208,013✔
1229
                            Bj -= tfun(A[i,j]) * C[i,k]
567,785✔
1230
                        end
803,035✔
1231
                        C[j,k] = tfun(ajj) \ Bj
138,766✔
1232
                    end
208,013✔
1233
                end
13,800✔
1234
            end
1235
        else # isunitc == 'U'
1236
            if tfun === identity
1,365✔
1237
                for k in 1:n
1,713✔
1238
                    C1 = C[1,k] = oA \ B[1,k]
8,531✔
1239
                    # fill C-column
1240
                    for i in 2:m
13,156✔
1241
                        C[i,k] = oA \ B[i,k] - _ustrip(A[i,1]) * C1
71,377✔
1242
                    end
110,126✔
1243
                    for j in 2:m
13,156✔
1244
                        Cj = C[j,k]
58,352✔
1245
                        for i in j+1:m
110,126✔
1246
                            C[i,k] -= _ustrip(A[i,j]) * Cj
314,685✔
1247
                        end
577,596✔
1248
                    end
110,126✔
1249
                end
6,578✔
1250
            else # tfun in (adjoint, transpose)
1251
                for k in 1:n
1,008✔
1252
                    for j in m:-1:1
4,554✔
1253
                        Bj = B[j,k]
20,732✔
1254
                        for i in j+1:m
38,611✔
1255
                            Bj -= tfun(A[i,j]) * C[i,k]
98,387✔
1256
                        end
148,135✔
1257
                        C[j,k] = oA \ Bj
24,405✔
1258
                    end
38,611✔
1259
                end
2,277✔
1260
            end
1261
        end
1262
    end
1263
    return C
9,845✔
1264
end
1265
# conjugate cases
1266
function generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, ::Function, xA::AdjOrTrans, B::AbstractVecOrMat)
164✔
1267
    A = parent(xA)
164✔
1268
    require_one_based_indexing(C, A, B)
164✔
1269
    mA, nA = size(A)
164✔
1270
    m, n = size(B, 1), size(B,2)
164✔
1271
    if nA != m
164✔
1272
        throw(DimensionMismatch("second dimension of left hand side A, $nA, and first dimension of right hand side B, $m, must be equal"))
×
1273
    end
1274
    if size(C) != size(B)
328✔
1275
        throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))"))
×
1276
    end
1277
    oA = oneunit(eltype(A))
164✔
1278
    @inbounds if uploc == 'U'
164✔
1279
        if isunitc == 'N'
82✔
1280
            for k in 1:n
164✔
1281
                amm = conj(A[m,m])
742✔
1282
                iszero(amm) && throw(SingularException(m))
742✔
1283
                Cm = C[m,k] = amm \ B[m,k]
778✔
1284
                # fill C-column
1285
                for i in m-1:-1:1
1,484✔
1286
                    C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,m])) * Cm
5,976✔
1287
                end
11,210✔
1288
                for j in m-1:-1:1
1,484✔
1289
                    ajj = conj(A[j,j])
5,976✔
1290
                    iszero(ajj) && throw(SingularException(j))
5,976✔
1291
                    Cj = C[j,k] = _ustrip(ajj) \ C[j,k]
5,976✔
1292
                    for i in j-1:-1:1
11,210✔
1293
                        C[i,k] -= _ustrip(conj(A[i,j])) * Cj
21,096✔
1294
                    end
36,958✔
1295
                end
11,210✔
1296
            end
1,402✔
1297
        else # isunitc == 'U'
1298
            for k in 1:n
×
1299
                Cm = C[m,k] = oA \ B[m,k]
×
1300
                # fill C-column
1301
                for i in m-1:-1:1
×
1302
                    C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,m])) * Cm
×
1303
                end
×
1304
                for j in m-1:-1:1
×
1305
                    Cj = C[j,k]
×
1306
                    for i in 1:j-1
×
1307
                        C[i,k] -= _ustrip(conj(A[i,j])) * Cj
×
1308
                    end
×
1309
                end
×
1310
            end
82✔
1311
        end
1312
    else # uploc == 'L'
1313
        if isunitc == 'N'
82✔
1314
            for k in 1:n
164✔
1315
                a11 = conj(A[1,1])
742✔
1316
                iszero(a11) && throw(SingularException(1))
742✔
1317
                C1 = C[1,k] = a11 \ B[1,k]
788✔
1318
                # fill C-column
1319
                for i in 2:m
1,484✔
1320
                    C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,1])) * C1
5,976✔
1321
                end
11,210✔
1322
                for j in 2:m
1,484✔
1323
                    ajj = conj(A[j,j])
5,976✔
1324
                    iszero(ajj) && throw(SingularException(j))
5,976✔
1325
                    Cj = C[j,k] = _ustrip(ajj) \ C[j,k]
5,976✔
1326
                    for i in j+1:m
11,210✔
1327
                        C[i,k] -= _ustrip(conj(A[i,j])) * Cj
21,096✔
1328
                    end
36,958✔
1329
                end
11,210✔
1330
            end
742✔
1331
        else # isunitc == 'U'
1332
            for k in 1:n
×
1333
                C1 = C[1,k] = oA \ B[1,k]
×
1334
                # fill C-column
1335
                for i in 2:m
×
1336
                    C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,1])) * C1
×
1337
                end
×
1338
                for j in 1:m
×
1339
                    Cj = C[j,k]
×
1340
                    for i in j+1:m
×
1341
                        C[i,k] -= _ustrip(conj(A[i,j])) * Cj
×
1342
                    end
×
1343
                end
×
1344
            end
×
1345
        end
1346
    end
1347
    return C
164✔
1348
end
1349

1350
function generic_mattridiv!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractMatrix)
4,782✔
1351
    require_one_based_indexing(C, A, B)
4,782✔
1352
    m, n = size(A)
4,782✔
1353
    if size(B, 1) != n
4,782✔
1354
        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
2,028✔
1355
    end
1356
    if size(C) != size(A)
5,508✔
1357
        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
×
1358
    end
1359
    oB = oneunit(eltype(B))
2,754✔
1360
    unit = isunitc == 'U'
2,754✔
1361
    @inbounds if uploc == 'U'
2,754✔
1362
        if tfun === identity
1,381✔
1363
            for i in 1:m
2,078✔
1364
                for j in 1:n
19,202✔
1365
                    Aij = A[i,j]
119,700✔
1366
                    for k in 1:j - 1
168,257✔
1367
                        Aij -= C[i,k]*B[k,j]
462,412✔
1368
                    end
657,404✔
1369
                    unit || (iszero(B[j,j]) && throw(SingularException(j)))
134,425✔
1370
                    C[i,j] = Aij / (unit ? oB : B[j,j])
132,963✔
1371
                end
168,257✔
1372
            end
18,163✔
1373
        else # tfun in (adjoint, transpose)
1374
            for i in 1:m
684✔
1375
                for j in n:-1:1
6,200✔
1376
                    Aij = A[i,j]
28,696✔
1377
                    for k in j + 1:n
53,140✔
1378
                        Aij -= C[i,k]*tfun(B[j,k])
139,788✔
1379
                    end
202,140✔
1380
                    unit || (iszero(B[j,j]) && throw(SingularException(j)))
43,280✔
1381
                    C[i,j] = Aij / (unit ? oB : tfun(B[j,j]))
44,216✔
1382
                end
53,140✔
1383
            end
6,897✔
1384
        end
1385
    else # uploc == 'L'
1386
        if tfun === identity
1,373✔
1387
            for i in 1:m
2,062✔
1388
                for j in n:-1:1
19,042✔
1389
                    Aij = A[i,j]
118,756✔
1390
                    for k in j + 1:n
166,737✔
1391
                        Aij -= C[i,k]*B[k,j]
458,812✔
1392
                    end
650,924✔
1393
                    unit || (iszero(B[j,j]) && throw(SingularException(j)))
133,225✔
1394
                    C[i,j] = Aij / (unit ? oB : B[j,j])
131,763✔
1395
                end
166,737✔
1396
            end
9,521✔
1397
        else # tfun in (adjoint, transpose)
1398
            for i in 1:m
684✔
1399
                for j in 1:n
6,200✔
1400
                    Aij = A[i,j]
28,696✔
1401
                    for k in 1:j - 1
53,140✔
1402
                        Aij -= C[i,k]*tfun(B[j,k])
139,788✔
1403
                    end
202,140✔
1404
                    unit || (iszero(B[j,j]) && throw(SingularException(j)))
43,280✔
1405
                    C[i,j] = Aij / (unit ? oB : tfun(B[j,j]))
44,216✔
1406
                end
53,140✔
1407
            end
3,100✔
1408
        end
1409
    end
1410
    return C
2,754✔
1411
end
1412
function generic_mattridiv!(C::AbstractMatrix, uploc, isunitc, ::Function, A::AbstractMatrix, xB::AdjOrTrans)
×
1413
    B = parent(xB)
×
1414
    require_one_based_indexing(C, A, B)
×
1415
    m, n = size(A)
×
1416
    if size(B, 1) != n
×
1417
        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
×
1418
    end
1419
    if size(C) != size(A)
×
1420
        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
×
1421
    end
1422
    oB = oneunit(eltype(B))
×
1423
    unit = isunitc == 'U'
×
1424
    if uploc == 'U'
×
1425
        @inbounds for i in 1:m
×
1426
            for j in 1:n
×
1427
                Aij = A[i,j]
×
1428
                for k in 1:j - 1
×
1429
                    Aij -= C[i,k]*conj(B[k,j])
×
1430
                end
×
1431
                unit || (iszero(B[j,j]) && throw(SingularException(j)))
×
1432
                C[i,j] = Aij / (unit ? oB : conj(B[j,j]))
×
1433
            end
×
1434
        end
×
1435
    else # uploc == 'L'
1436
        @inbounds for i in 1:m
×
1437
            for j in n:-1:1
×
1438
                Aij = A[i,j]
×
1439
                for k in j + 1:n
×
1440
                    Aij -= C[i,k]*conj(B[k,j])
×
1441
                end
×
1442
                unit || (iszero(B[j,j]) && throw(SingularException(j)))
×
1443
                C[i,j] = Aij / (unit ? oB : conj(B[j,j]))
×
1444
            end
×
1445
        end
×
1446
    end
1447
    return C
×
1448
end
1449

1450
# these are needed because we don't keep track of left- and right-multiplication in tritrimul!
1451
rmul!(A::UpperTriangular, B::UpperTriangular)     = UpperTriangular(rmul!(triu!(A.data), B))
12✔
1452
rmul!(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(rmul!(triu!(A.data), B))
12✔
1453
rmul!(A::LowerTriangular, B::LowerTriangular)     = LowerTriangular(rmul!(tril!(A.data), B))
12✔
1454
rmul!(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(rmul!(tril!(A.data), B))
12✔
1455

1456
# Promotion
1457
## Promotion methods in matmul don't apply to triangular multiplication since
1458
## it is inplace. Hence we have to make very similar definitions, but without
1459
## allocation of a result array. For multiplication and unit diagonal division
1460
## the element type doesn't have to be stable under division whereas that is
1461
## necessary in the general triangular solve problem.
1462

1463
_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA<:Integer,TB<:Integer} =
80✔
1464
    _init_eltype(*, TA, TB)
1465
_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA,TB} =
4,376✔
1466
    _init_eltype(op, TA, TB)
1467
## The general promotion methods
1468
function *(A::AbstractTriangular, B::AbstractTriangular)
3,643✔
1469
    TAB = _init_eltype(*, eltype(A), eltype(B))
3,643✔
1470
    mul!(similar(B, TAB, size(B)), A, B)
3,643✔
1471
end
1472

1473
for mat in (:AbstractVector, :AbstractMatrix)
1474
    ### Multiplication with triangle to the left and hence rhs cannot be transposed.
1475
    @eval function *(A::AbstractTriangular, B::$mat)
12,531✔
1476
        require_one_based_indexing(B)
12,531✔
1477
        TAB = _init_eltype(*, eltype(A), eltype(B))
12,531✔
1478
        mul!(similar(B, TAB, size(B)), A, B)
12,638✔
1479
    end
1480
    ### Left division with triangle to the left hence rhs cannot be transposed. No quotients.
1481
    @eval function \(A::Union{UnitUpperTriangular,UnitLowerTriangular}, B::$mat)
1,919✔
1482
        require_one_based_indexing(B)
1,919✔
1483
        TAB = _inner_type_promotion(\, eltype(A), eltype(B))
1,919✔
1484
        ldiv!(similar(B, TAB, size(B)), A, B)
1,935✔
1485
    end
1486
    ### Left division with triangle to the left hence rhs cannot be transposed. Quotients.
1487
    @eval function \(A::Union{UpperTriangular,LowerTriangular}, B::$mat)
8,333✔
1488
        require_one_based_indexing(B)
8,333✔
1489
        TAB = _init_eltype(\, eltype(A), eltype(B))
8,333✔
1490
        ldiv!(similar(B, TAB, size(B)), A, B)
9,019✔
1491
    end
1492
    ### Right division with triangle to the right hence lhs cannot be transposed. No quotients.
1493
    @eval function /(A::$mat, B::Union{UnitUpperTriangular, UnitLowerTriangular})
2,537✔
1494
        require_one_based_indexing(A)
2,537✔
1495
        TAB = _inner_type_promotion(/, eltype(A), eltype(B))
2,537✔
1496
        _rdiv!(similar(A, TAB, size(A)), A, B)
2,553✔
1497
    end
1498
    ### Right division with triangle to the right hence lhs cannot be transposed. Quotients.
1499
    @eval function /(A::$mat, B::Union{UpperTriangular,LowerTriangular})
2,603✔
1500
        require_one_based_indexing(A)
2,603✔
1501
        TAB = _init_eltype(/, eltype(A), eltype(B))
2,603✔
1502
        _rdiv!(similar(A, TAB, size(A)), A, B)
2,619✔
1503
    end
1504
end
1505
### Multiplication with triangle to the right and hence lhs cannot be transposed.
1506
# Only for AbstractMatrix, hence outside the above loop.
1507
function *(A::AbstractMatrix, B::AbstractTriangular)
4,781✔
1508
    require_one_based_indexing(A)
4,781✔
1509
    TAB = _init_eltype(*, eltype(A), eltype(B))
4,781✔
1510
    mul!(similar(A, TAB, size(A)), A, B)
5,089✔
1511
end
1512
# ambiguity resolution with definitions in matmul.jl
1513
*(v::AdjointAbsVec, A::AbstractTriangular) = adjoint(adjoint(A) * v.parent)
344✔
1514
*(v::TransposeAbsVec, A::AbstractTriangular) = transpose(transpose(A) * v.parent)
344✔
1515

1516
## Some Triangular-Triangular cases. We might want to write tailored methods
1517
## for these cases, but I'm not sure it is worth it.
1518
for f in (:*, :\)
1519
    @eval begin
1520
        ($f)(A::LowerTriangular, B::LowerTriangular) =
739✔
1521
            LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix))
1522
        ($f)(A::LowerTriangular, B::UnitLowerTriangular) =
672✔
1523
            LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix))
1524
        ($f)(A::UnitLowerTriangular, B::LowerTriangular) =
627✔
1525
            LowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix))
1526
        ($f)(A::UnitLowerTriangular, B::UnitLowerTriangular) =
616✔
1527
            UnitLowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix))
1528
        ($f)(A::UpperTriangular, B::UpperTriangular) =
2,547✔
1529
            UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix))
1530
        ($f)(A::UpperTriangular, B::UnitUpperTriangular) =
672✔
1531
            UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix))
1532
        ($f)(A::UnitUpperTriangular, B::UpperTriangular) =
627✔
1533
            UpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix))
1534
        ($f)(A::UnitUpperTriangular, B::UnitUpperTriangular) =
621✔
1535
            UnitUpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix))
1536
    end
1537
end
1538
(/)(A::LowerTriangular, B::LowerTriangular) =
123✔
1539
    LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular))
1540
(/)(A::LowerTriangular, B::UnitLowerTriangular) =
117✔
1541
    LowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular))
1542
(/)(A::UnitLowerTriangular, B::LowerTriangular) =
98✔
1543
    LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular))
1544
(/)(A::UnitLowerTriangular, B::UnitLowerTriangular) =
106✔
1545
    UnitLowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular))
1546
(/)(A::UpperTriangular, B::UpperTriangular) =
169✔
1547
    UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular))
1548
(/)(A::UpperTriangular, B::UnitUpperTriangular) =
117✔
1549
    UpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular))
1550
(/)(A::UnitUpperTriangular, B::UpperTriangular) =
98✔
1551
    UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular))
1552
(/)(A::UnitUpperTriangular, B::UnitUpperTriangular) =
106✔
1553
    UnitUpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular))
1554

1555
# Complex matrix power for upper triangular factor, see:
1556
#   Higham and Lin, "A Schur-Padé algorithm for fractional powers of a Matrix",
1557
#     SIAM J. Matrix Anal. & Appl., 32 (3), (2011) 1056–1078.
1558
#   Higham and Lin, "An improved Schur-Padé algorithm for fractional powers of
1559
#     a matrix and their Fréchet derivatives", SIAM. J. Matrix Anal. & Appl.,
1560
#     34(3), (2013) 1341–1360.
1561
function powm!(A0::UpperTriangular{<:BlasFloat}, p::Real)
60✔
1562
    if abs(p) >= 1
60✔
1563
        throw(ArgumentError("p must be a real number in (-1,1), got $p"))
2✔
1564
    end
1565

1566
    normA0 = opnorm(A0, 1)
58✔
1567
    rmul!(A0, 1/normA0)
58✔
1568

1569
    theta = [1.53e-5, 2.25e-3, 1.92e-2, 6.08e-2, 1.25e-1, 2.03e-1, 2.84e-1]
406✔
1570
    n = checksquare(A0)
58✔
1571

1572
    A, m, s = invsquaring(A0, theta)
58✔
1573
    A = I - A
58✔
1574

1575
    # Compute accurate diagonal of I - T
1576
    sqrt_diag!(A0, A, s)
58✔
1577
    for i = 1:n
116✔
1578
        A[i, i] = -A[i, i]
202✔
1579
    end
346✔
1580
    # Compute the Padé approximant
1581
    c = 0.5 * (p - m) / (2 * m - 1)
58✔
1582
    triu!(A)
58✔
1583
    S = c * A
58✔
1584
    Stmp = similar(S)
58✔
1585
    for j = m-1:-1:1
116✔
1586
        j4 = 4 * j
248✔
1587
        c = (-p - j) / (j4 + 2)
248✔
1588
        for i = 1:n
496✔
1589
            @inbounds S[i, i] = S[i, i] + 1
884✔
1590
        end
1,520✔
1591
        copyto!(Stmp, S)
248✔
1592
        mul!(S, A, c)
248✔
1593
        ldiv!(Stmp, S.data)
248✔
1594

1595
        c = (p - j) / (j4 - 2)
248✔
1596
        for i = 1:n
496✔
1597
            @inbounds S[i, i] = S[i, i] + 1
884✔
1598
        end
1,520✔
1599
        copyto!(Stmp, S)
248✔
1600
        mul!(S, A, c)
248✔
1601
        ldiv!(Stmp, S.data)
248✔
1602
    end
438✔
1603
    for i = 1:n
116✔
1604
        S[i, i] = S[i, i] + 1
202✔
1605
    end
346✔
1606
    copyto!(Stmp, S)
58✔
1607
    mul!(S, A, -p)
58✔
1608
    ldiv!(Stmp, S.data)
58✔
1609
    for i = 1:n
116✔
1610
        @inbounds S[i, i] = S[i, i] + 1
202✔
1611
    end
346✔
1612

1613
    blockpower!(A0, S, p/(2^s))
58✔
1614
    for m = 1:s
116✔
1615
        mul!(Stmp.data, S, S)
234✔
1616
        copyto!(S, Stmp)
234✔
1617
        blockpower!(A0, S, p/(2^(s-m)))
234✔
1618
    end
410✔
1619
    rmul!(S, normA0^p)
58✔
1620
    return S
58✔
1621
end
1622
powm(A::LowerTriangular, p::Real) = copy(transpose(powm!(copy(transpose(A)), p::Real)))
2✔
1623

1624
# Complex matrix logarithm for the upper triangular factor, see:
1625
#   Al-Mohy and Higham, "Improved inverse scaling and squaring algorithms for
1626
#     the matrix logarithm", SIAM J. Sci. Comput., 34(4), (2012), pp. C153–C169.
1627
#   Al-Mohy, Higham and Relton, "Computing the Frechet derivative of the matrix
1628
#     logarithm and estimating the condition number", SIAM J. Sci. Comput.,
1629
#     35(4), (2013), C394–C410.
1630
#
1631
# Based on the code available at http://eprints.ma.man.ac.uk/1851/02/logm.zip,
1632
# Copyright (c) 2011, Awad H. Al-Mohy and Nicholas J. Higham
1633
# Julia version relicensed with permission from original authors
1634
log(A::UpperTriangular{T}) where {T<:BlasFloat} = log_quasitriu(A)
263✔
1635
log(A::UnitUpperTriangular{T}) where {T<:BlasFloat} = log_quasitriu(A)
26✔
1636
log(A::LowerTriangular) = copy(transpose(log(copy(transpose(A)))))
4✔
1637
log(A::UnitLowerTriangular) = copy(transpose(log(copy(transpose(A)))))
4✔
1638

1639
function log_quasitriu(A0::AbstractMatrix{T}) where T<:BlasFloat
302✔
1640
    # allocate real A if log(A) will be real and complex A otherwise
1641
    n = checksquare(A0)
302✔
1642
    if isreal(A0) && (!istriu(A0) || !any(x -> real(x) < zero(real(T)), diag(A0)))
871✔
1643
        A = T <: Complex ? real(A0) : copy(A0)
91✔
1644
    else
1645
        A = T <: Complex ? copy(A0) : complex(A0)
211✔
1646
    end
1647
    if A0 isa UnitUpperTriangular
302✔
1648
        A = UpperTriangular(parent(A))
26✔
1649
        @inbounds for i in 1:n
52✔
1650
            A[i,i] = 1
234✔
1651
        end
442✔
1652
    end
1653
    Y0 = _log_quasitriu!(A0, A)
393✔
1654
    # return complex result for complex input
1655
    Y = T <: Complex ? complex(Y0) : Y0
328✔
1656

1657
    if A0 isa UpperTriangular || A0 isa UnitUpperTriangular
302✔
1658
        return UpperTriangular(Y)
289✔
1659
    else
1660
        return Y
13✔
1661
    end
1662
end
1663
# type-stable implementation of log_quasitriu
1664
# A is a copy of A0 that is overwritten while computing the result. It has the same eltype
1665
# as the result.
1666
function _log_quasitriu!(A0, A)
302✔
1667
    # Find Padé degree m and s while replacing A with A^(1/2^s)
1668
    m, s = _find_params_log_quasitriu!(A)
302✔
1669

1670
    # Compute accurate superdiagonal of A
1671
    _pow_superdiag_quasitriu!(A, A0, 0.5^s)
597✔
1672

1673
    # Compute accurate block diagonal of A
1674
    _sqrt_pow_diag_quasitriu!(A, A0, s)
302✔
1675

1676
    # Get the Gauss-Legendre quadrature points and weights
1677
    R = zeros(Float64, m, m)
10,234✔
1678
    for i = 1:m - 1
595✔
1679
        R[i,i+1] = i / sqrt((2 * i)^2 - 1)
1,424✔
1680
        R[i+1,i] = R[i,i+1]
1,424✔
1681
    end
2,555✔
1682
    x,V = eigen(R)
604✔
1683
    w = Vector{Float64}(undef, m)
302✔
1684
    for i = 1:m
604✔
1685
        x[i] = (x[i] + 1) / 2
3,452✔
1686
        w[i] = V[1,i]^2
1,726✔
1687
    end
3,150✔
1688

1689
    # Compute the Padé approximation
1690
    t = eltype(A)
302✔
1691
    n = size(A, 1)
302✔
1692
    Y = zeros(t, n, n)
10,060✔
1693
    B = similar(A)
302✔
1694
    for k = 1:m
604✔
1695
        B .= t(x[k]) .* A
1,787✔
1696
        @inbounds for i in 1:n
3,452✔
1697
            B[i,i] += 1
8,311✔
1698
        end
14,896✔
1699
        Y .+= t(w[k]) .* rdiv_quasitriu!(A, B)
3,452✔
1700
    end
3,150✔
1701

1702
    # Scale back
1703
    lmul!(2.0^s, Y)
597✔
1704

1705
    # Compute accurate diagonal and superdiagonal of log(A)
1706
    _log_diag_quasitriu!(Y, A0)
302✔
1707

1708
    return Y
302✔
1709
end
1710

1711
# Auxiliary functions for matrix logarithm and matrix power
1712

1713
# Find Padé degree m and s while replacing A with A^(1/2^s)
1714
#   Al-Mohy and Higham, "Improved inverse scaling and squaring algorithms for
1715
#     the matrix logarithm", SIAM J. Sci. Comput., 34(4), (2012), pp. C153–C169.
1716
#   from Algorithm 4.1
1717
function _find_params_log_quasitriu!(A)
302✔
1718
    maxsqrt = 100
302✔
1719
    theta = [1.586970738772063e-005,
2,114✔
1720
         2.313807884242979e-003,
1721
         1.938179313533253e-002,
1722
         6.209171588994762e-002,
1723
         1.276404810806775e-001,
1724
         2.060962623452836e-001,
1725
         2.879093714241194e-001]
1726
    tmax = size(theta, 1)
302✔
1727
    n = size(A, 1)
302✔
1728
    p = 0
302✔
1729
    m = 0
302✔
1730

1731
    # Find s0, the smallest s such that the ρ(triu(A)^(1/2^s) - I) ≤ theta[tmax], where ρ(X)
1732
    # is the spectral radius of X
1733
    d = complex.(@view(A[diagind(A)]))
302✔
1734
    dm1 = d .- 1
604✔
1735
    s = 0
302✔
1736
    while norm(dm1, Inf) > theta[tmax] && s < maxsqrt
1,417✔
1737
        d .= sqrt.(d)
1,115✔
1738
        dm1 .= d .- 1
2,230✔
1739
        s = s + 1
1,115✔
1740
    end
1,115✔
1741
    s0 = s
302✔
1742

1743
    # Compute repeated roots
1744
    for k = 1:min(s, maxsqrt)
570✔
1745
        _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A)
1,115✔
1746
    end
1,962✔
1747

1748
    # these three never needed at the same time, so reuse the same temporary
1749
    AmI = AmI4 = AmI5 = A - I
302✔
1750
    AmI2 = AmI * AmI
302✔
1751
    AmI3 = AmI2 * AmI
302✔
1752
    d2 = sqrt(opnorm(AmI2, 1))
302✔
1753
    d3 = cbrt(opnorm(AmI3, 1))
302✔
1754
    alpha2 = max(d2, d3)
302✔
1755
    foundm = false
302✔
1756
    if alpha2 <= theta[2]
302✔
1757
        m = alpha2 <= theta[1] ? 1 : 2
9✔
1758
        foundm = true
9✔
1759
    end
1760

1761
    while !foundm
648✔
1762
        more_sqrt = false
639✔
1763
        mul!(AmI4, AmI2, AmI2)
639✔
1764
        d4 = opnorm(AmI4, 1)^(1/4)
1,078✔
1765
        alpha3 = max(d3, d4)
639✔
1766
        if alpha3 <= theta[tmax]
639✔
1767
            local j
×
1768
            for outer j = 3:tmax
748✔
1769
                if alpha3 <= theta[j]
1,538✔
1770
                    break
374✔
1771
                end
1772
            end
1,164✔
1773
            if j <= 6
374✔
1774
                m = j
230✔
1775
                break
230✔
1776
            elseif alpha3 / 2 <= theta[5] && p < 2
144✔
1777
                more_sqrt = true
104✔
1778
                p = p + 1
104✔
1779
           end
1780
        end
1781

1782
        if !more_sqrt
409✔
1783
            mul!(AmI5, AmI3, AmI2)
305✔
1784
            d5 = opnorm(AmI5, 1)^(1/5)
530✔
1785
            alpha4 = max(d4, d5)
305✔
1786
            eta = min(alpha3, alpha4)
305✔
1787
            if eta <= theta[tmax]
305✔
1788
                j = 0
62✔
1789
                for outer j = 6:tmax
124✔
1790
                    if eta <= theta[j]
122✔
1791
                        m = j
62✔
1792
                        break
62✔
1793
                    end
1794
                end
60✔
1795
                break
62✔
1796
            end
1797
        end
1798

1799
        if s == maxsqrt
347✔
1800
            m = tmax
1✔
1801
            break
1✔
1802
        end
1803
        _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A)
346✔
1804
        copyto!(AmI, A)
346✔
1805
        for i in 1:n
692✔
1806
            @inbounds AmI[i,i] -= 1
1,770✔
1807
        end
3,194✔
1808
        mul!(AmI2, AmI, AmI)
346✔
1809
        mul!(AmI3, AmI2, AmI)
346✔
1810
        d3 = cbrt(opnorm(AmI3, 1))
346✔
1811
        s = s + 1
346✔
1812
    end
346✔
1813
    return m, s
302✔
1814
end
1815

1816
# Compute accurate diagonal of A = A0^s - I
1817
function sqrt_diag!(A0::UpperTriangular, A::UpperTriangular, s)
58✔
1818
    n = checksquare(A0)
58✔
1819
    T = eltype(A)
58✔
1820
    @inbounds for i = 1:n
116✔
1821
        a = complex(A0[i,i])
202✔
1822
        A[i,i] = _sqrt_pow(a, s)
202✔
1823
    end
202✔
1824
end
1825
# Compute accurate block diagonal of A = A0^s - I for upper quasi-triangular A0 produced
1826
# by the Schur decomposition. Diagonal is made of 1x1 and 2x2 blocks.
1827
# 2x2 blocks are real with non-negative conjugate pair eigenvalues
1828
function _sqrt_pow_diag_quasitriu!(A, A0, s)
302✔
1829
    n = checksquare(A0)
302✔
1830
    t = typeof(sqrt(zero(eltype(A))))
302✔
1831
    i = 1
302✔
1832
    @inbounds while i < n
302✔
1833
        if iszero(A0[i+1,i])  # 1x1 block
2,211✔
1834
            A[i,i] = _sqrt_pow(t(A0[i,i]), s)
1,101✔
1835
            i += 1
1,101✔
1836
        else  # real 2x2 block
1837
            @views _sqrt_pow_diag_block_2x2!(A[i:i+1,i:i+1], A0[i:i+1,i:i+1], s)
17✔
1838
            i += 2
17✔
1839
        end
1840
    end
1,118✔
1841
    if i == n  # last block is 1x1
302✔
1842
        @inbounds A[n,n] = _sqrt_pow(t(A0[n,n]), s)
295✔
1843
    end
1844
    return A
302✔
1845
end
1846
# compute a^(1/2^s)-1
1847
#   Al-Mohy, "A more accurate Briggs method for the logarithm",
1848
#      Numer. Algorithms, 59, (2012), 393–402.
1849
#   Algorithm 2
1850
function _sqrt_pow(a::Number, s)
1,598✔
1851
    T = typeof(sqrt(zero(a)))
1,598✔
1852
    s == 0 && return T(a) - 1
1,598✔
1853
    s0 = s
1,571✔
1854
    if imag(a) >= 0 && real(a) <= 0 && !iszero(a)  # angle(a) ≥ π / 2
1,571✔
1855
        a = sqrt(a)
141✔
1856
        s0 = s - 1
141✔
1857
    end
1858
    z0 = a - 1
1,571✔
1859
    a = sqrt(a)
1,571✔
1860
    r = 1 + a
1,571✔
1861
    for j = 1:s0-1
3,126✔
1862
        a = sqrt(a)
4,464✔
1863
        r = r * (1 + a)
4,464✔
1864
    end
7,373✔
1865
    return z0 / r
1,571✔
1866
end
1867
# compute A0 = A^(1/2^s)-I for 2x2 real matrices A and A0
1868
# A has non-negative conjugate pair eigenvalues
1869
# "Improved Inverse Scaling and Squaring Algorithms for the Matrix Logarithm"
1870
# SIAM J. Sci. Comput., 34(4), (2012) C153–C169. doi: 10.1137/110852553
1871
# Algorithm 5.1
1872
Base.@propagate_inbounds function _sqrt_pow_diag_block_2x2!(A, A0, s)
17✔
1873
    _sqrt_real_2x2!(A, A0)
17✔
1874
    if isone(s)
17✔
1875
        A[1,1] -= 1
×
1876
        A[2,2] -= 1
×
1877
    else
1878
        # Z = A - I
1879
        z11, z21, z12, z22 = A[1,1] - 1, A[2,1], A[1,2], A[2,2] - 1
17✔
1880
        # A = sqrt(A)
1881
        _sqrt_real_2x2!(A, A)
17✔
1882
        # P = A + I
1883
        p11, p21, p12, p22 = A[1,1] + 1, A[2,1], A[1,2], A[2,2] + 1
17✔
1884
        for i in 1:(s - 2)
34✔
1885
            # A = sqrt(A)
1886
            _sqrt_real_2x2!(A, A)
423✔
1887
            a11, a21, a12, a22 = A[1,1], A[2,1], A[1,2], A[2,2]
423✔
1888
            # P += P * A
1889
            r11 = p11*(1 + a11) + p12*a21
423✔
1890
            r22 = p21*a12 + p22*(1 + a22)
423✔
1891
            p21 = p21*(1 + a11) + p22*a21
423✔
1892
            p12 = p11*a12 + p12*(1 + a22)
423✔
1893
            p11 = r11
423✔
1894
            p22 = r22
423✔
1895
        end
829✔
1896
        # A = Z / P
1897
        c = inv(p11*p22 - p21*p12)
17✔
1898
        A[1,1] = (p22*z11 - p21*z12) * c
17✔
1899
        A[2,1] = (p22*z21 - p21*z22) * c
17✔
1900
        A[1,2] = (p11*z12 - p12*z11) * c
17✔
1901
        A[2,2] = (p11*z22 - p12*z21) * c
17✔
1902
    end
1903
    return A
17✔
1904
end
1905
# Compute accurate superdiagonal of A = A0^s - I for upper quasi-triangular A0 produced
1906
# by a Schur decomposition.
1907
# Higham and Lin, "A Schur–Padé Algorithm for Fractional Powers of a Matrix"
1908
# SIAM J. Matrix Anal. Appl., 32(3), (2011), 1056–1078.
1909
# Equation 5.6
1910
# see also blockpower for when A0 is upper triangular
1911
function _pow_superdiag_quasitriu!(A, A0, p)
302✔
1912
    n = checksquare(A0)
302✔
1913
    t = eltype(A)
302✔
1914
    k = 1
302✔
1915
    @inbounds while k < n
302✔
1916
        if !iszero(A[k+1,k])
2,204✔
1917
            k += 2
10✔
1918
            continue
10✔
1919
        end
1920
        if !(k == n - 1 || iszero(A[k+2,k+1]))
1,916✔
1921
            k += 3
7✔
1922
            continue
7✔
1923
        end
1924
        Ak = t(A0[k,k])
1,254✔
1925
        Akp1 = t(A0[k+1,k+1])
1,254✔
1926

1927
        Akp = Ak^p
1,094✔
1928
        Akp1p = Akp1^p
1,094✔
1929

1930
        if Ak == Akp1
1,094✔
1931
            A[k,k+1] = p * A0[k,k+1] * Ak^(p-1)
285✔
1932
        elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
1,602✔
1933
            A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
427✔
1934
        else
1935
            logAk = log(Ak)
382✔
1936
            logAkp1 = log(Akp1)
382✔
1937
            z = (Akp1 - Ak)/(Akp1 + Ak)
382✔
1938
            if abs(z) > 1
495✔
1939
                A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
75✔
1940
            else
1941
                w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
403✔
1942
                dd = 2 * exp(p*(logAk+logAkp1)/2) * sinh(p*w) / (Akp1 - Ak);
518✔
1943
                A[k,k+1] = A0[k,k+1] * dd
307✔
1944
            end
1945
        end
1946
        k += 1
1,094✔
1947
    end
1,413✔
1948
end
1949

1950
# Compute accurate block diagonal and superdiagonal of A = log(A0) for upper
1951
# quasi-triangular A0 produced by the Schur decomposition.
1952
function _log_diag_quasitriu!(A, A0)
302✔
1953
    n = checksquare(A0)
302✔
1954
    t = eltype(A)
302✔
1955
    k = 1
302✔
1956
    @inbounds while k < n
302✔
1957
        if iszero(A0[k+1,k])  # 1x1 block
1,524✔
1958
            Ak = t(A0[k,k])
857✔
1959
            logAk = log(Ak)
757✔
1960
            A[k,k] = logAk
757✔
1961
            if k < n - 2 && iszero(A0[k+2,k+1])
1,100✔
1962
                Akp1 = t(A0[k+1,k+1])
404✔
1963
                logAkp1 = log(Akp1)
344✔
1964
                A[k+1,k+1] = logAkp1
344✔
1965
                if Ak == Akp1
344✔
1966
                    A[k,k+1] = A0[k,k+1] / Ak
103✔
1967
                elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
486✔
1968
                    A[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak)
121✔
1969
                else
1970
                    z = (Akp1 - Ak)/(Akp1 + Ak)
120✔
1971
                    if abs(z) > 1
152✔
1972
                        A[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak)
32✔
1973
                    else
1974
                        w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
128✔
1975
                        A[k,k+1] = 2 * A0[k,k+1] * w / (Akp1 - Ak)
88✔
1976
                    end
1977
                end
1978
                k += 2
344✔
1979
            else
1980
                k += 1
1,170✔
1981
            end
1982
        else  # real 2x2 block
1983
            @views _log_diag_block_2x2!(A[k:k+1,k:k+1], A0[k:k+1,k:k+1])
17✔
1984
            k += 2
17✔
1985
        end
1986
    end
774✔
1987
    if k == n  # last 1x1 block
302✔
1988
        @inbounds A[n,n] = log(t(A0[n,n]))
315✔
1989
    end
1990
    return A
302✔
1991
end
1992
# compute A0 = log(A) for 2x2 real matrices A and A0, where A0 is a diagonal 2x2 block
1993
# produced by real Schur decomposition.
1994
# Al-Mohy, Higham and Relton, "Computing the Frechet derivative of the matrix
1995
# logarithm and estimating the condition number", SIAM J. Sci. Comput.,
1996
# 35(4), (2013), C394–C410.
1997
# Eq. 6.1
1998
Base.@propagate_inbounds function _log_diag_block_2x2!(A, A0)
17✔
1999
    a, b, c = A0[1,1], A0[1,2], A0[2,1]
17✔
2000
    # avoid underflow/overflow for large/small b and c
2001
    s = sqrt(abs(b)) * sqrt(abs(c))
17✔
2002
    θ = atan(s, a)
17✔
2003
    t = θ / s
17✔
2004
    au = abs(a)
17✔
2005
    if au > s
17✔
2006
        a1 = log1p((s / au)^2) / 2 + log(au)
7✔
2007
    else
2008
        a1 = log1p((au / s)^2) / 2 + log(s)
10✔
2009
    end
2010
    A[1,1] = a1
17✔
2011
    A[2,1] = c*t
17✔
2012
    A[1,2] = b*t
17✔
2013
    A[2,2] = a1
17✔
2014
    return A
17✔
2015
end
2016

2017
# Used only by powm at the moment
2018
# Repeatedly compute the square roots of A so that in the end its
2019
# eigenvalues are close enough to the positive real line
2020
function invsquaring(A0::UpperTriangular, theta)
58✔
2021
    require_one_based_indexing(theta)
58✔
2022
    # assumes theta is in ascending order
2023
    maxsqrt = 100
58✔
2024
    tmax = size(theta, 1)
58✔
2025
    n = checksquare(A0)
58✔
2026
    A = complex(copy(A0))
58✔
2027
    p = 0
58✔
2028
    m = 0
58✔
2029

2030
    # Compute repeated roots
2031
    d = complex(diag(A))
58✔
2032
    dm1 = d .- 1
116✔
2033
    s = 0
58✔
2034
    while norm(dm1, Inf) > theta[tmax] && s < maxsqrt
206✔
2035
        d .= sqrt.(d)
148✔
2036
        dm1 .= d .- 1
296✔
2037
        s = s + 1
148✔
2038
    end
148✔
2039
    s0 = s
58✔
2040
    for k = 1:min(s, maxsqrt)
102✔
2041
        A = sqrt(A)
148✔
2042
    end
252✔
2043

2044
    AmI = A - I
58✔
2045
    d2 = sqrt(opnorm(AmI^2, 1))
58✔
2046
    d3 = cbrt(opnorm(AmI^3, 1))
58✔
2047
    alpha2 = max(d2, d3)
58✔
2048
    foundm = false
58✔
2049
    if alpha2 <= theta[2]
58✔
2050
        m = alpha2 <= theta[1] ? 1 : 2
×
2051
        foundm = true
×
2052
    end
2053

2054
    while !foundm
186✔
2055
        more = false
186✔
2056
        if s > s0
186✔
2057
            d3 = cbrt(opnorm(AmI^3, 1))
114✔
2058
        end
2059
        d4 = opnorm(AmI^4, 1)^(1/4)
372✔
2060
        alpha3 = max(d3, d4)
186✔
2061
        if alpha3 <= theta[tmax]
186✔
2062
            local j
×
2063
            for outer j = 3:tmax
284✔
2064
                if alpha3 <= theta[j]
610✔
2065
                    break
142✔
2066
                elseif alpha3 / 2 <= theta[5] && p < 2
468✔
2067
                    more = true
116✔
2068
                    p = p + 1
116✔
2069
                end
2070
            end
468✔
2071
            if j <= 6
142✔
2072
                m = j
58✔
2073
                foundm = true
58✔
2074
                break
58✔
2075
            elseif alpha3 / 2 <= theta[5] && p < 2
84✔
2076
                more = true
×
2077
                p = p + 1
×
2078
           end
2079
        end
2080

2081
        if !more
128✔
2082
            d5 = opnorm(AmI^5, 1)^(1/5)
172✔
2083
            alpha4 = max(d4, d5)
86✔
2084
            eta = min(alpha3, alpha4)
86✔
2085
            if eta <= theta[tmax]
86✔
2086
                j = 0
56✔
2087
                for outer j = 6:tmax
112✔
2088
                    if eta <= theta[j]
56✔
2089
                        m = j
14✔
2090
                        break
14✔
2091
                    end
2092
                    break
42✔
2093
                end
×
2094
            end
2095
            if s == maxsqrt
86✔
2096
                m = tmax
×
2097
                break
×
2098
            end
2099
            A = sqrt(A)
86✔
2100
            AmI = A - I
86✔
2101
            s = s + 1
86✔
2102
        end
2103
    end
128✔
2104

2105
    # Compute accurate superdiagonal of T
2106
    p = 1 / 2^s
58✔
2107
    A = complex(A)
58✔
2108
    blockpower!(A, A0, p)
58✔
2109
    return A,m,s
58✔
2110
end
2111

2112
# Compute accurate diagonal and superdiagonal of A = A0^p
2113
function blockpower!(A::UpperTriangular, A0::UpperTriangular, p)
350✔
2114
    n = checksquare(A0)
350✔
2115
    @inbounds for k = 1:n-1
700✔
2116
        Ak = complex(A0[k,k])
924✔
2117
        Akp1 = complex(A0[k+1,k+1])
924✔
2118

2119
        Akp = Ak^p
924✔
2120
        Akp1p = Akp1^p
924✔
2121

2122
        A[k,k] = Akp
924✔
2123
        A[k+1,k+1] = Akp1p
924✔
2124

2125
        if Ak == Akp1
924✔
2126
            A[k,k+1] = p * A0[k,k+1] * Ak^(p-1)
98✔
2127
        elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
1,610✔
2128
            A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
98✔
2129
        else
2130
            logAk = log(Ak)
728✔
2131
            logAkp1 = log(Akp1)
728✔
2132
            z = (Akp1 - Ak)/(Akp1 + Ak)
728✔
2133
            if abs(z) > 1
728✔
2134
                A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
×
2135
            else
2136
                w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
728✔
2137
                dd = 2 * exp(p*(logAk+logAkp1)/2) * sinh(p*w) / (Akp1 - Ak);
1,456✔
2138
                A[k,k+1] = A0[k,k+1] * dd
728✔
2139
            end
2140
        end
2141
    end
924✔
2142
end
2143

2144
# Unwinding number
2145
unw(x::Real) = 0
272✔
2146
unw(x::Number) = ceil((imag(x) - pi) / (2 * pi))
1,974✔
2147

2148
# compute A / B for upper quasi-triangular B, possibly overwriting B
2149
function rdiv_quasitriu!(A, B)
1,726✔
2150
    n = checksquare(A)
1,726✔
2151
    AG = copy(A)
1,726✔
2152
    # use Givens rotations to annihilate 2x2 blocks
2153
    @inbounds for k in 1:(n-1)
3,437✔
2154
        s = B[k+1,k]
12,975✔
2155
        iszero(s) && continue  # 1x1 block
6,585✔
2156
        G = first(givens(B[k+1,k+1], s, k, k+1))
85✔
2157
        rmul!(B, G)
400✔
2158
        rmul!(AG, G)
85✔
2159
    end
11,459✔
2160
    return rdiv!(AG, UpperTriangular(B))
1,726✔
2161
end
2162

2163
# End of auxiliary functions for matrix logarithm and matrix power
2164

2165
sqrt(A::UpperTriangular) = sqrt_quasitriu(A)
586✔
2166
function sqrt(A::UnitUpperTriangular{T}) where T
20✔
2167
    B = A.data
20✔
2168
    n = checksquare(B)
20✔
2169
    t = typeof(sqrt(zero(T)))
20✔
2170
    R = Matrix{t}(I, n, n)
20✔
2171
    tt = typeof(oneunit(t)*oneunit(t))
20✔
2172
    half = inv(R[1,1]+R[1,1]) # for general, algebraic cases. PR#20214
22✔
2173
    @inbounds for j = 1:n
40✔
2174
        for i = j-1:-1:1
264✔
2175
            r::tt = B[i,j]
518✔
2176
            @simd for k = i+1:j-1
640✔
2177
                r -= R[i,k]*R[k,j]
1,180✔
2178
            end
×
2179
            iszero(r) || (R[i,j] = half*r)
1,036✔
2180
        end
914✔
2181
    end
264✔
2182
    return UnitUpperTriangular(R)
20✔
2183
end
2184
sqrt(A::LowerTriangular) = copy(transpose(sqrt(copy(transpose(A)))))
8✔
2185
sqrt(A::UnitLowerTriangular) = copy(transpose(sqrt(copy(transpose(A)))))
8✔
2186

2187
# Auxiliary functions for matrix square root
2188

2189
# square root of upper triangular or real upper quasitriangular matrix
2190
function sqrt_quasitriu(A0; blockwidth = eltype(A0) <: Complex ? 512 : 256)
1,348✔
2191
    n = checksquare(A0)
674✔
2192
    T = eltype(A0)
674✔
2193
    Tr = typeof(sqrt(real(zero(T))))
674✔
2194
    Tc = typeof(sqrt(complex(zero(T))))
674✔
2195
    if isreal(A0)
2,137✔
2196
        is_sqrt_real = true
288✔
2197
        if istriu(A0)
288✔
2198
            for i in 1:n
408✔
2199
                Aii = real(A0[i,i])
602✔
2200
                if Aii < zero(Aii)
602✔
2201
                    is_sqrt_real = false
44✔
2202
                    break
44✔
2203
                end
2204
            end
558✔
2205
        end
2206
        if is_sqrt_real
288✔
2207
            R = zeros(Tr, n, n)
27,010✔
2208
            A = real(A0)
244✔
2209
        else
2210
            R = zeros(Tc, n, n)
363✔
2211
            A = A0
332✔
2212
        end
2213
    else
2214
        A = A0
386✔
2215
        R = zeros(Tc, n, n)
386✔
2216
    end
2217
    _sqrt_quasitriu!(R, A; blockwidth=blockwidth, n=n)
915✔
2218
    Rc = eltype(A0) <: Real ? R : complex(R)
769✔
2219
    if A0 isa UpperTriangular
674✔
2220
        return UpperTriangular(Rc)
586✔
2221
    elseif A0 isa UnitUpperTriangular
88✔
2222
        return UnitUpperTriangular(Rc)
×
2223
    else
2224
        return Rc
88✔
2225
    end
2226
end
2227

2228
# in-place recursive sqrt of upper quasi-triangular matrix A from
2229
# Deadman E., Higham N.J., Ralha R. (2013) Blocked Schur Algorithms for Computing the Matrix
2230
# Square Root. Applied Parallel and Scientific Computing. PARA 2012. Lecture Notes in
2231
# Computer Science, vol 7782. https://doi.org/10.1007/978-3-642-36803-5_12
2232
function _sqrt_quasitriu!(R, A; blockwidth=64, n=checksquare(A))
4,395✔
2233
    if n ≤ blockwidth || !(eltype(R) <: BlasFloat) # base case, perform "point" algorithm
2,229✔
2234
        _sqrt_quasitriu_block!(R, A)
2,166✔
2235
    else  # compute blockwise recursion
2236
        split = div(n, 2)
31✔
2237
        iszero(A[split+1, split]) || (split += 1) # don't split 2x2 diagonal block
39✔
2238
        r1 = 1:split
31✔
2239
        r2 = (split + 1):n
31✔
2240
        n1, n2 = split, n - split
31✔
2241
        A11, A12, A22 = @views A[r1,r1], A[r1,r2], A[r2,r2]
31✔
2242
        R11, R12, R22 = @views R[r1,r1], R[r1,r2], R[r2,r2]
31✔
2243
        # solve diagonal blocks recursively
2244
        _sqrt_quasitriu!(R11, A11; blockwidth=blockwidth, n=n1)
31✔
2245
        _sqrt_quasitriu!(R22, A22; blockwidth=blockwidth, n=n2)
31✔
2246
        # solve off-diagonal block
2247
        R12 .= .- A12
62✔
2248
        _sylvester_quasitriu!(R11, R22, R12; blockwidth=blockwidth, nA=n1, nB=n2, raise=false)
31✔
2249
    end
2250
    return R
2,197✔
2251
end
2252

2253
function _sqrt_quasitriu_block!(R, A)
2,166✔
2254
    _sqrt_quasitriu_diag_block!(R, A)
2,166✔
2255
    _sqrt_quasitriu_offdiag_block!(R, A)
2,166✔
2256
    return R
2,166✔
2257
end
2258

2259
function _sqrt_quasitriu_diag_block!(R, A)
2,166✔
2260
    n = size(R, 1)
2,166✔
2261
    ta = eltype(R) <: Complex ? complex(eltype(A)) : eltype(A)
2,166✔
2262
    i = 1
2,166✔
2263
    @inbounds while i < n
2,166✔
2264
        if iszero(A[i + 1, i])
13,919✔
2265
            R[i, i] = sqrt(ta(A[i, i]))
7,018✔
2266
            i += 1
7,018✔
2267
        else
2268
            # this branch is never reached when A is complex triangular
2269
            @views _sqrt_real_2x2!(R[i:(i + 1), i:(i + 1)], A[i:(i + 1), i:(i + 1)])
736✔
2270
            i += 2
736✔
2271
        end
2272
    end
7,754✔
2273
    if i == n
2,166✔
2274
        R[n, n] = sqrt(ta(A[n, n]))
1,728✔
2275
    end
2276
    return R
2,166✔
2277
end
2278

2279
function _sqrt_quasitriu_offdiag_block!(R, A)
2,166✔
2280
    n = size(R, 1)
2,166✔
2281
    j = 1
2,166✔
2282
    @inbounds while j ≤ n
2,166✔
2283
        jsize_is_2 = j < n && !iszero(A[j + 1, j])
15,647✔
2284
        i = j - 1
9,482✔
2285
        while i > 0
76,103✔
2286
            isize_is_2 = i > 1 && !iszero(A[i, i - 1])
83,928✔
2287
            if isize_is_2
66,621✔
2288
                if jsize_is_2
831✔
2289
                    _sqrt_quasitriu_offdiag_block_2x2!(R, A, i - 1, j)
414✔
2290
                else
2291
                    _sqrt_quasitriu_offdiag_block_2x1!(R, A, i - 1, j)
417✔
2292
                end
2293
                i -= 2
831✔
2294
            else
2295
                if jsize_is_2
65,790✔
2296
                    _sqrt_quasitriu_offdiag_block_1x2!(R, A, i, j)
250✔
2297
                else
2298
                    _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j)
65,542✔
2299
                end
2300
                i -= 1
65,790✔
2301
            end
2302
        end
66,621✔
2303
        j += 2 - !jsize_is_2
9,482✔
2304
    end
9,482✔
2305
    return R
2,166✔
2306
end
2307

2308
# real square root of 2x2 diagonal block of quasi-triangular matrix from real Schur
2309
# decomposition. Eqs 6.8-6.9 and Algorithm 6.5 of
2310
# Higham, 2008, "Functions of Matrices: Theory and Computation", SIAM.
2311
Base.@propagate_inbounds function _sqrt_real_2x2!(R, A)
1,193✔
2312
    # in the real Schur form, A[1, 1] == A[2, 2], and A[2, 1] * A[1, 2] < 0
2313
    θ, a21, a12 = A[1, 1], A[2, 1], A[1, 2]
1,193✔
2314
    # avoid overflow/underflow of μ
2315
    # for real sqrt, |d| ≤ 2 max(|a12|,|a21|)
2316
    μ = sqrt(abs(a12)) * sqrt(abs(a21))
1,193✔
2317
    α = _real_sqrt(θ, μ)
1,307✔
2318
    c = 2α
1,193✔
2319
    R[1, 1] = α
1,193✔
2320
    R[2, 1] = a21 / c
1,193✔
2321
    R[1, 2] = a12 / c
1,193✔
2322
    R[2, 2] = α
1,193✔
2323
    return R
1,193✔
2324
end
2325

2326
# real part of square root of θ+im*μ
2327
@inline function _real_sqrt(θ, μ)
1,193✔
2328
    t = sqrt((abs(θ) + hypot(θ, μ)) / 2)
1,278✔
2329
    return θ ≥ 0 ? t : μ / 2t
1,307✔
2330
end
2331

2332
Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j)
65,540✔
2333
    Rii = R[i, i]
65,540✔
2334
    Rjj = R[j, j]
65,540✔
2335
    iszero(Rii) && iszero(Rjj) && return R
65,540✔
2336
    t = eltype(R)
65,539✔
2337
    tt = typeof(zero(t)*zero(t))
65,539✔
2338
    r = tt(-A[i, j])
65,539✔
2339
    @simd for k in (i + 1):(j - 1)
72,446✔
2340
        r += R[i, k] * R[k, j]
2,991,358✔
2341
    end
×
2342
    iszero(r) && return R
65,539✔
2343
    R[i, j] = sylvester(Rii, Rjj, r)
56,975✔
2344
    return R
56,975✔
2345
end
2346

2347
Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x2!(R, A, i, j)
250✔
2348
    jrange = j:(j + 1)
250✔
2349
    t = eltype(R)
250✔
2350
    tt = typeof(zero(t)*zero(t))
250✔
2351
    r1 = tt(-A[i, j])
250✔
2352
    r2 = tt(-A[i, j + 1])
250✔
2353
    @simd for k in (i + 1):(j - 1)
360✔
2354
        rik = R[i, k]
476✔
2355
        r1 += rik * R[k, j]
476✔
2356
        r2 += rik * R[k, j + 1]
476✔
2357
    end
×
2358
    Rjj = @view R[jrange, jrange]
250✔
2359
    Rij = @view R[i, jrange]
250✔
2360
    Rij[1] = r1
250✔
2361
    Rij[2] = r2
250✔
2362
    _sylvester_1x2!(R[i, i], Rjj, Rij)
250✔
2363
    return R
250✔
2364
end
2365

2366
Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x1!(R, A, i, j)
417✔
2367
    irange = i:(i + 1)
417✔
2368
    t = eltype(R)
417✔
2369
    tt = typeof(zero(t)*zero(t))
417✔
2370
    r1 = tt(-A[i, j])
417✔
2371
    r2 = tt(-A[i + 1, j])
417✔
2372
    @simd for k in (i + 2):(j - 1)
546✔
2373
        rkj = R[k, j]
1,003✔
2374
        r1 += R[i, k] * rkj
1,003✔
2375
        r2 += R[i + 1, k] * rkj
1,003✔
2376
    end
×
2377
    Rii = @view R[irange, irange]
417✔
2378
    Rij = @view R[irange, j]
417✔
2379
    Rij[1] = r1
417✔
2380
    Rij[2] = r2
417✔
2381
    @views _sylvester_2x1!(Rii, R[j, j], Rij)
417✔
2382
    return R
417✔
2383
end
2384

2385
Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x2!(R, A, i, j)
414✔
2386
    irange = i:(i + 1)
414✔
2387
    jrange = j:(j + 1)
414✔
2388
    t = eltype(R)
414✔
2389
    tt = typeof(zero(t)*zero(t))
414✔
2390
    for i′ in irange, j′ in jrange
1,242✔
2391
        Cij = tt(-A[i′, j′])
1,656✔
2392
        @simd for k in (i + 2):(j - 1)
2,332✔
2393
            Cij += R[i′, k] * R[k, j′]
3,464✔
2394
        end
×
2395
        R[i′, j′] = Cij
1,656✔
2396
    end
2,070✔
2397
    Rii = @view R[irange, irange]
414✔
2398
    Rjj = @view R[jrange, jrange]
414✔
2399
    Rij = @view R[irange, jrange]
414✔
2400
    if !iszero(Rij) && !all(isnan, Rij)
414✔
2401
        _sylvester_2x2!(Rii, Rjj, Rij)
411✔
2402
    end
2403
    return R
414✔
2404
end
2405

2406
# solve Sylvester's equation AX + XB = -C using blockwise recursion until the dimension of
2407
# A and B are no greater than blockwidth, based on Algorithm 1 from
2408
# Jonsson I, Kågström B. Recursive blocked algorithms for solving triangular systems—
2409
# Part I: one-sided and coupled Sylvester-type matrix equations. (2002) ACM Trans Math Softw.
2410
# 28(4), https://doi.org/10.1145/592843.592845.
2411
# specify raise=false to avoid breaking the recursion if a LAPACKException is thrown when
2412
# computing one of the blocks.
2413
function _sylvester_quasitriu!(A, B, C; blockwidth=64, nA=checksquare(A), nB=checksquare(B), raise=true)
1,154✔
2414
    if 1 ≤ nA ≤ blockwidth && 1 ≤ nB ≤ blockwidth
539✔
2415
        _sylvester_quasitriu_base!(A, B, C; raise=raise)
398✔
2416
    elseif nA ≥ 2nB ≥ 2
141✔
2417
        _sylvester_quasitriu_split1!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
12✔
2418
    elseif nB ≥ 2nA ≥ 2
129✔
2419
        _sylvester_quasitriu_split2!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
12✔
2420
    else
2421
        _sylvester_quasitriu_splitall!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
117✔
2422
    end
2423
    return C
499✔
2424
end
2425
function _sylvester_quasitriu_base!(A, B, C; raise=true)
796✔
2426
    try
398✔
2427
        _, scale = LAPACK.trsyl!('N', 'N', A, B, C)
398✔
2428
        rmul!(C, -inv(scale))
398✔
2429
    catch e
2430
        if !(e isa LAPACKException) || raise
296✔
2431
            throw(e)
148✔
2432
        end
2433
    end
2434
    return C
382✔
2435
end
2436
function _sylvester_quasitriu_split1!(A, B, C; nA=checksquare(A), kwargs...)
24✔
2437
    iA = div(nA, 2)
12✔
2438
    iszero(A[iA + 1, iA]) || (iA += 1)  # don't split 2x2 diagonal block
12✔
2439
    rA1, rA2 = 1:iA, (iA + 1):nA
12✔
2440
    nA1, nA2 = iA, nA-iA
12✔
2441
    A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2]
12✔
2442
    C1, C2 = @views C[rA1,:], C[rA2,:]
12✔
2443
    _sylvester_quasitriu!(A22, B, C2; nA=nA2, kwargs...)
24✔
2444
    mul!(C1, A12, C2, true, true)
8✔
2445
    _sylvester_quasitriu!(A11, B, C1; nA=nA1, kwargs...)
16✔
2446
    return C
8✔
2447
end
2448
function _sylvester_quasitriu_split2!(A, B, C; nB=checksquare(B), kwargs...)
24✔
2449
    iB = div(nB, 2)
12✔
2450
    iszero(B[iB + 1, iB]) || (iB += 1)  # don't split 2x2 diagonal block
12✔
2451
    rB1, rB2 = 1:iB, (iB + 1):nB
12✔
2452
    nB1, nB2 = iB, nB-iB
12✔
2453
    B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2]
12✔
2454
    C1, C2 = @views C[:,rB1], C[:,rB2]
12✔
2455
    _sylvester_quasitriu!(A, B11, C1; nB=nB1, kwargs...)
24✔
2456
    mul!(C2, C1, B12, true, true)
8✔
2457
    _sylvester_quasitriu!(A, B22, C2; nB=nB2, kwargs...)
16✔
2458
    return C
8✔
2459
end
2460
function _sylvester_quasitriu_splitall!(A, B, C; nA=checksquare(A), nB=checksquare(B), kwargs...)
234✔
2461
    iA = div(nA, 2)
117✔
2462
    iszero(A[iA + 1, iA]) || (iA += 1)  # don't split 2x2 diagonal block
128✔
2463
    iB = div(nB, 2)
117✔
2464
    iszero(B[iB + 1, iB]) || (iB += 1)  # don't split 2x2 diagonal block
136✔
2465
    rA1, rA2 = 1:iA, (iA + 1):nA
117✔
2466
    nA1, nA2 = iA, nA-iA
117✔
2467
    rB1, rB2 = 1:iB, (iB + 1):nB
117✔
2468
    nB1, nB2 = iB, nB-iB
117✔
2469
    A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2]
117✔
2470
    B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2]
117✔
2471
    C11, C21, C12, C22 = @views C[rA1,rB1], C[rA2,rB1], C[rA1,rB2], C[rA2,rB2]
117✔
2472
    _sylvester_quasitriu!(A22, B11, C21; nA=nA2, nB=nB1, kwargs...)
129✔
2473
    mul!(C11, A12, C21, true, true)
101✔
2474
    _sylvester_quasitriu!(A11, B11, C11; nA=nA1, nB=nB1, kwargs...)
109✔
2475
    mul!(C22, C21, B12, true, true)
101✔
2476
    _sylvester_quasitriu!(A22, B22, C22; nA=nA2, nB=nB2, kwargs...)
109✔
2477
    mul!(C12, A12, C22, true, true)
101✔
2478
    mul!(C12, C11, B12, true, true)
101✔
2479
    _sylvester_quasitriu!(A11, B22, C12; nA=nA1, nB=nB2, kwargs...)
109✔
2480
    return C
101✔
2481
end
2482

2483
# End of auxiliary functions for matrix square root
2484

2485
# Generic eigensystems
2486
eigvals(A::AbstractTriangular) = diag(A)
100✔
2487
function eigvecs(A::AbstractTriangular{T}) where T
4✔
2488
    TT = promote_type(T, Float32)
4✔
2489
    if TT <: BlasFloat
4✔
2490
        return eigvecs(convert(AbstractMatrix{TT}, A))
4✔
2491
    else
2492
        throw(ArgumentError("eigvecs type $(typeof(A)) not supported. Please submit a pull request."))
×
2493
    end
2494
end
2495
det(A::UnitUpperTriangular{T}) where {T} = one(T)
7✔
2496
det(A::UnitLowerTriangular{T}) where {T} = one(T)
7✔
2497
logdet(A::UnitUpperTriangular{T}) where {T} = zero(T)
7✔
2498
logdet(A::UnitLowerTriangular{T}) where {T} = zero(T)
7✔
2499
logabsdet(A::UnitUpperTriangular{T}) where {T} = zero(T), one(T)
7✔
2500
logabsdet(A::UnitLowerTriangular{T}) where {T} = zero(T), one(T)
7✔
2501
det(A::UpperTriangular) = prod(diag(A.data))
307✔
2502
det(A::LowerTriangular) = prod(diag(A.data))
7✔
2503
function logabsdet(A::Union{UpperTriangular{T},LowerTriangular{T}}) where T
104✔
2504
    sgn = one(T)
104✔
2505
    abs_det = zero(real(T))
104✔
2506
    @inbounds for i in 1:size(A,1)
208✔
2507
        diag_i = A.data[i,i]
664✔
2508
        sgn *= sign(diag_i)
804✔
2509
        abs_det += log(abs(diag_i))
775✔
2510
    end
1,224✔
2511
    return abs_det, sgn
104✔
2512
end
2513

2514
eigen(A::AbstractTriangular) = Eigen(eigvals(A), eigvecs(A))
20✔
2515

2516
# Generic singular systems
2517
for func in (:svd, :svd!, :svdvals)
2518
    @eval begin
2519
        ($func)(A::AbstractTriangular; kwargs...) = ($func)(copyto!(similar(parent(A)), A); kwargs...)
112✔
2520
    end
2521
end
2522

2523
factorize(A::AbstractTriangular) = A
28✔
2524

2525
# disambiguation methods: /(Adjoint of AbsVec, <:AbstractTriangular)
2526
/(u::AdjointAbsVec, A::Union{LowerTriangular,UpperTriangular}) = adjoint(adjoint(A) \ u.parent)
×
2527
/(u::AdjointAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = adjoint(adjoint(A) \ u.parent)
×
2528
# disambiguation methods: /(Transpose of AbsVec, <:AbstractTriangular)
2529
/(u::TransposeAbsVec, A::Union{LowerTriangular,UpperTriangular}) = transpose(transpose(A) \ u.parent)
×
2530
/(u::TransposeAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = transpose(transpose(A) \ u.parent)
×
2531
# disambiguation methods: /(Transpose of AbsVec, Adj/Trans of <:AbstractTriangular)
2532
for (tritype, comptritype) in ((:LowerTriangular, :UpperTriangular),
2533
                               (:UnitLowerTriangular, :UnitUpperTriangular),
2534
                               (:UpperTriangular, :LowerTriangular),
2535
                               (:UnitUpperTriangular, :UnitLowerTriangular))
2536
    @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Adjoint}) = transpose($comptritype(conj(parent(parent(A)))) \ u.parent)
×
2537
    @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Transpose}) = transpose(transpose(A) \ u.parent)
×
2538
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc