• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JuliaLang / julia / #37591

pending completion
#37591

push

local

web-flow
Allocation Profiler: Types for all allocations (#50337)

Pass the types to the allocator functions.

-------

Before this PR, we were missing the types for allocations in two cases:

1. allocations from codegen
2. allocations in `gc_managed_realloc_`

The second one is easy: those are always used for buffers, right?

For the first one: we extend the allocation functions called from
codegen, to take the type as a parameter, and set the tag there.

I kept the old interfaces around, since I think that they cannot be
removed due to supporting legacy code?

------

An example of the generated code:
```julia
  %ptls_field6 = getelementptr inbounds {}**, {}*** %4, i64 2
  %13 = bitcast {}*** %ptls_field6 to i8**
  %ptls_load78 = load i8*, i8** %13, align 8
  %box = call noalias nonnull dereferenceable(32) {}* @ijl_gc_pool_alloc_typed(i8* %ptls_load78, i32 1184, i32 32, i64 4366152144) #7
```

Fixes #43688.
Fixes #45268.

Co-authored-by: Valentin Churavy <vchuravy@users.noreply.github.com>

72755 of 84117 relevant lines covered (86.49%)

22738368.36 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.37
/stdlib/LinearAlgebra/src/triangular.jl
1
# This file is a part of Julia. License is MIT: https://julialang.org/license
2

3
## Triangular
4

5
# could be renamed to Triangular when that name has been fully deprecated
6
abstract type AbstractTriangular{T} <: AbstractMatrix{T} end
7

8
# First loop through all methods that don't need special care for upper/lower and unit diagonal
9
for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTriangular)
10
    @eval begin
11
        struct $t{T,S<:AbstractMatrix{T}} <: AbstractTriangular{T}
12
            data::S
13

14
            function $t{T,S}(data) where {T,S<:AbstractMatrix{T}}
87,973✔
15
                require_one_based_indexing(data)
87,973✔
16
                checksquare(data)
87,974✔
17
                new{T,S}(data)
87,972✔
18
            end
19
        end
20
        $t(A::$t) = A
1,691✔
21
        $t{T}(A::$t{T}) where {T} = A
56✔
22
        function $t(A::AbstractMatrix)
87,939✔
23
            return $t{eltype(A), typeof(A)}(A)
87,939✔
24
        end
25
        function $t{T}(A::AbstractMatrix) where T
28✔
26
            $t(convert(AbstractMatrix{T}, A))
28✔
27
        end
28

29
        function $t{T}(A::$t) where T
3,610✔
30
            Anew = convert(AbstractMatrix{T}, A.data)
3,610✔
31
            $t(Anew)
3,370✔
32
        end
33
        Matrix(A::$t{T}) where {T} = Matrix{T}(A)
33,383✔
34

35
        AbstractMatrix{T}(A::$t) where {T} = $t{T}(A)
3,582✔
36

37
        size(A::$t, d) = size(A.data, d)
110,725✔
38
        size(A::$t) = size(A.data)
318,885✔
39

40
        # For A<:AbstractTriangular, similar(A[, neweltype]) should yield a matrix with the same
41
        # triangular type and underlying storage type as A. The following method covers these cases.
42
        similar(A::$t, ::Type{T}) where {T} = $t(similar(parent(A), T))
1,246✔
43
        # On the other hand, similar(A, [neweltype,] shape...) should yield a matrix of the underlying
44
        # storage type of A (not wrapped in a triangular type). The following method covers these cases.
45
        similar(A::$t, ::Type{T}, dims::Dims{N}) where {T,N} = similar(parent(A), T, dims)
17,347✔
46

47
        copy(A::$t) = $t(copy(A.data))
4,926✔
48

49
        real(A::$t{<:Real}) = A
81✔
50
        real(A::$t{<:Complex}) = (B = real(A.data); $t(B))
710✔
51
    end
52
end
53

54
similar(A::UpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
×
55
    UpperTriangular(similar(parent(parent(A)), T))
56
similar(A::UnitUpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
×
57
    UnitUpperTriangular(similar(parent(parent(A)), T))
58
similar(A::LowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
×
59
    LowerTriangular(similar(parent(parent(A)), T))
60
similar(A::UnitLowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
×
61
    UnitLowerTriangular(similar(parent(parent(A)), T))
62

63

64
"""
65
    LowerTriangular(A::AbstractMatrix)
66

67
Construct a `LowerTriangular` view of the matrix `A`.
68

69
# Examples
70
```jldoctest
71
julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
72
3×3 Matrix{Float64}:
73
 1.0  2.0  3.0
74
 4.0  5.0  6.0
75
 7.0  8.0  9.0
76

77
julia> LowerTriangular(A)
78
3×3 LowerTriangular{Float64, Matrix{Float64}}:
79
 1.0   ⋅    ⋅
80
 4.0  5.0   ⋅
81
 7.0  8.0  9.0
82
```
83
"""
84
LowerTriangular
85
"""
86
    UpperTriangular(A::AbstractMatrix)
87

88
Construct an `UpperTriangular` view of the matrix `A`.
89

90
# Examples
91
```jldoctest
92
julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
93
3×3 Matrix{Float64}:
94
 1.0  2.0  3.0
95
 4.0  5.0  6.0
96
 7.0  8.0  9.0
97

98
julia> UpperTriangular(A)
99
3×3 UpperTriangular{Float64, Matrix{Float64}}:
100
 1.0  2.0  3.0
101
  ⋅   5.0  6.0
102
  ⋅    ⋅   9.0
103
```
104
"""
105
UpperTriangular
106
"""
107
    UnitLowerTriangular(A::AbstractMatrix)
108

109
Construct a `UnitLowerTriangular` view of the matrix `A`.
110
Such a view has the [`oneunit`](@ref) of the [`eltype`](@ref)
111
of `A` on its diagonal.
112

113
# Examples
114
```jldoctest
115
julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
116
3×3 Matrix{Float64}:
117
 1.0  2.0  3.0
118
 4.0  5.0  6.0
119
 7.0  8.0  9.0
120

121
julia> UnitLowerTriangular(A)
122
3×3 UnitLowerTriangular{Float64, Matrix{Float64}}:
123
 1.0   ⋅    ⋅
124
 4.0  1.0   ⋅
125
 7.0  8.0  1.0
126
```
127
"""
128
UnitLowerTriangular
129
"""
130
    UnitUpperTriangular(A::AbstractMatrix)
131

132
Construct an `UnitUpperTriangular` view of the matrix `A`.
133
Such a view has the [`oneunit`](@ref) of the [`eltype`](@ref)
134
of `A` on its diagonal.
135

136
# Examples
137
```jldoctest
138
julia> A = [1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0]
139
3×3 Matrix{Float64}:
140
 1.0  2.0  3.0
141
 4.0  5.0  6.0
142
 7.0  8.0  9.0
143

144
julia> UnitUpperTriangular(A)
145
3×3 UnitUpperTriangular{Float64, Matrix{Float64}}:
146
 1.0  2.0  3.0
147
  ⋅   1.0  6.0
148
  ⋅    ⋅   1.0
149
```
150
"""
151
UnitUpperTriangular
152

153
const UpperOrUnitUpperTriangular{T,S} = Union{UpperTriangular{T,S}, UnitUpperTriangular{T,S}}
154
const LowerOrUnitLowerTriangular{T,S} = Union{LowerTriangular{T,S}, UnitLowerTriangular{T,S}}
155
const UpperOrLowerTriangular{T,S} = Union{UpperOrUnitUpperTriangular{T,S}, LowerOrUnitLowerTriangular{T,S}}
156

157
imag(A::UpperTriangular) = UpperTriangular(imag(A.data))
7✔
158
imag(A::LowerTriangular) = LowerTriangular(imag(A.data))
7✔
159
imag(A::UnitLowerTriangular) = LowerTriangular(tril!(imag(A.data),-1))
7✔
160
imag(A::UnitUpperTriangular) = UpperTriangular(triu!(imag(A.data),1))
7✔
161

162
Array(A::AbstractTriangular) = Matrix(A)
1,399✔
163
parent(A::UpperOrLowerTriangular) = A.data
151,091✔
164

165
# then handle all methods that requires specific handling of upper/lower and unit diagonal
166

167
function Matrix{T}(A::LowerTriangular) where T
9,026✔
168
    B = Matrix{T}(undef, size(A, 1), size(A, 1))
9,026✔
169
    copyto!(B, A.data)
9,299✔
170
    tril!(B)
9,026✔
171
    B
9,026✔
172
end
173
function Matrix{T}(A::UnitLowerTriangular) where T
7,827✔
174
    B = Matrix{T}(undef, size(A, 1), size(A, 1))
7,827✔
175
    copyto!(B, A.data)
7,840✔
176
    tril!(B)
7,827✔
177
    for i = 1:size(B,1)
15,654✔
178
        B[i,i] = oneunit(T)
67,708✔
179
    end
127,589✔
180
    B
7,827✔
181
end
182
function Matrix{T}(A::UpperTriangular) where T
8,998✔
183
    B = Matrix{T}(undef, size(A, 1), size(A, 1))
8,998✔
184
    copyto!(B, A.data)
9,276✔
185
    triu!(B)
8,998✔
186
    B
8,998✔
187
end
188
function Matrix{T}(A::UnitUpperTriangular) where T
8,239✔
189
    B = Matrix{T}(undef, size(A, 1), size(A, 1))
8,239✔
190
    copyto!(B, A.data)
8,252✔
191
    triu!(B)
8,239✔
192
    for i = 1:size(B,1)
16,472✔
193
        B[i,i] = oneunit(T)
69,637✔
194
    end
131,041✔
195
    B
8,239✔
196
end
197

198
function full!(A::LowerTriangular)
49✔
199
    B = A.data
49✔
200
    tril!(B)
49✔
201
    B
49✔
202
end
203
function full!(A::UnitLowerTriangular)
49✔
204
    B = A.data
49✔
205
    tril!(B)
49✔
206
    for i = 1:size(A,1)
98✔
207
        B[i,i] = oneunit(eltype(B))
441✔
208
    end
833✔
209
    B
49✔
210
end
211
function full!(A::UpperTriangular)
49✔
212
    B = A.data
49✔
213
    triu!(B)
49✔
214
    B
49✔
215
end
216
function full!(A::UnitUpperTriangular)
49✔
217
    B = A.data
49✔
218
    triu!(B)
49✔
219
    for i = 1:size(A,1)
98✔
220
        B[i,i] = oneunit(eltype(B))
441✔
221
    end
833✔
222
    B
49✔
223
end
224

225
Base.isassigned(A::UnitLowerTriangular, i::Int, j::Int) =
40,118✔
226
    i > j ? isassigned(A.data, i, j) : true
227
Base.isassigned(A::LowerTriangular, i::Int, j::Int) =
50✔
228
    i >= j ? isassigned(A.data, i, j) : true
229
Base.isassigned(A::UnitUpperTriangular, i::Int, j::Int) =
40,018✔
230
    i < j ? isassigned(A.data, i, j) : true
231
Base.isassigned(A::UpperTriangular, i::Int, j::Int) =
2,706✔
232
    i <= j ? isassigned(A.data, i, j) : true
233

234
Base.isstored(A::UnitLowerTriangular, i::Int, j::Int) =
×
235
    i > j ? Base.isstored(A.data, i, j) : false
236
Base.isstored(A::LowerTriangular, i::Int, j::Int) =
×
237
    i >= j ? Base.isstored(A.data, i, j) : false
238
Base.isstored(A::UnitUpperTriangular, i::Int, j::Int) =
×
239
    i < j ? Base.isstored(A.data, i, j) : false
240
Base.isstored(A::UpperTriangular, i::Int, j::Int) =
×
241
    i <= j ? Base.isstored(A.data, i, j) : false
242

243
getindex(A::UnitLowerTriangular{T}, i::Integer, j::Integer) where {T} =
1,728,404✔
244
    i > j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T))
245
getindex(A::LowerTriangular, i::Integer, j::Integer) =
2,488,958✔
246
    i >= j ? A.data[i,j] : zero(A.data[j,i])
247
getindex(A::UnitUpperTriangular{T}, i::Integer, j::Integer) where {T} =
1,771,255✔
248
    i < j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T))
249
getindex(A::UpperTriangular, i::Integer, j::Integer) =
3,256,956✔
250
    i <= j ? A.data[i,j] : zero(A.data[j,i])
251

252
function setindex!(A::UpperTriangular, x, i::Integer, j::Integer)
50,803✔
253
    if i > j
50,803✔
254
        iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " *
2,434✔
255
            "($i, $j) of an UpperTriangular matrix to a nonzero value ($x)"))
256
    else
257
        A.data[i,j] = x
48,624✔
258
    end
259
    return A
50,548✔
260
end
261

262
function setindex!(A::UnitUpperTriangular, x, i::Integer, j::Integer)
2,855✔
263
    if i > j
2,855✔
264
        iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " *
1,204✔
265
            "($i, $j) of a UnitUpperTriangular matrix to a nonzero value ($x)"))
266
    elseif i == j
1,903✔
267
        x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
251✔
268
            "of a UnitUpperTriangular matrix to a non-unit value ($x)"))
269
    else
270
        A.data[i,j] = x
1,652✔
271
    end
272
    return A
2,540✔
273
end
274

275
function setindex!(A::LowerTriangular, x, i::Integer, j::Integer)
5,353✔
276
    if i < j
5,353✔
277
        iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " *
1,784✔
278
            "($i, $j) of a LowerTriangular matrix to a nonzero value ($x)"))
279
    else
280
        A.data[i,j] = x
3,823✔
281
    end
282
    return A
5,099✔
283
end
284

285
function setindex!(A::UnitLowerTriangular, x, i::Integer, j::Integer)
2,855✔
286
    if i < j
2,855✔
287
        iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " *
1,204✔
288
            "($i, $j) of a UnitLowerTriangular matrix to a nonzero value ($x)"))
289
    elseif i == j
1,903✔
290
        x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " *
251✔
291
            "of a UnitLowerTriangular matrix to a non-unit value ($x)"))
292
    else
293
        A.data[i,j] = x
1,652✔
294
    end
295
    return A
2,540✔
296
end
297

298

299
## structured matrix methods ##
300
function Base.replace_in_print_matrix(A::Union{UpperTriangular,UnitUpperTriangular},
21,362✔
301
                                      i::Integer, j::Integer, s::AbstractString)
302
    return i <= j ? s : Base.replace_with_centered_mark(s)
21,362✔
303
end
304
function Base.replace_in_print_matrix(A::Union{LowerTriangular,UnitLowerTriangular},
20,084✔
305
                                      i::Integer, j::Integer, s::AbstractString)
306
    return i >= j ? s : Base.replace_with_centered_mark(s)
20,084✔
307
end
308

309
function istril(A::Union{LowerTriangular,UnitLowerTriangular}, k::Integer=0)
102✔
310
    k >= 0 && return true
102✔
311
    return _istril(A, k)
×
312
end
313
function istriu(A::Union{UpperTriangular,UnitUpperTriangular}, k::Integer=0)
953✔
314
    k <= 0 && return true
953✔
315
    return _istriu(A, k)
×
316
end
317
istril(A::Adjoint, k::Integer=0) = istriu(A.parent, -k)
6,602✔
318
istril(A::Transpose, k::Integer=0) = istriu(A.parent, -k)
1,487✔
319
istriu(A::Adjoint, k::Integer=0) = istril(A.parent, -k)
6,613✔
320
istriu(A::Transpose, k::Integer=0) = istril(A.parent, -k)
1,487✔
321

322
function tril!(A::UpperTriangular{T}, k::Integer=0) where {T}
35✔
323
    n = size(A,1)
35✔
324
    if k < 0
35✔
325
        fill!(A.data, zero(T))
1,134✔
326
        return A
14✔
327
    elseif k == 0
21✔
328
        for j in 1:n, i in 1:j-1
63✔
329
            A.data[i,j] = zero(T)
252✔
330
        end
308✔
331
        return A
7✔
332
    else
333
        return UpperTriangular(tril!(A.data,k))
14✔
334
    end
335
end
336
triu!(A::UpperTriangular, k::Integer=0) = UpperTriangular(triu!(A.data, k))
186✔
337

338
function tril!(A::UnitUpperTriangular{T}, k::Integer=0) where {T}
35✔
339
    n = size(A,1)
35✔
340
    if k < 0
35✔
341
        fill!(A.data, zero(T))
1,134✔
342
        return UpperTriangular(A.data)
14✔
343
    elseif k == 0
21✔
344
        fill!(A.data, zero(T))
567✔
345
        for i in diagind(A)
14✔
346
            A.data[i] = oneunit(T)
63✔
347
        end
119✔
348
        return UpperTriangular(A.data)
7✔
349
    else
350
        for i in diagind(A)
28✔
351
            A.data[i] = oneunit(T)
126✔
352
        end
238✔
353
        return UpperTriangular(tril!(A.data,k))
14✔
354
    end
355
end
356

357
function triu!(A::UnitUpperTriangular, k::Integer=0)
35✔
358
    for i in diagind(A)
70✔
359
        A.data[i] = oneunit(eltype(A))
315✔
360
    end
595✔
361
    return triu!(UpperTriangular(A.data), k)
35✔
362
end
363

364
function triu!(A::LowerTriangular{T}, k::Integer=0) where {T}
35✔
365
    n = size(A,1)
35✔
366
    if k > 0
35✔
367
        fill!(A.data, zero(T))
1,134✔
368
        return A
14✔
369
    elseif k == 0
21✔
370
        for j in 1:n, i in j+1:n
63✔
371
            A.data[i,j] = zero(T)
252✔
372
        end
308✔
373
        return A
7✔
374
    else
375
        return LowerTriangular(triu!(A.data, k))
14✔
376
    end
377
end
378

379
tril!(A::LowerTriangular, k::Integer=0) = LowerTriangular(tril!(A.data, k))
70✔
380

381
function triu!(A::UnitLowerTriangular{T}, k::Integer=0) where T
35✔
382
    n = size(A,1)
35✔
383
    if k > 0
35✔
384
        fill!(A.data, zero(T))
1,134✔
385
        return LowerTriangular(A.data)
14✔
386
    elseif k == 0
21✔
387
        fill!(A.data, zero(T))
567✔
388
        for i in diagind(A)
14✔
389
            A.data[i] = oneunit(T)
63✔
390
        end
119✔
391
        return LowerTriangular(A.data)
7✔
392
    else
393
        for i in diagind(A)
28✔
394
            A.data[i] = oneunit(T)
126✔
395
        end
238✔
396
        return LowerTriangular(triu!(A.data, k))
14✔
397
    end
398
end
399

400
function tril!(A::UnitLowerTriangular, k::Integer=0)
35✔
401
    for i in diagind(A)
70✔
402
        A.data[i] = oneunit(eltype(A))
315✔
403
    end
595✔
404
    return tril!(LowerTriangular(A.data), k)
35✔
405
end
406

407
adjoint(A::LowerTriangular) = UpperTriangular(adjoint(A.data))
4,375✔
408
adjoint(A::UpperTriangular) = LowerTriangular(adjoint(A.data))
4,997✔
409
adjoint(A::UnitLowerTriangular) = UnitUpperTriangular(adjoint(A.data))
4,738✔
410
adjoint(A::UnitUpperTriangular) = UnitLowerTriangular(adjoint(A.data))
4,422✔
411
transpose(A::LowerTriangular) = UpperTriangular(transpose(A.data))
4,134✔
412
transpose(A::UpperTriangular) = LowerTriangular(transpose(A.data))
4,494✔
413
transpose(A::UnitLowerTriangular) = UnitUpperTriangular(transpose(A.data))
4,944✔
414
transpose(A::UnitUpperTriangular) = UnitLowerTriangular(transpose(A.data))
4,371✔
415

416
transpose!(A::LowerTriangular) = UpperTriangular(copytri!(A.data, 'L', false, true))
21✔
417
transpose!(A::UnitLowerTriangular) = UnitUpperTriangular(copytri!(A.data, 'L', false, true))
21✔
418
transpose!(A::UpperTriangular) = LowerTriangular(copytri!(A.data, 'U', false, true))
21✔
419
transpose!(A::UnitUpperTriangular) = UnitLowerTriangular(copytri!(A.data, 'U', false, true))
21✔
420
adjoint!(A::LowerTriangular) = UpperTriangular(copytri!(A.data, 'L' , true, true))
21✔
421
adjoint!(A::UnitLowerTriangular) = UnitUpperTriangular(copytri!(A.data, 'L' , true, true))
21✔
422
adjoint!(A::UpperTriangular) = LowerTriangular(copytri!(A.data, 'U' , true, true))
21✔
423
adjoint!(A::UnitUpperTriangular) = UnitLowerTriangular(copytri!(A.data, 'U' , true, true))
21✔
424

425
diag(A::LowerTriangular) = diag(A.data)
55✔
426
diag(A::UnitLowerTriangular) = fill(oneunit(eltype(A)), size(A,1))
159✔
427
diag(A::UpperTriangular) = diag(A.data)
218✔
428
diag(A::UnitUpperTriangular) = fill(oneunit(eltype(A)), size(A,1))
321✔
429

430
# Unary operations
431
-(A::LowerTriangular) = LowerTriangular(-A.data)
9✔
432
-(A::UpperTriangular) = UpperTriangular(-A.data)
162✔
433
function -(A::UnitLowerTriangular)
9✔
434
    Anew = -A.data
9✔
435
    for i = 1:size(A, 1)
18✔
436
        Anew[i, i] = -A[i, i]
126✔
437
    end
129✔
438
    LowerTriangular(Anew)
9✔
439
end
440
function -(A::UnitUpperTriangular)
9✔
441
    Anew = -A.data
9✔
442
    for i = 1:size(A, 1)
18✔
443
        Anew[i, i] = -A[i, i]
126✔
444
    end
129✔
445
    UpperTriangular(Anew)
9✔
446
end
447

448
tr(A::LowerTriangular) = tr(A.data)
7✔
449
tr(A::UnitLowerTriangular) = size(A, 1) * oneunit(eltype(A))
7✔
450
tr(A::UpperTriangular) = tr(A.data)
7✔
451
tr(A::UnitUpperTriangular) = size(A, 1) * oneunit(eltype(A))
7✔
452

453
# copy and scale
454
function copyto!(A::T, B::T) where {T<:Union{UpperTriangular,UnitUpperTriangular}}
1,641✔
455
    n = size(B,1)
1,641✔
456
    for j = 1:n
3,282✔
457
        for i = 1:(isa(B, UnitUpperTriangular) ? j-1 : j)
14,534✔
458
            @inbounds A[i,j] = B[i,j]
25,692✔
459
        end
44,131✔
460
    end
12,921✔
461
    return A
1,641✔
462
end
463
function copyto!(A::T, B::T) where {T<:Union{LowerTriangular,UnitLowerTriangular}}
56✔
464
    n = size(B,1)
56✔
465
    for j = 1:n
112✔
466
        for i = (isa(B, UnitLowerTriangular) ? j+1 : j):n
952✔
467
            @inbounds A[i,j] = B[i,j]
2,149✔
468
        end
3,836✔
469
    end
924✔
470
    return A
56✔
471
end
472

473
# Define `mul!` for (Unit){Upper,Lower}Triangular matrices times a number.
474
# be permissive here and require compatibility later in _triscale!
475
@inline mul!(A::AbstractTriangular, B::AbstractTriangular, C::Number, alpha::Number, beta::Number) =
568✔
476
    _triscale!(A, B, C, MulAddMul(alpha, beta))
477
@inline mul!(A::AbstractTriangular, B::Number, C::AbstractTriangular, alpha::Number, beta::Number) =
14✔
478
    _triscale!(A, B, C, MulAddMul(alpha, beta))
479

480
function _triscale!(A::UpperTriangular, B::UpperTriangular, c::Number, _add)
753✔
481
    n = checksquare(B)
753✔
482
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
753✔
483
    for j = 1:n
1,506✔
484
        for i = 1:j
5,862✔
485
            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
8,683✔
486
        end
14,435✔
487
    end
5,109✔
488
    return A
753✔
489
end
490
function _triscale!(A::UpperTriangular, c::Number, B::UpperTriangular, _add)
39✔
491
    n = checksquare(B)
39✔
492
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
39✔
493
    for j = 1:n
78✔
494
        for i = 1:j
766✔
495
            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
2,075✔
496
        end
3,767✔
497
    end
727✔
498
    return A
39✔
499
end
500
function _triscale!(A::UpperOrUnitUpperTriangular, B::UnitUpperTriangular, c::Number, _add)
9✔
501
    n = checksquare(B)
9✔
502
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
9✔
503
    for j = 1:n
18✔
504
        @inbounds _modify!(_add, c, A, (j,j))
69✔
505
        for i = 1:(j - 1)
129✔
506
            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
258✔
507
        end
456✔
508
    end
129✔
509
    return A
9✔
510
end
511
function _triscale!(A::UpperOrUnitUpperTriangular, c::Number, B::UnitUpperTriangular, _add)
7✔
512
    n = checksquare(B)
7✔
513
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
7✔
514
    for j = 1:n
14✔
515
        @inbounds _modify!(_add, c, A, (j,j))
63✔
516
        for i = 1:(j - 1)
119✔
517
            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
252✔
518
        end
448✔
519
    end
119✔
520
    return A
7✔
521
end
522
function _triscale!(A::LowerTriangular, B::LowerTriangular, c::Number, _add)
83✔
523
    n = checksquare(B)
83✔
524
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
83✔
525
    for j = 1:n
166✔
526
        for i = j:n
1,114✔
527
            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
2,507✔
528
        end
4,457✔
529
    end
1,031✔
530
    return A
83✔
531
end
532
function _triscale!(A::LowerTriangular, c::Number, B::LowerTriangular, _add)
39✔
533
    n = checksquare(B)
39✔
534
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
39✔
535
    for j = 1:n
78✔
536
        for i = j:n
766✔
537
            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
2,075✔
538
        end
3,767✔
539
    end
727✔
540
    return A
39✔
541
end
542
function _triscale!(A::LowerOrUnitLowerTriangular, B::UnitLowerTriangular, c::Number, _add)
9✔
543
    n = checksquare(B)
9✔
544
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
9✔
545
    for j = 1:n
18✔
546
        @inbounds _modify!(_add, c, A, (j,j))
69✔
547
        for i = (j + 1):n
129✔
548
            @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j))
258✔
549
        end
456✔
550
    end
129✔
551
    return A
9✔
552
end
553
function _triscale!(A::LowerOrUnitLowerTriangular, c::Number, B::UnitLowerTriangular, _add)
7✔
554
    n = checksquare(B)
7✔
555
    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
7✔
556
    for j = 1:n
14✔
557
        @inbounds _modify!(_add, c, A, (j,j))
63✔
558
        for i = (j + 1):n
119✔
559
            @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j))
252✔
560
        end
448✔
561
    end
119✔
562
    return A
7✔
563
end
564

565
rmul!(A::UpperOrLowerTriangular, c::Number) = @inline _triscale!(A, A, c, MulAddMul())
286✔
566
lmul!(c::Number, A::UpperOrLowerTriangular) = @inline _triscale!(A, c, A, MulAddMul())
78✔
567

568
function dot(x::AbstractVector, A::UpperTriangular, y::AbstractVector)
42✔
569
    require_one_based_indexing(x, y)
42✔
570
    m = size(A, 1)
42✔
571
    (length(x) == m == length(y)) || throw(DimensionMismatch())
42✔
572
    if iszero(m)
42✔
573
        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
×
574
    end
575
    x₁ = x[1]
42✔
576
    r = dot(x₁, A[1,1], y[1])
42✔
577
    @inbounds for j in 2:m
84✔
578
        yj = y[j]
336✔
579
        if !iszero(yj)
336✔
580
            temp = adjoint(A[1,j]) * x₁
336✔
581
            @simd for i in 2:j
336✔
582
                temp += adjoint(A[i,j]) * x[i]
1,512✔
583
            end
×
584
            r += dot(temp, yj)
336✔
585
        end
586
    end
630✔
587
    return r
42✔
588
end
589
function dot(x::AbstractVector, A::UnitUpperTriangular, y::AbstractVector)
42✔
590
    require_one_based_indexing(x, y)
42✔
591
    m = size(A, 1)
42✔
592
    (length(x) == m == length(y)) || throw(DimensionMismatch())
42✔
593
    if iszero(m)
42✔
594
        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
×
595
    end
596
    x₁ = first(x)
42✔
597
    r = dot(x₁, y[1])
42✔
598
    @inbounds for j in 2:m
84✔
599
        yj = y[j]
336✔
600
        if !iszero(yj)
336✔
601
            temp = adjoint(A[1,j]) * x₁
336✔
602
            @simd for i in 2:j-1
378✔
603
                temp += adjoint(A[i,j]) * x[i]
1,176✔
604
            end
×
605
            r += dot(temp, yj)
420✔
606
            r += dot(x[j], yj)
336✔
607
        end
608
    end
630✔
609
    return r
42✔
610
end
611
function dot(x::AbstractVector, A::LowerTriangular, y::AbstractVector)
42✔
612
    require_one_based_indexing(x, y)
42✔
613
    m = size(A, 1)
42✔
614
    (length(x) == m == length(y)) || throw(DimensionMismatch())
42✔
615
    if iszero(m)
42✔
616
        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
×
617
    end
618
    r = zero(typeof(dot(first(x), first(A), first(y))))
42✔
619
    @inbounds for j in 1:m
84✔
620
        yj = y[j]
378✔
621
        if !iszero(yj)
378✔
622
            temp = adjoint(A[j,j]) * x[j]
378✔
623
            @simd for i in j+1:m
420✔
624
                temp += adjoint(A[i,j]) * x[i]
1,512✔
625
            end
×
626
            r += dot(temp, yj)
378✔
627
        end
628
    end
714✔
629
    return r
42✔
630
end
631
function dot(x::AbstractVector, A::UnitLowerTriangular, y::AbstractVector)
42✔
632
    require_one_based_indexing(x, y)
42✔
633
    m = size(A, 1)
42✔
634
    (length(x) == m == length(y)) || throw(DimensionMismatch())
42✔
635
    if iszero(m)
42✔
636
        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
×
637
    end
638
    r = zero(typeof(dot(first(x), first(y))))
42✔
639
    @inbounds for j in 1:m
84✔
640
        yj = y[j]
378✔
641
        if !iszero(yj)
378✔
642
            temp = x[j]
378✔
643
            @simd for i in j+1:m
420✔
644
                temp += adjoint(A[i,j]) * x[i]
1,876✔
645
            end
×
646
            r += dot(temp, yj)
491✔
647
        end
648
    end
714✔
649
    return r
42✔
650
end
651

652
fillstored!(A::LowerTriangular, x)     = (fillband!(A.data, x, 1-size(A,1), 0); A)
2✔
653
fillstored!(A::UnitLowerTriangular, x) = (fillband!(A.data, x, 1-size(A,1), -1); A)
2✔
654
fillstored!(A::UpperTriangular, x)     = (fillband!(A.data, x, 0, size(A,2)-1); A)
2✔
655
fillstored!(A::UnitUpperTriangular, x) = (fillband!(A.data, x, 1, size(A,2)-1); A)
×
656

657
# Binary operations
658
+(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(A.data + B.data)
171✔
659
+(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(A.data + B.data)
63✔
660
+(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(A.data + triu(B.data, 1) + I)
50✔
661
+(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(A.data + tril(B.data, -1) + I)
50✔
662
+(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(triu(A.data, 1) + B.data + I)
50✔
663
+(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(tril(A.data, -1) + B.data + I)
49✔
664
+(A::UnitUpperTriangular, B::UnitUpperTriangular) = UpperTriangular(triu(A.data, 1) + triu(B.data, 1) + 2I)
49✔
665
+(A::UnitLowerTriangular, B::UnitLowerTriangular) = LowerTriangular(tril(A.data, -1) + tril(B.data, -1) + 2I)
49✔
666
+(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) + copyto!(similar(parent(B)), B)
406✔
667

668
-(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(A.data - B.data)
221✔
669
-(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(A.data - B.data)
212✔
670
-(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(A.data - triu(B.data, 1) - I)
50✔
671
-(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(A.data - tril(B.data, -1) - I)
49✔
672
-(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(triu(A.data, 1) - B.data + I)
50✔
673
-(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(tril(A.data, -1) - B.data + I)
49✔
674
-(A::UnitUpperTriangular, B::UnitUpperTriangular) = UpperTriangular(triu(A.data, 1) - triu(B.data, 1))
57✔
675
-(A::UnitLowerTriangular, B::UnitLowerTriangular) = LowerTriangular(tril(A.data, -1) - tril(B.data, -1))
56✔
676
-(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) - copyto!(similar(parent(B)), B)
396✔
677

678
######################
679
# BlasFloat routines #
680
######################
681

682
# which triangle to use of the underlying data
683
uplo_char(::UpperOrUnitUpperTriangular) = 'U'
19,662✔
684
uplo_char(::LowerOrUnitLowerTriangular) = 'L'
12,910✔
685
uplo_char(::UpperOrUnitUpperTriangular{<:Any,<:AdjOrTrans}) = 'L'
13,970✔
686
uplo_char(::LowerOrUnitLowerTriangular{<:Any,<:AdjOrTrans}) = 'U'
13,900✔
687
uplo_char(::UpperOrUnitUpperTriangular{<:Any,<:Adjoint{<:Any,<:Transpose}}) = 'U'
87✔
688
uplo_char(::LowerOrUnitLowerTriangular{<:Any,<:Adjoint{<:Any,<:Transpose}}) = 'L'
87✔
689
uplo_char(::UpperOrUnitUpperTriangular{<:Any,<:Transpose{<:Any,<:Adjoint}}) = 'U'
2✔
690
uplo_char(::LowerOrUnitLowerTriangular{<:Any,<:Transpose{<:Any,<:Adjoint}}) = 'L'
2✔
691

692
isunit_char(::UpperTriangular) = 'N'
21,623✔
693
isunit_char(::UnitUpperTriangular) = 'U'
12,098✔
694
isunit_char(::LowerTriangular) = 'N'
15,044✔
695
isunit_char(::UnitLowerTriangular) = 'U'
11,855✔
696

697
lmul!(A::Tridiagonal, B::AbstractTriangular) = A*full!(B)
168✔
698
mul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) = _trimul!(C, A, B)
3,046✔
699
mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) = _trimul!(C, A, B)
6,113✔
700
mul!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = _trimul!(C, A, B)
5,642✔
701
mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractTriangular) = _trimul!(C, A, B)
12,206✔
702

703
# generic fallback for AbstractTriangular matrices outside of the four subtypes provided here
704
_trimul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) =
×
705
    lmul!(A, copyto!(C, B))
706
_trimul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) =
×
707
    lmul!(A, inplace_adj_or_trans(B)(C, _unwrap_at(B)))
708
_trimul!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) =
×
709
    rmul!(inplace_adj_or_trans(A)(C, _unwrap_at(A)), B)
710
_trimul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractTriangular) =
×
711
    lmul!(A, copyto!(C, B))
712
# redirect for UpperOrLowerTriangular
713
_trimul!(C::AbstractVecOrMat, A::UpperOrLowerTriangular, B::AbstractVector) =
3,598✔
714
    generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B)
715
_trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::AbstractMatrix) =
6,401✔
716
    generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B)
717
_trimul!(C::AbstractMatrix, A::AbstractMatrix, B::UpperOrLowerTriangular) =
6,194✔
718
    generic_mattrimul!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B)))
719
_trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::UpperOrLowerTriangular) =
12,254✔
720
    generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B)
721
# disambiguation with AbstractTriangular
722
_trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::AbstractTriangular) =
×
723
    generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B)
724
_trimul!(C::AbstractMatrix, A::AbstractTriangular, B::UpperOrLowerTriangular) =
×
725
    generic_mattrimul!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B)))
726

727
lmul!(A::AbstractTriangular, B::AbstractVecOrMat) = @inline _trimul!(B, A, B)
792✔
728
rmul!(A::AbstractMatrix, B::AbstractTriangular)   = @inline _trimul!(A, A, B)
552✔
729

730

731
for TC in (:AbstractVector, :AbstractMatrix)
732
    @eval @inline function mul!(C::$TC, A::AbstractTriangular, B::AbstractVector, alpha::Number, beta::Number)
666✔
733
        if isone(alpha) && iszero(beta)
666✔
734
            return mul!(C, A, B)
257✔
735
        else
736
            return generic_matvecmul!(C, 'N', A, B, MulAddMul(alpha, beta))
409✔
737
        end
738
    end
739
end
740
for (TA, TB) in ((:AbstractTriangular, :AbstractMatrix),
741
                    (:AbstractMatrix, :AbstractTriangular),
742
                    (:AbstractTriangular, :AbstractTriangular)
743
                )
744
    @eval @inline function mul!(C::AbstractMatrix, A::$TA, B::$TB, alpha::Number, beta::Number)
3,670✔
745
        if isone(alpha) && iszero(beta)
3,670✔
746
            return mul!(C, A, B)
1,666✔
747
        else
748
            return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
2,004✔
749
        end
750
    end
751
end
752

753
ldiv!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) = _ldiv!(C, A, B)
16,852✔
754
# generic fallback for AbstractTriangular, directs to 2-arg [l/r]div!
755
_ldiv!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) =
×
756
    ldiv!(A, inplace_adj_or_trans(B)(C, _unwrap_at(B)))
757
_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) =
×
758
    rdiv!(inplace_adj_or_trans(A)(C, _unwrap_at(A)), B)
759
# redirect for UpperOrLowerTriangular to generic_*div!
760
_ldiv!(C::AbstractVecOrMat, A::UpperOrLowerTriangular, B::AbstractVecOrMat) =
28,480✔
761
    generic_trimatdiv!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B)
762
_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UpperOrLowerTriangular) =
7,738✔
763
    generic_mattridiv!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B)))
764

765
ldiv!(A::AbstractTriangular, B::AbstractVecOrMat) = @inline _ldiv!(B, A, B)
11,628✔
766
rdiv!(A::AbstractMatrix, B::AbstractTriangular)   = @inline _rdiv!(A, A, B)
1,894✔
767

768
# preserve triangular structure in in-place multiplication/division
769
for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular),
770
                        (:UpperTriangular, :UpperTriangular, :UnitUpperTriangular),
771
                        (:UpperTriangular, :UnitUpperTriangular, :UpperTriangular),
772
                        (:UnitUpperTriangular, :UnitUpperTriangular, :UnitUpperTriangular),
773
                        (:LowerTriangular, :LowerTriangular, :LowerTriangular),
774
                        (:LowerTriangular, :LowerTriangular, :UnitLowerTriangular),
775
                        (:LowerTriangular, :UnitLowerTriangular, :LowerTriangular),
776
                        (:UnitLowerTriangular, :UnitLowerTriangular, :UnitLowerTriangular))
777
    @eval begin
778
        function _trimul!(C::$cty, A::$aty, B::$bty)
1,320✔
779
            _trimul!(parent(C), A, B)
1,320✔
780
            return C
1,320✔
781
        end
782
        function _ldiv!(C::$cty, A::$aty, B::$bty)
72✔
783
            _ldiv!(parent(C), A, B)
72✔
784
            return C
72✔
785
        end
786
        function _rdiv!(C::$cty, A::$aty, B::$bty)
1,703✔
787
            _rdiv!(parent(C), A, B)
1,703✔
788
            return C
1,703✔
789
        end
790
    end
791
end
792

793
for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
794
                            (:UnitLowerTriangular, 'L', 'U'),
795
                            (:UpperTriangular, 'U', 'N'),
796
                            (:UnitUpperTriangular, 'U', 'U'))
797
    @eval begin
798
        # Matrix inverse
799
        inv!(A::$t{T,S}) where {T<:BlasFloat,S<:StridedMatrix} =
16✔
800
            $t{T,S}(LAPACK.trtri!($uploc, $isunitc, A.data))
801

802
        function inv(A::$t{T}) where {T}
533✔
803
            S = typeof(inv(oneunit(T)))
533✔
804
            if S <: BlasFloat || S === T # i.e. A is unitless
533✔
805
                $t(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A))))
464✔
806
            else
807
                J = (one(T)*I)(size(A, 1))
650✔
808
                $t(ldiv!(similar(A, S, size(A)), A, J))
69✔
809
            end
810
        end
811

812
        # Error bounds for triangular solve
813
        errorbounds(A::$t{T,<:StridedMatrix}, X::StridedVecOrMat{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
160✔
814
            LAPACK.trrfs!($uploc, 'N', $isunitc, A.data, B, X)
815

816
        # Condition numbers
817
        function cond(A::$t{<:BlasFloat,<:StridedMatrix}, p::Real=2)
144✔
818
            checksquare(A)
144✔
819
            if p == 1
144✔
820
                return inv(LAPACK.trcon!('O', $uploc, $isunitc, A.data))
64✔
821
            elseif p == Inf
80✔
822
                return inv(LAPACK.trcon!('I', $uploc, $isunitc, A.data))
64✔
823
            else # use fallback
824
                return cond(copyto!(similar(parent(A)), A), p)
16✔
825
            end
826
        end
827
    end
828
end
829

830
# multiplication
831
generic_trimatmul!(c::StridedVector{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, b::AbstractVector{T}) where {T<:BlasFloat} =
1,064✔
832
    BLAS.trmv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, c === b ? c : copyto!(c, b))
833
generic_trimatmul!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractMatrix{T}) where {T<:BlasFloat} =
6,104✔
834
    BLAS.trmm!('L', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, C === B ? C : copyto!(C, B))
835
generic_mattrimul!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::AbstractMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} =
1,720✔
836
    BLAS.trmm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A))
837
# division
838
generic_trimatdiv!(C::StridedVecOrMat{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} =
6,631✔
839
    LAPACK.trtrs!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : copyto!(C, B))
840
generic_mattridiv!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::AbstractMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} =
2,956✔
841
    BLAS.trsm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A))
842

843
errorbounds(A::AbstractTriangular{T}, X::AbstractVecOrMat{T}, B::AbstractVecOrMat{T}) where {T<:Union{BigFloat,Complex{BigFloat}}} =
×
844
    error("not implemented yet! Please submit a pull request.")
845
function errorbounds(A::AbstractTriangular{TA}, X::AbstractVecOrMat{TX}, B::AbstractVecOrMat{TB}) where {TA<:Number,TX<:Number,TB<:Number}
128✔
846
    TAXB = promote_type(TA, TB, TX, Float32)
128✔
847
    errorbounds(convert(AbstractMatrix{TAXB}, A), convert(AbstractArray{TAXB}, X), convert(AbstractArray{TAXB}, B))
128✔
848
end
849

850
# Eigensystems
851
## Notice that trecv works for quasi-triangular matrices and therefore the lower sub diagonal must be zeroed before calling the subroutine
852
function eigvecs(A::UpperTriangular{<:BlasFloat,<:StridedMatrix})
5✔
853
    LAPACK.trevc!('R', 'A', BlasInt[], triu!(A.data))
5✔
854
end
855
function eigvecs(A::UnitUpperTriangular{<:BlasFloat,<:StridedMatrix})
5✔
856
    for i = 1:size(A, 1)
10✔
857
        A.data[i,i] = 1
45✔
858
    end
85✔
859
    LAPACK.trevc!('R', 'A', BlasInt[], triu!(A.data))
5✔
860
end
861
function eigvecs(A::LowerTriangular{<:BlasFloat,<:StridedMatrix})
5✔
862
    LAPACK.trevc!('L', 'A', BlasInt[], copy(tril!(A.data)'))
5✔
863
end
864
function eigvecs(A::UnitLowerTriangular{<:BlasFloat,<:StridedMatrix})
5✔
865
    for i = 1:size(A, 1)
10✔
866
        A.data[i,i] = 1
45✔
867
    end
85✔
868
    LAPACK.trevc!('L', 'A', BlasInt[], copy(tril!(A.data)'))
5✔
869
end
870

871
####################
872
# Generic routines #
873
####################
874

875
for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
876
                   (LowerTriangular, UnitLowerTriangular))
877
    @eval begin
878
        (*)(A::$t, x::Number) = $t(A.data*x)
36✔
879

880
        function (*)(A::$unitt, x::Number)
20✔
881
            B = A.data*x
20✔
882
            for i = 1:size(A, 1)
40✔
883
                B[i,i] = x
180✔
884
            end
340✔
885
            $t(B)
20✔
886
        end
887

888
        (*)(x::Number, A::$t) = $t(x*A.data)
640✔
889

890
        function (*)(x::Number, A::$unitt)
36✔
891
            B = x*A.data
36✔
892
            for i = 1:size(A, 1)
72✔
893
                B[i,i] = x
324✔
894
            end
612✔
895
            $t(B)
36✔
896
        end
897

898
        (/)(A::$t, x::Number) = $t(A.data/x)
58✔
899

900
        function (/)(A::$unitt, x::Number)
14✔
901
            B = A.data/x
14✔
902
            invx = inv(x)
14✔
903
            for i = 1:size(A, 1)
28✔
904
                B[i,i] = invx
126✔
905
            end
238✔
906
            $t(B)
14✔
907
        end
908

909
        (\)(x::Number, A::$t) = $t(x\A.data)
14✔
910

911
        function (\)(x::Number, A::$unitt)
14✔
912
            B = x\A.data
14✔
913
            invx = inv(x)
14✔
914
            for i = 1:size(A, 1)
28✔
915
                B[i,i] = invx
126✔
916
            end
238✔
917
            $t(B)
14✔
918
        end
919
    end
920
end
921

922
## Generic triangular multiplication
923
function generic_trimatmul!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractVecOrMat)
14,907✔
924
    require_one_based_indexing(C, A, B)
14,907✔
925
    m, n = size(B, 1), size(B, 2)
14,907✔
926
    N = size(A, 1)
14,907✔
927
    if m != N
14,907✔
928
        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
2,472✔
929
    end
930
    mc, nc = size(C, 1), size(C, 2)
12,435✔
931
    if mc != N || nc != n
24,870✔
932
        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
×
933
    end
934
    oA = oneunit(eltype(A))
12,435✔
935
    unit = isunitc == 'U'
12,435✔
936
    @inbounds if uploc == 'U'
12,435✔
937
        if tfun === identity
6,332✔
938
            for j in 1:n
5,871✔
939
                for i in 1:m
36,700✔
940
                    Cij = (unit ? oA : A[i,i]) * B[i,j]
238,795✔
941
                    for k in i + 1:m
300,668✔
942
                        Cij += A[i,k] * B[k,j]
852,583✔
943
                    end
1,120,383✔
944
                    C[i,j] = Cij
159,509✔
945
                end
300,668✔
946
            end
33,754✔
947
        else # tfun in (transpose, adjoint)
948
            for j in 1:n
6,751✔
949
                for i in m:-1:1
48,826✔
950
                    Cij = (unit ? oA : tfun(A[i,i])) * B[i,j]
318,559✔
951
                    for k in 1:i - 1
401,991✔
952
                        Cij += tfun(A[k,i]) * B[k,j]
1,168,211✔
953
                    end
1,490,395✔
954
                    C[i,j] = Cij
213,202✔
955
                end
401,991✔
956
            end
48,386✔
957
        end
958
    else # uploc == 'L'
959
        if tfun === identity
6,103✔
960
            for j in 1:n
5,173✔
961
                for i in m:-1:1
34,626✔
962
                    Cij = (unit ? oA : A[i,i]) * B[i,j]
224,263✔
963
                    for k in 1:i - 1
282,015✔
964
                        Cij += A[i,k] * B[k,j]
795,366✔
965
                    end
1,046,075✔
966
                    C[i,j] = Cij
149,664✔
967
                end
282,015✔
968
            end
17,313✔
969
        else # tfun in (transpose, adjoint)
970
            for j in 1:n
6,993✔
971
                for i in 1:m
50,102✔
972
                    Cij = (unit ? oA : tfun(A[i,i])) * B[i,j]
323,499✔
973
                    for k in i + 1:m
406,469✔
974
                        Cij += tfun(A[k,i]) * B[k,j]
1,175,832✔
975
                    end
1,493,461✔
976
                    C[i,j] = Cij
215,760✔
977
                end
406,469✔
978
            end
25,051✔
979
        end
980
    end
981
    return C
12,435✔
982
end
983
# conjugate cases
984
function generic_trimatmul!(C::AbstractVecOrMat, uploc, isunitc, ::Function, xA::AdjOrTrans, B::AbstractVecOrMat)
14✔
985
    A = parent(xA)
14✔
986
    require_one_based_indexing(C, A, B)
14✔
987
    m, n = size(B, 1), size(B, 2)
14✔
988
    N = size(A, 1)
14✔
989
    if m != N
14✔
990
        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
×
991
    end
992
    mc, nc = size(C, 1), size(C, 2)
14✔
993
    if mc != N || nc != n
28✔
994
        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)"))
×
995
    end
996
    oA = oneunit(eltype(A))
14✔
997
    unit = isunitc == 'U'
14✔
998
    @inbounds if uploc == 'U'
14✔
999
        for j in 1:n
11✔
1000
            for i in 1:m
14✔
1001
                Cij = (unit ? oA : conj(A[i,i])) * B[i,j]
61✔
1002
                for k in i + 1:m
73✔
1003
                    Cij += conj(A[i,k]) * B[k,j]
199✔
1004
                end
261✔
1005
                C[i,j] = Cij
40✔
1006
            end
73✔
1007
        end
7✔
1008
    else # uploc == 'L'
1009
        for j in 1:n
11✔
1010
            for i in m:-1:1
14✔
1011
                Cij = (unit ? oA : conj(A[i,i])) * B[i,j]
59✔
1012
                for k in 1:i - 1
73✔
1013
                    Cij += conj(A[i,k]) * B[k,j]
178✔
1014
                end
261✔
1015
                C[i,j] = Cij
40✔
1016
            end
73✔
1017
        end
7✔
1018
    end
1019
    return C
14✔
1020
end
1021

1022
function generic_mattrimul!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractMatrix)
4,474✔
1023
    require_one_based_indexing(C, A, B)
4,474✔
1024
    m, n = size(A, 1), size(A, 2)
4,474✔
1025
    N = size(B, 1)
4,474✔
1026
    if n != N
4,474✔
1027
        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
2,472✔
1028
    end
1029
    mc, nc = size(C, 1), size(C, 2)
2,002✔
1030
    if mc != m || nc != N
4,004✔
1031
        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
×
1032
    end
1033
    oB = oneunit(eltype(B))
2,002✔
1034
    unit = isunitc == 'U'
2,002✔
1035
    @inbounds if uploc == 'U'
2,002✔
1036
        if tfun === identity
952✔
1037
            for i in 1:m
852✔
1038
                for j in n:-1:1
7,188✔
1039
                    Cij = A[i,j] * (unit ? oB : B[j,j])
49,462✔
1040
                    for k in 1:j - 1
61,102✔
1041
                        Cij += A[i,k] * B[k,j]
135,742✔
1042
                    end
237,866✔
1043
                    C[i,j] = Cij
32,348✔
1044
                end
61,102✔
1045
            end
6,762✔
1046
        else # tfun in (transpose, adjoint)
1047
            for i in 1:m
1,052✔
1048
                for j in 1:n
8,956✔
1049
                    Cij = A[i,j] * (unit ? oB : tfun(B[j,j]))
59,332✔
1050
                    for k in j + 1:n
73,646✔
1051
                        Cij += A[i,k] * tfun(B[j,k])
156,072✔
1052
                    end
271,872✔
1053
                    C[i,j] = Cij
39,062✔
1054
                end
73,646✔
1055
            end
8,856✔
1056
        end
1057
    else # uploc == 'L'
1058
        if tfun === identity
1,050✔
1059
            for i in 1:m
848✔
1060
                for j in 1:n
7,184✔
1061
                    Cij = A[i,j] * (unit ? oB : B[j,j])
47,302✔
1062
                    for k in j + 1:n
59,944✔
1063
                        Cij += A[i,k] * B[k,j]
132,672✔
1064
                    end
228,768✔
1065
                    C[i,j] = Cij
31,768✔
1066
                end
59,944✔
1067
            end
3,592✔
1068
        else # tfun in (transpose, adjoint)
1069
            for i in 1:m
1,252✔
1070
                for j in n:-1:1
9,972✔
1071
                    Cij = A[i,j] * (unit ? oB : tfun(B[j,j]))
61,776✔
1072
                    for k in 1:j - 1
78,282✔
1073
                        Cij += A[i,k] * tfun(B[j,k])
166,536✔
1074
                    end
280,224✔
1075
                    C[i,j] = Cij
41,634✔
1076
                end
78,282✔
1077
            end
4,986✔
1078
        end
1079
    end
1080
    return C
2,002✔
1081
end
1082
# conjugate cases
1083
function generic_mattrimul!(C::AbstractMatrix, uploc, isunitc, ::Function, A::AbstractMatrix, xB::AdjOrTrans)
×
1084
    B = parent(xB)
×
1085
    require_one_based_indexing(C, A, B)
×
1086
    m, n = size(A, 1), size(A, 2)
×
1087
    N = size(B, 1)
×
1088
    if n != N
×
1089
        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N"))
×
1090
    end
1091
    mc, nc = size(C, 1), size(C, 2)
×
1092
    if mc != m || nc != N
×
1093
        throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)"))
×
1094
    end
1095
    oB = oneunit(eltype(B))
×
1096
    unit = isunitc == 'U'
×
1097
    @inbounds if uploc == 'U'
×
1098
        for i in 1:m
×
1099
            for j in n:-1:1
×
1100
                Cij = A[i,j] * (unit ? oB : conj(B[j,j]))
×
1101
                for k in 1:j - 1
×
1102
                    Cij += A[i,k] * conj(B[k,j])
×
1103
                end
×
1104
                C[i,j] = Cij
×
1105
            end
×
1106
        end
×
1107
    else # uploc == 'L'
1108
        for i in 1:m
×
1109
            for j in 1:n
×
1110
                Cij = A[i,j] * (unit ? oB : conj(B[j,j]))
×
1111
                for k in j + 1:n
×
1112
                    Cij += A[i,k] * conj(B[k,j])
×
1113
                end
×
1114
                C[i,j] = Cij
×
1115
            end
×
1116
        end
×
1117
    end
1118
    return C
×
1119
end
1120

1121
#Generic solver using naive substitution
1122

1123
@inline _ustrip(a) = oneunit(a) \ a
284,264✔
1124
@inline _ustrip(a::Union{AbstractFloat,Integer,Complex,Rational}) = a
2,248,293✔
1125

1126
# manually hoisting b[j] significantly improves performance as of Dec 2015
1127
# manually eliding bounds checking significantly improves performance as of Dec 2015
1128
# replacing repeated references to A.data[j,j] with [Ajj = A.data[j,j] and references to Ajj]
1129
# does not significantly impact performance as of Dec 2015
1130
# in the transpose and conjugate transpose naive substitution variants,
1131
# accumulating in z rather than b[j,k] significantly improves performance as of Dec 2015
1132
function generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractVecOrMat)
15,909✔
1133
    require_one_based_indexing(C, A, B)
15,909✔
1134
    mA, nA = size(A)
15,909✔
1135
    m, n = size(B, 1), size(B,2)
15,909✔
1136
    if nA != m
15,909✔
1137
        throw(DimensionMismatch("second dimension of left hand side A, $nA, and first dimension of right hand side B, $m, must be equal"))
228✔
1138
    end
1139
    if size(C) != size(B)
31,362✔
1140
        throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))"))
×
1141
    end
1142
    oA = oneunit(eltype(A))
15,681✔
1143
    @inbounds if uploc == 'U'
15,681✔
1144
        if isunitc == 'N'
8,046✔
1145
            if tfun === identity
5,702✔
1146
                for k in 1:n
5,771✔
1147
                    amm = A[m,m]
19,762✔
1148
                    iszero(amm) && throw(SingularException(m))
19,693✔
1149
                    Cm = C[m,k] = amm \ B[m,k]
23,407✔
1150
                    # fill C-column
1151
                    for i in m-1:-1:1
39,309✔
1152
                        C[i,k] = oA \ B[i,k] - _ustrip(A[i,m]) * Cm
194,152✔
1153
                    end
312,106✔
1154
                    for j in m-1:-1:1
39,309✔
1155
                        ajj = A[j,j]
166,501✔
1156
                        iszero(ajj) && throw(SingularException(j))
165,880✔
1157
                        Cj = C[j,k] = _ustrip(ajj) \ C[j,k]
166,020✔
1158
                        for i in j-1:-1:1
312,106✔
1159
                            C[i,k] -= _ustrip(A[i,j]) * Cj
706,840✔
1160
                        end
1,265,908✔
1161
                    end
312,106✔
1162
                end
36,287✔
1163
            else # tfun in (adjoint, transpose)
1164
                for k in 1:n
4,986✔
1165
                    for j in 1:m
28,024✔
1166
                        ajj = A[j,j]
130,532✔
1167
                        iszero(ajj) && throw(SingularException(j))
129,152✔
1168
                        Bj = B[j,k]
141,298✔
1169
                        for i in 1:j-1
244,292✔
1170
                            Bj -= tfun(A[i,j]) * C[i,k]
660,670✔
1171
                        end
952,756✔
1172
                        C[j,k] = tfun(ajj) \ Bj
160,229✔
1173
                    end
244,292✔
1174
                end
28,406✔
1175
            end
1176
        else # isunitc == 'U'
1177
            if tfun === identity
2,344✔
1178
                for k in 1:n
2,026✔
1179
                    Cm = C[m,k] = oA \ B[m,k]
7,549✔
1180
                    # fill C-column
1181
                    for i in m-1:-1:1
11,102✔
1182
                        C[i,k] = oA \ B[i,k] - _ustrip(A[i,m]) * Cm
59,628✔
1183
                    end
86,615✔
1184
                    for j in m-1:-1:1
11,102✔
1185
                        Cj = C[j,k]
46,253✔
1186
                        for i in 1:j-1
86,615✔
1187
                            C[i,k] -= _ustrip(A[i,j]) * Cj
169,796✔
1188
                        end
297,292✔
1189
                    end
86,615✔
1190
                end
5,551✔
1191
            else # tfun in (adjoint, transpose)
1192
                for k in 1:n
1,888✔
1193
                    for j in 1:m
4,844✔
1194
                        Bj = B[j,k]
23,228✔
1195
                        for i in 1:j-1
42,098✔
1196
                            Bj -= tfun(A[i,j]) * C[i,k]
113,610✔
1197
                        end
164,342✔
1198
                        C[j,k] = oA \ Bj
27,924✔
1199
                    end
42,098✔
1200
                end
10,430✔
1201
            end
1202
        end
1203
    else # uploc == 'L'
1204
        if isunitc == 'N'
7,635✔
1205
            if tfun === identity
4,731✔
1206
                for k in 1:n
4,504✔
1207
                    a11 = A[1,1]
15,860✔
1208
                    iszero(a11) && throw(SingularException(1))
15,791✔
1209
                    C1 = C[1,k] = a11 \ B[1,k]
19,853✔
1210
                    # fill C-column
1211
                    for i in 2:m
31,506✔
1212
                        C[i,k] = oA \ B[i,k] - _ustrip(A[i,1]) * C1
157,316✔
1213
                    end
242,535✔
1214
                    for j in 2:m
31,506✔
1215
                        ajj = A[j,j]
129,765✔
1216
                        iszero(ajj) && throw(SingularException(j))
129,144✔
1217
                        Cj = C[j,k] = _ustrip(ajj) \ C[j,k]
129,242✔
1218
                        for i in j+1:m
242,535✔
1219
                            C[i,k] -= _ustrip(A[i,j]) * Cj
468,946✔
1220
                        end
823,181✔
1221
                    end
242,535✔
1222
                end
29,119✔
1223
            else # tfun in (adjoint, transpose)
1224
                for k in 1:n
4,320✔
1225
                    for j in m:-1:1
25,506✔
1226
                        ajj = A[j,j]
118,952✔
1227
                        iszero(ajj) && throw(SingularException(j))
117,572✔
1228
                        Bj = B[j,k]
129,743✔
1229
                        for i in j+1:m
222,391✔
1230
                            Bj -= tfun(A[i,j]) * C[i,k]
608,481✔
1231
                        end
863,907✔
1232
                        C[j,k] = tfun(ajj) \ Bj
148,372✔
1233
                    end
222,391✔
1234
                end
15,140✔
1235
            end
1236
        else # isunitc == 'U'
1237
            if tfun === identity
2,904✔
1238
                for k in 1:n
2,601✔
1239
                    C1 = C[1,k] = oA \ B[1,k]
9,222✔
1240
                    # fill C-column
1241
                    for i in 2:m
14,422✔
1242
                        C[i,k] = oA \ B[i,k] - _ustrip(A[i,1]) * C1
76,927✔
1243
                    end
119,723✔
1244
                    for j in 2:m
14,422✔
1245
                        Cj = C[j,k]
63,586✔
1246
                        for i in j+1:m
119,723✔
1247
                            C[i,k] -= _ustrip(A[i,j]) * Cj
334,552✔
1248
                        end
611,318✔
1249
                    end
119,723✔
1250
                end
7,211✔
1251
            else # tfun in (adjoint, transpose)
1252
                for k in 1:n
2,424✔
1253
                    for j in m:-1:1
6,366✔
1254
                        Bj = B[j,k]
30,398✔
1255
                        for i in j+1:m
55,677✔
1256
                            Bj -= tfun(A[i,j]) * C[i,k]
144,819✔
1257
                        end
220,263✔
1258
                        C[j,k] = oA \ Bj
35,451✔
1259
                    end
55,677✔
1260
                end
3,183✔
1261
            end
1262
        end
1263
    end
1264
    return C
15,605✔
1265
end
1266
# conjugate cases
1267
function generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, ::Function, xA::AdjOrTrans, B::AbstractVecOrMat)
164✔
1268
    A = parent(xA)
164✔
1269
    require_one_based_indexing(C, A, B)
164✔
1270
    mA, nA = size(A)
164✔
1271
    m, n = size(B, 1), size(B,2)
164✔
1272
    if nA != m
164✔
1273
        throw(DimensionMismatch("second dimension of left hand side A, $nA, and first dimension of right hand side B, $m, must be equal"))
×
1274
    end
1275
    if size(C) != size(B)
328✔
1276
        throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))"))
×
1277
    end
1278
    oA = oneunit(eltype(A))
164✔
1279
    @inbounds if uploc == 'U'
164✔
1280
        if isunitc == 'N'
82✔
1281
            for k in 1:n
164✔
1282
                amm = conj(A[m,m])
742✔
1283
                iszero(amm) && throw(SingularException(m))
742✔
1284
                Cm = C[m,k] = amm \ B[m,k]
778✔
1285
                # fill C-column
1286
                for i in m-1:-1:1
1,484✔
1287
                    C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,m])) * Cm
5,976✔
1288
                end
11,210✔
1289
                for j in m-1:-1:1
1,484✔
1290
                    ajj = conj(A[j,j])
5,976✔
1291
                    iszero(ajj) && throw(SingularException(j))
5,976✔
1292
                    Cj = C[j,k] = _ustrip(ajj) \ C[j,k]
5,976✔
1293
                    for i in j-1:-1:1
11,210✔
1294
                        C[i,k] -= _ustrip(conj(A[i,j])) * Cj
21,096✔
1295
                    end
36,958✔
1296
                end
11,210✔
1297
            end
1,402✔
1298
        else # isunitc == 'U'
1299
            for k in 1:n
×
1300
                Cm = C[m,k] = oA \ B[m,k]
×
1301
                # fill C-column
1302
                for i in m-1:-1:1
×
1303
                    C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,m])) * Cm
×
1304
                end
×
1305
                for j in m-1:-1:1
×
1306
                    Cj = C[j,k]
×
1307
                    for i in 1:j-1
×
1308
                        C[i,k] -= _ustrip(conj(A[i,j])) * Cj
×
1309
                    end
×
1310
                end
×
1311
            end
82✔
1312
        end
1313
    else # uploc == 'L'
1314
        if isunitc == 'N'
82✔
1315
            for k in 1:n
164✔
1316
                a11 = conj(A[1,1])
742✔
1317
                iszero(a11) && throw(SingularException(1))
742✔
1318
                C1 = C[1,k] = a11 \ B[1,k]
788✔
1319
                # fill C-column
1320
                for i in 2:m
1,484✔
1321
                    C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,1])) * C1
5,976✔
1322
                end
11,210✔
1323
                for j in 2:m
1,484✔
1324
                    ajj = conj(A[j,j])
5,976✔
1325
                    iszero(ajj) && throw(SingularException(j))
5,976✔
1326
                    Cj = C[j,k] = _ustrip(ajj) \ C[j,k]
5,976✔
1327
                    for i in j+1:m
11,210✔
1328
                        C[i,k] -= _ustrip(conj(A[i,j])) * Cj
21,096✔
1329
                    end
36,958✔
1330
                end
11,210✔
1331
            end
742✔
1332
        else # isunitc == 'U'
1333
            for k in 1:n
×
1334
                C1 = C[1,k] = oA \ B[1,k]
×
1335
                # fill C-column
1336
                for i in 2:m
×
1337
                    C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,1])) * C1
×
1338
                end
×
1339
                for j in 1:m
×
1340
                    Cj = C[j,k]
×
1341
                    for i in j+1:m
×
1342
                        C[i,k] -= _ustrip(conj(A[i,j])) * Cj
×
1343
                    end
×
1344
                end
×
1345
            end
×
1346
        end
1347
    end
1348
    return C
164✔
1349
end
1350

1351
function generic_mattridiv!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractMatrix)
4,782✔
1352
    require_one_based_indexing(C, A, B)
4,782✔
1353
    m, n = size(A)
4,782✔
1354
    if size(B, 1) != n
4,782✔
1355
        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
2,028✔
1356
    end
1357
    if size(C) != size(A)
5,508✔
1358
        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
×
1359
    end
1360
    oB = oneunit(eltype(B))
2,754✔
1361
    unit = isunitc == 'U'
2,754✔
1362
    @inbounds if uploc == 'U'
2,754✔
1363
        if tfun === identity
1,381✔
1364
            for i in 1:m
2,078✔
1365
                for j in 1:n
19,202✔
1366
                    Aij = A[i,j]
119,700✔
1367
                    for k in 1:j - 1
168,257✔
1368
                        Aij -= C[i,k]*B[k,j]
462,412✔
1369
                    end
657,404✔
1370
                    unit || (iszero(B[j,j]) && throw(SingularException(j)))
134,425✔
1371
                    C[i,j] = Aij / (unit ? oB : B[j,j])
132,963✔
1372
                end
168,257✔
1373
            end
18,163✔
1374
        else # tfun in (adjoint, transpose)
1375
            for i in 1:m
684✔
1376
                for j in n:-1:1
6,200✔
1377
                    Aij = A[i,j]
28,696✔
1378
                    for k in j + 1:n
53,140✔
1379
                        Aij -= C[i,k]*tfun(B[j,k])
139,788✔
1380
                    end
202,140✔
1381
                    unit || (iszero(B[j,j]) && throw(SingularException(j)))
43,280✔
1382
                    C[i,j] = Aij / (unit ? oB : tfun(B[j,j]))
44,216✔
1383
                end
53,140✔
1384
            end
6,897✔
1385
        end
1386
    else # uploc == 'L'
1387
        if tfun === identity
1,373✔
1388
            for i in 1:m
2,062✔
1389
                for j in n:-1:1
19,042✔
1390
                    Aij = A[i,j]
118,756✔
1391
                    for k in j + 1:n
166,737✔
1392
                        Aij -= C[i,k]*B[k,j]
458,812✔
1393
                    end
650,924✔
1394
                    unit || (iszero(B[j,j]) && throw(SingularException(j)))
133,225✔
1395
                    C[i,j] = Aij / (unit ? oB : B[j,j])
131,763✔
1396
                end
166,737✔
1397
            end
9,521✔
1398
        else # tfun in (adjoint, transpose)
1399
            for i in 1:m
684✔
1400
                for j in 1:n
6,200✔
1401
                    Aij = A[i,j]
28,696✔
1402
                    for k in 1:j - 1
53,140✔
1403
                        Aij -= C[i,k]*tfun(B[j,k])
139,788✔
1404
                    end
202,140✔
1405
                    unit || (iszero(B[j,j]) && throw(SingularException(j)))
43,280✔
1406
                    C[i,j] = Aij / (unit ? oB : tfun(B[j,j]))
44,216✔
1407
                end
53,140✔
1408
            end
3,100✔
1409
        end
1410
    end
1411
    return C
2,754✔
1412
end
1413
function generic_mattridiv!(C::AbstractMatrix, uploc, isunitc, ::Function, A::AbstractMatrix, xB::AdjOrTrans)
×
1414
    B = parent(xB)
×
1415
    require_one_based_indexing(C, A, B)
×
1416
    m, n = size(A)
×
1417
    if size(B, 1) != n
×
1418
        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
×
1419
    end
1420
    if size(C) != size(A)
×
1421
        throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))"))
×
1422
    end
1423
    oB = oneunit(eltype(B))
×
1424
    unit = isunitc == 'U'
×
1425
    if uploc == 'U'
×
1426
        @inbounds for i in 1:m
×
1427
            for j in 1:n
×
1428
                Aij = A[i,j]
×
1429
                for k in 1:j - 1
×
1430
                    Aij -= C[i,k]*conj(B[k,j])
×
1431
                end
×
1432
                unit || (iszero(B[j,j]) && throw(SingularException(j)))
×
1433
                C[i,j] = Aij / (unit ? oB : conj(B[j,j]))
×
1434
            end
×
1435
        end
×
1436
    else # uploc == 'L'
1437
        @inbounds for i in 1:m
×
1438
            for j in n:-1:1
×
1439
                Aij = A[i,j]
×
1440
                for k in j + 1:n
×
1441
                    Aij -= C[i,k]*conj(B[k,j])
×
1442
                end
×
1443
                unit || (iszero(B[j,j]) && throw(SingularException(j)))
×
1444
                C[i,j] = Aij / (unit ? oB : conj(B[j,j]))
×
1445
            end
×
1446
        end
×
1447
    end
1448
    return C
×
1449
end
1450

1451
# these are needed because we don't keep track of left- and right-multiplication in tritrimul!
1452
rmul!(A::UpperTriangular, B::UpperTriangular)     = UpperTriangular(rmul!(triu!(A.data), B))
12✔
1453
rmul!(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(rmul!(triu!(A.data), B))
12✔
1454
rmul!(A::LowerTriangular, B::LowerTriangular)     = LowerTriangular(rmul!(tril!(A.data), B))
12✔
1455
rmul!(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(rmul!(tril!(A.data), B))
12✔
1456

1457
# Promotion
1458
## Promotion methods in matmul don't apply to triangular multiplication since
1459
## it is inplace. Hence we have to make very similar definitions, but without
1460
## allocation of a result array. For multiplication and unit diagonal division
1461
## the element type doesn't have to be stable under division whereas that is
1462
## necessary in the general triangular solve problem.
1463

1464
_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA<:Integer,TB<:Integer} =
404✔
1465
    _init_eltype(*, TA, TB)
1466
_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA,TB} =
6,356✔
1467
    _init_eltype(op, TA, TB)
1468
## The general promotion methods
1469
function *(A::AbstractTriangular, B::AbstractTriangular)
3,643✔
1470
    TAB = _init_eltype(*, eltype(A), eltype(B))
3,643✔
1471
    mul!(similar(B, TAB, size(B)), A, B)
3,643✔
1472
end
1473

1474
for mat in (:AbstractVector, :AbstractMatrix)
1475
    ### Multiplication with triangle to the left and hence rhs cannot be transposed.
1476
    @eval function *(A::AbstractTriangular, B::$mat)
12,645✔
1477
        require_one_based_indexing(B)
12,645✔
1478
        TAB = _init_eltype(*, eltype(A), eltype(B))
12,645✔
1479
        mul!(similar(B, TAB, size(B)), A, B)
12,860✔
1480
    end
1481
    ### Left division with triangle to the left hence rhs cannot be transposed. No quotients.
1482
    @eval function \(A::Union{UnitUpperTriangular,UnitLowerTriangular}, B::$mat)
4,223✔
1483
        require_one_based_indexing(B)
4,223✔
1484
        TAB = _inner_type_promotion(\, eltype(A), eltype(B))
4,223✔
1485
        ldiv!(similar(B, TAB, size(B)), A, B)
4,239✔
1486
    end
1487
    ### Left division with triangle to the left hence rhs cannot be transposed. Quotients.
1488
    @eval function \(A::Union{UpperTriangular,LowerTriangular}, B::$mat)
10,638✔
1489
        require_one_based_indexing(B)
10,638✔
1490
        TAB = _init_eltype(\, eltype(A), eltype(B))
10,639✔
1491
        ldiv!(similar(B, TAB, size(B)), A, B)
11,325✔
1492
    end
1493
    ### Right division with triangle to the right hence lhs cannot be transposed. No quotients.
1494
    @eval function /(A::$mat, B::Union{UnitUpperTriangular, UnitLowerTriangular})
2,537✔
1495
        require_one_based_indexing(A)
2,537✔
1496
        TAB = _inner_type_promotion(/, eltype(A), eltype(B))
2,537✔
1497
        _rdiv!(similar(A, TAB, size(A)), A, B)
2,553✔
1498
    end
1499
    ### Right division with triangle to the right hence lhs cannot be transposed. Quotients.
1500
    @eval function /(A::$mat, B::Union{UpperTriangular,LowerTriangular})
2,601✔
1501
        require_one_based_indexing(A)
2,601✔
1502
        TAB = _init_eltype(/, eltype(A), eltype(B))
2,601✔
1503
        _rdiv!(similar(A, TAB, size(A)), A, B)
2,617✔
1504
    end
1505
end
1506
### Multiplication with triangle to the right and hence lhs cannot be transposed.
1507
# Only for AbstractMatrix, hence outside the above loop.
1508
function *(A::AbstractMatrix, B::AbstractTriangular)
4,781✔
1509
    require_one_based_indexing(A)
4,781✔
1510
    TAB = _init_eltype(*, eltype(A), eltype(B))
4,781✔
1511
    mul!(similar(A, TAB, size(A)), A, B)
5,089✔
1512
end
1513
# ambiguity resolution with definitions in matmul.jl
1514
*(v::AdjointAbsVec, A::AbstractTriangular) = adjoint(adjoint(A) * v.parent)
404✔
1515
*(v::TransposeAbsVec, A::AbstractTriangular) = transpose(transpose(A) * v.parent)
344✔
1516

1517
## Some Triangular-Triangular cases. We might want to write tailored methods
1518
## for these cases, but I'm not sure it is worth it.
1519
for f in (:*, :\)
1520
    @eval begin
1521
        ($f)(A::LowerTriangular, B::LowerTriangular) =
739✔
1522
            LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix))
1523
        ($f)(A::LowerTriangular, B::UnitLowerTriangular) =
672✔
1524
            LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix))
1525
        ($f)(A::UnitLowerTriangular, B::LowerTriangular) =
627✔
1526
            LowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix))
1527
        ($f)(A::UnitLowerTriangular, B::UnitLowerTriangular) =
616✔
1528
            UnitLowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix))
1529
        ($f)(A::UpperTriangular, B::UpperTriangular) =
2,531✔
1530
            UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix))
1531
        ($f)(A::UpperTriangular, B::UnitUpperTriangular) =
672✔
1532
            UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix))
1533
        ($f)(A::UnitUpperTriangular, B::UpperTriangular) =
627✔
1534
            UpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix))
1535
        ($f)(A::UnitUpperTriangular, B::UnitUpperTriangular) =
621✔
1536
            UnitUpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix))
1537
    end
1538
end
1539
(/)(A::LowerTriangular, B::LowerTriangular) =
123✔
1540
    LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular))
1541
(/)(A::LowerTriangular, B::UnitLowerTriangular) =
117✔
1542
    LowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular))
1543
(/)(A::UnitLowerTriangular, B::LowerTriangular) =
98✔
1544
    LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular))
1545
(/)(A::UnitLowerTriangular, B::UnitLowerTriangular) =
106✔
1546
    UnitLowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular))
1547
(/)(A::UpperTriangular, B::UpperTriangular) =
167✔
1548
    UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular))
1549
(/)(A::UpperTriangular, B::UnitUpperTriangular) =
117✔
1550
    UpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular))
1551
(/)(A::UnitUpperTriangular, B::UpperTriangular) =
98✔
1552
    UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular))
1553
(/)(A::UnitUpperTriangular, B::UnitUpperTriangular) =
106✔
1554
    UnitUpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular))
1555

1556
# Complex matrix power for upper triangular factor, see:
1557
#   Higham and Lin, "A Schur-Padé algorithm for fractional powers of a Matrix",
1558
#     SIAM J. Matrix Anal. & Appl., 32 (3), (2011) 1056–1078.
1559
#   Higham and Lin, "An improved Schur-Padé algorithm for fractional powers of
1560
#     a matrix and their Fréchet derivatives", SIAM. J. Matrix Anal. & Appl.,
1561
#     34(3), (2013) 1341–1360.
1562
function powm!(A0::UpperTriangular{<:BlasFloat}, p::Real)
60✔
1563
    if abs(p) >= 1
60✔
1564
        throw(ArgumentError("p must be a real number in (-1,1), got $p"))
2✔
1565
    end
1566

1567
    normA0 = opnorm(A0, 1)
58✔
1568
    rmul!(A0, 1/normA0)
58✔
1569

1570
    theta = [1.53e-5, 2.25e-3, 1.92e-2, 6.08e-2, 1.25e-1, 2.03e-1, 2.84e-1]
406✔
1571
    n = checksquare(A0)
58✔
1572

1573
    A, m, s = invsquaring(A0, theta)
58✔
1574
    A = I - A
58✔
1575

1576
    # Compute accurate diagonal of I - T
1577
    sqrt_diag!(A0, A, s)
58✔
1578
    for i = 1:n
116✔
1579
        A[i, i] = -A[i, i]
202✔
1580
    end
346✔
1581
    # Compute the Padé approximant
1582
    c = 0.5 * (p - m) / (2 * m - 1)
58✔
1583
    triu!(A)
58✔
1584
    S = c * A
58✔
1585
    Stmp = similar(S)
58✔
1586
    for j = m-1:-1:1
116✔
1587
        j4 = 4 * j
248✔
1588
        c = (-p - j) / (j4 + 2)
248✔
1589
        for i = 1:n
496✔
1590
            @inbounds S[i, i] = S[i, i] + 1
884✔
1591
        end
1,520✔
1592
        copyto!(Stmp, S)
248✔
1593
        mul!(S, A, c)
248✔
1594
        ldiv!(Stmp, S.data)
248✔
1595

1596
        c = (p - j) / (j4 - 2)
248✔
1597
        for i = 1:n
496✔
1598
            @inbounds S[i, i] = S[i, i] + 1
884✔
1599
        end
1,520✔
1600
        copyto!(Stmp, S)
248✔
1601
        mul!(S, A, c)
248✔
1602
        ldiv!(Stmp, S.data)
248✔
1603
    end
438✔
1604
    for i = 1:n
116✔
1605
        S[i, i] = S[i, i] + 1
202✔
1606
    end
346✔
1607
    copyto!(Stmp, S)
58✔
1608
    mul!(S, A, -p)
58✔
1609
    ldiv!(Stmp, S.data)
58✔
1610
    for i = 1:n
116✔
1611
        @inbounds S[i, i] = S[i, i] + 1
202✔
1612
    end
346✔
1613

1614
    blockpower!(A0, S, p/(2^s))
58✔
1615
    for m = 1:s
116✔
1616
        mul!(Stmp.data, S, S)
234✔
1617
        copyto!(S, Stmp)
234✔
1618
        blockpower!(A0, S, p/(2^(s-m)))
234✔
1619
    end
410✔
1620
    rmul!(S, normA0^p)
58✔
1621
    return S
58✔
1622
end
1623
powm(A::LowerTriangular, p::Real) = copy(transpose(powm!(copy(transpose(A)), p::Real)))
2✔
1624

1625
# Complex matrix logarithm for the upper triangular factor, see:
1626
#   Al-Mohy and Higham, "Improved inverse scaling and squaring algorithms for
1627
#     the matrix logarithm", SIAM J. Sci. Comput., 34(4), (2012), pp. C153–C169.
1628
#   Al-Mohy, Higham and Relton, "Computing the Frechet derivative of the matrix
1629
#     logarithm and estimating the condition number", SIAM J. Sci. Comput.,
1630
#     35(4), (2013), C394–C410.
1631
#
1632
# Based on the code available at http://eprints.ma.man.ac.uk/1851/02/logm.zip,
1633
# Copyright (c) 2011, Awad H. Al-Mohy and Nicholas J. Higham
1634
# Julia version relicensed with permission from original authors
1635
log(A::UpperTriangular{T}) where {T<:BlasFloat} = log_quasitriu(A)
257✔
1636
log(A::UnitUpperTriangular{T}) where {T<:BlasFloat} = log_quasitriu(A)
26✔
1637
log(A::LowerTriangular) = copy(transpose(log(copy(transpose(A)))))
4✔
1638
log(A::UnitLowerTriangular) = copy(transpose(log(copy(transpose(A)))))
4✔
1639

1640
function log_quasitriu(A0::AbstractMatrix{T}) where T<:BlasFloat
296✔
1641
    # allocate real A if log(A) will be real and complex A otherwise
1642
    n = checksquare(A0)
296✔
1643
    if isreal(A0) && (!istriu(A0) || !any(x -> real(x) < zero(real(T)), diag(A0)))
865✔
1644
        A = T <: Complex ? real(A0) : copy(A0)
91✔
1645
    else
1646
        A = T <: Complex ? copy(A0) : complex(A0)
205✔
1647
    end
1648
    if A0 isa UnitUpperTriangular
296✔
1649
        A = UpperTriangular(parent(A))
26✔
1650
        @inbounds for i in 1:n
52✔
1651
            A[i,i] = 1
234✔
1652
        end
442✔
1653
    end
1654
    Y0 = _log_quasitriu!(A0, A)
387✔
1655
    # return complex result for complex input
1656
    Y = T <: Complex ? complex(Y0) : Y0
322✔
1657

1658
    if A0 isa UpperTriangular || A0 isa UnitUpperTriangular
296✔
1659
        return UpperTriangular(Y)
283✔
1660
    else
1661
        return Y
13✔
1662
    end
1663
end
1664
# type-stable implementation of log_quasitriu
1665
# A is a copy of A0 that is overwritten while computing the result. It has the same eltype
1666
# as the result.
1667
function _log_quasitriu!(A0, A)
296✔
1668
    # Find Padé degree m and s while replacing A with A^(1/2^s)
1669
    m, s = _find_params_log_quasitriu!(A)
296✔
1670

1671
    # Compute accurate superdiagonal of A
1672
    _pow_superdiag_quasitriu!(A, A0, 0.5^s)
585✔
1673

1674
    # Compute accurate block diagonal of A
1675
    _sqrt_pow_diag_quasitriu!(A, A0, s)
296✔
1676

1677
    # Get the Gauss-Legendre quadrature points and weights
1678
    R = zeros(Float64, m, m)
10,040✔
1679
    for i = 1:m - 1
583✔
1680
        R[i,i+1] = i / sqrt((2 * i)^2 - 1)
1,396✔
1681
        R[i+1,i] = R[i,i+1]
1,396✔
1682
    end
2,505✔
1683
    x,V = eigen(R)
592✔
1684
    w = Vector{Float64}(undef, m)
296✔
1685
    for i = 1:m
592✔
1686
        x[i] = (x[i] + 1) / 2
3,384✔
1687
        w[i] = V[1,i]^2
1,692✔
1688
    end
3,088✔
1689

1690
    # Compute the Padé approximation
1691
    t = eltype(A)
296✔
1692
    n = size(A, 1)
296✔
1693
    Y = zeros(t, n, n)
10,036✔
1694
    B = similar(A)
296✔
1695
    for k = 1:m
592✔
1696
        B .= t(x[k]) .* A
1,753✔
1697
        @inbounds for i in 1:n
3,384✔
1698
            B[i,i] += 1
8,243✔
1699
        end
14,794✔
1700
        Y .+= t(w[k]) .* rdiv_quasitriu!(A, B)
3,384✔
1701
    end
3,088✔
1702

1703
    # Scale back
1704
    lmul!(2.0^s, Y)
585✔
1705

1706
    # Compute accurate diagonal and superdiagonal of log(A)
1707
    _log_diag_quasitriu!(Y, A0)
296✔
1708

1709
    return Y
296✔
1710
end
1711

1712
# Auxiliary functions for matrix logarithm and matrix power
1713

1714
# Find Padé degree m and s while replacing A with A^(1/2^s)
1715
#   Al-Mohy and Higham, "Improved inverse scaling and squaring algorithms for
1716
#     the matrix logarithm", SIAM J. Sci. Comput., 34(4), (2012), pp. C153–C169.
1717
#   from Algorithm 4.1
1718
function _find_params_log_quasitriu!(A)
296✔
1719
    maxsqrt = 100
296✔
1720
    theta = [1.586970738772063e-005,
2,072✔
1721
         2.313807884242979e-003,
1722
         1.938179313533253e-002,
1723
         6.209171588994762e-002,
1724
         1.276404810806775e-001,
1725
         2.060962623452836e-001,
1726
         2.879093714241194e-001]
1727
    tmax = size(theta, 1)
296✔
1728
    n = size(A, 1)
296✔
1729
    p = 0
296✔
1730
    m = 0
296✔
1731

1732
    # Find s0, the smallest s such that the ρ(triu(A)^(1/2^s) - I) ≤ theta[tmax], where ρ(X)
1733
    # is the spectral radius of X
1734
    d = complex.(@view(A[diagind(A)]))
296✔
1735
    dm1 = d .- 1
592✔
1736
    s = 0
296✔
1737
    while norm(dm1, Inf) > theta[tmax] && s < maxsqrt
1,387✔
1738
        d .= sqrt.(d)
1,091✔
1739
        dm1 .= d .- 1
2,182✔
1740
        s = s + 1
1,091✔
1741
    end
1,091✔
1742
    s0 = s
296✔
1743

1744
    # Compute repeated roots
1745
    for k = 1:min(s, maxsqrt)
558✔
1746
        _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A)
1,091✔
1747
    end
1,920✔
1748

1749
    # these three never needed at the same time, so reuse the same temporary
1750
    AmI = AmI4 = AmI5 = A - I
296✔
1751
    AmI2 = AmI * AmI
296✔
1752
    AmI3 = AmI2 * AmI
296✔
1753
    d2 = sqrt(opnorm(AmI2, 1))
296✔
1754
    d3 = cbrt(opnorm(AmI3, 1))
296✔
1755
    alpha2 = max(d2, d3)
296✔
1756
    foundm = false
296✔
1757
    if alpha2 <= theta[2]
296✔
1758
        m = alpha2 <= theta[1] ? 1 : 2
9✔
1759
        foundm = true
9✔
1760
    end
1761

1762
    while !foundm
640✔
1763
        more_sqrt = false
631✔
1764
        mul!(AmI4, AmI2, AmI2)
631✔
1765
        d4 = opnorm(AmI4, 1)^(1/4)
631✔
1766
        alpha3 = max(d3, d4)
631✔
1767
        if alpha3 <= theta[tmax]
631✔
1768
            local j
×
1769
            for outer j = 3:tmax
732✔
1770
                if alpha3 <= theta[j]
1,506✔
1771
                    break
366✔
1772
                end
1773
            end
1,140✔
1774
            if j <= 6
366✔
1775
                m = j
224✔
1776
                break
224✔
1777
            elseif alpha3 / 2 <= theta[5] && p < 2
142✔
1778
                more_sqrt = true
102✔
1779
                p = p + 1
102✔
1780
           end
1781
        end
1782

1783
        if !more_sqrt
407✔
1784
            mul!(AmI5, AmI3, AmI2)
305✔
1785
            d5 = opnorm(AmI5, 1)^(1/5)
305✔
1786
            alpha4 = max(d4, d5)
305✔
1787
            eta = min(alpha3, alpha4)
305✔
1788
            if eta <= theta[tmax]
305✔
1789
                j = 0
62✔
1790
                for outer j = 6:tmax
124✔
1791
                    if eta <= theta[j]
122✔
1792
                        m = j
62✔
1793
                        break
62✔
1794
                    end
1795
                end
60✔
1796
                break
62✔
1797
            end
1798
        end
1799

1800
        if s == maxsqrt
345✔
1801
            m = tmax
1✔
1802
            break
1✔
1803
        end
1804
        _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A)
344✔
1805
        copyto!(AmI, A)
344✔
1806
        for i in 1:n
688✔
1807
            @inbounds AmI[i,i] -= 1
1,766✔
1808
        end
3,188✔
1809
        mul!(AmI2, AmI, AmI)
344✔
1810
        mul!(AmI3, AmI2, AmI)
344✔
1811
        d3 = cbrt(opnorm(AmI3, 1))
344✔
1812
        s = s + 1
344✔
1813
    end
344✔
1814
    return m, s
296✔
1815
end
1816

1817
# Compute accurate diagonal of A = A0^s - I
1818
function sqrt_diag!(A0::UpperTriangular, A::UpperTriangular, s)
58✔
1819
    n = checksquare(A0)
58✔
1820
    T = eltype(A)
58✔
1821
    @inbounds for i = 1:n
116✔
1822
        a = complex(A0[i,i])
202✔
1823
        A[i,i] = _sqrt_pow(a, s)
202✔
1824
    end
202✔
1825
end
1826
# Compute accurate block diagonal of A = A0^s - I for upper quasi-triangular A0 produced
1827
# by the Schur decomposition. Diagonal is made of 1x1 and 2x2 blocks.
1828
# 2x2 blocks are real with non-negative conjugate pair eigenvalues
1829
function _sqrt_pow_diag_quasitriu!(A, A0, s)
296✔
1830
    n = checksquare(A0)
296✔
1831
    t = typeof(sqrt(zero(eltype(A))))
296✔
1832
    i = 1
296✔
1833
    @inbounds while i < n
296✔
1834
        if iszero(A0[i+1,i])  # 1x1 block
2,199✔
1835
            A[i,i] = _sqrt_pow(t(A0[i,i]), s)
1,095✔
1836
            i += 1
1,095✔
1837
        else  # real 2x2 block
1838
            @views _sqrt_pow_diag_block_2x2!(A[i:i+1,i:i+1], A0[i:i+1,i:i+1], s)
17✔
1839
            i += 2
17✔
1840
        end
1841
    end
1,112✔
1842
    if i == n  # last block is 1x1
296✔
1843
        @inbounds A[n,n] = _sqrt_pow(t(A0[n,n]), s)
289✔
1844
    end
1845
    return A
296✔
1846
end
1847
# compute a^(1/2^s)-1
1848
#   Al-Mohy, "A more accurate Briggs method for the logarithm",
1849
#      Numer. Algorithms, 59, (2012), 393–402.
1850
#   Algorithm 2
1851
function _sqrt_pow(a::Number, s)
1,586✔
1852
    T = typeof(sqrt(zero(a)))
1,586✔
1853
    s == 0 && return T(a) - 1
1,586✔
1854
    s0 = s
1,559✔
1855
    if imag(a) >= 0 && real(a) <= 0 && !iszero(a)  # angle(a) ≥ π / 2
1,559✔
1856
        a = sqrt(a)
139✔
1857
        s0 = s - 1
139✔
1858
    end
1859
    z0 = a - 1
1,559✔
1860
    a = sqrt(a)
1,559✔
1861
    r = 1 + a
1,559✔
1862
    for j = 1:s0-1
3,102✔
1863
        a = sqrt(a)
4,426✔
1864
        r = r * (1 + a)
4,426✔
1865
    end
7,309✔
1866
    return z0 / r
1,559✔
1867
end
1868
# compute A0 = A^(1/2^s)-I for 2x2 real matrices A and A0
1869
# A has non-negative conjugate pair eigenvalues
1870
# "Improved Inverse Scaling and Squaring Algorithms for the Matrix Logarithm"
1871
# SIAM J. Sci. Comput., 34(4), (2012) C153–C169. doi: 10.1137/110852553
1872
# Algorithm 5.1
1873
Base.@propagate_inbounds function _sqrt_pow_diag_block_2x2!(A, A0, s)
17✔
1874
    _sqrt_real_2x2!(A, A0)
17✔
1875
    if isone(s)
17✔
1876
        A[1,1] -= 1
×
1877
        A[2,2] -= 1
×
1878
    else
1879
        # Z = A - I
1880
        z11, z21, z12, z22 = A[1,1] - 1, A[2,1], A[1,2], A[2,2] - 1
17✔
1881
        # A = sqrt(A)
1882
        _sqrt_real_2x2!(A, A)
17✔
1883
        # P = A + I
1884
        p11, p21, p12, p22 = A[1,1] + 1, A[2,1], A[1,2], A[2,2] + 1
17✔
1885
        for i in 1:(s - 2)
34✔
1886
            # A = sqrt(A)
1887
            _sqrt_real_2x2!(A, A)
423✔
1888
            a11, a21, a12, a22 = A[1,1], A[2,1], A[1,2], A[2,2]
423✔
1889
            # P += P * A
1890
            r11 = p11*(1 + a11) + p12*a21
423✔
1891
            r22 = p21*a12 + p22*(1 + a22)
423✔
1892
            p21 = p21*(1 + a11) + p22*a21
423✔
1893
            p12 = p11*a12 + p12*(1 + a22)
423✔
1894
            p11 = r11
423✔
1895
            p22 = r22
423✔
1896
        end
829✔
1897
        # A = Z / P
1898
        c = inv(p11*p22 - p21*p12)
17✔
1899
        A[1,1] = (p22*z11 - p21*z12) * c
17✔
1900
        A[2,1] = (p22*z21 - p21*z22) * c
17✔
1901
        A[1,2] = (p11*z12 - p12*z11) * c
17✔
1902
        A[2,2] = (p11*z22 - p12*z21) * c
17✔
1903
    end
1904
    return A
17✔
1905
end
1906
# Compute accurate superdiagonal of A = A0^s - I for upper quasi-triangular A0 produced
1907
# by a Schur decomposition.
1908
# Higham and Lin, "A Schur–Padé Algorithm for Fractional Powers of a Matrix"
1909
# SIAM J. Matrix Anal. Appl., 32(3), (2011), 1056–1078.
1910
# Equation 5.6
1911
# see also blockpower for when A0 is upper triangular
1912
function _pow_superdiag_quasitriu!(A, A0, p)
296✔
1913
    n = checksquare(A0)
296✔
1914
    t = eltype(A)
296✔
1915
    k = 1
296✔
1916
    @inbounds while k < n
296✔
1917
        if !iszero(A[k+1,k])
2,192✔
1918
            k += 2
10✔
1919
            continue
10✔
1920
        end
1921
        if !(k == n - 1 || iszero(A[k+2,k+1]))
1,910✔
1922
            k += 3
7✔
1923
            continue
7✔
1924
        end
1925
        Ak = t(A0[k,k])
1,248✔
1926
        Akp1 = t(A0[k+1,k+1])
1,248✔
1927

1928
        Akp = Ak^p
1,088✔
1929
        Akp1p = Akp1^p
1,088✔
1930

1931
        if Ak == Akp1
1,088✔
1932
            A[k,k+1] = p * A0[k,k+1] * Ak^(p-1)
285✔
1933
        elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
1,592✔
1934
            A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
423✔
1935
        else
1936
            logAk = log(Ak)
380✔
1937
            logAkp1 = log(Akp1)
380✔
1938
            z = (Akp1 - Ak)/(Akp1 + Ak)
380✔
1939
            if abs(z) > 1
493✔
1940
                A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
73✔
1941
            else
1942
                w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
403✔
1943
                dd = 2 * exp(p*(logAk+logAkp1)/2) * sinh(p*w) / (Akp1 - Ak);
518✔
1944
                A[k,k+1] = A0[k,k+1] * dd
307✔
1945
            end
1946
        end
1947
        k += 1
1,088✔
1948
    end
1,401✔
1949
end
1950

1951
# Compute accurate block diagonal and superdiagonal of A = log(A0) for upper
1952
# quasi-triangular A0 produced by the Schur decomposition.
1953
function _log_diag_quasitriu!(A, A0)
296✔
1954
    n = checksquare(A0)
296✔
1955
    t = eltype(A)
296✔
1956
    k = 1
296✔
1957
    @inbounds while k < n
296✔
1958
        if iszero(A0[k+1,k])  # 1x1 block
1,512✔
1959
            Ak = t(A0[k,k])
851✔
1960
            logAk = log(Ak)
751✔
1961
            A[k,k] = logAk
751✔
1962
            if k < n - 2 && iszero(A0[k+2,k+1])
1,094✔
1963
                Akp1 = t(A0[k+1,k+1])
404✔
1964
                logAkp1 = log(Akp1)
344✔
1965
                A[k+1,k+1] = logAkp1
344✔
1966
                if Ak == Akp1
344✔
1967
                    A[k,k+1] = A0[k,k+1] / Ak
103✔
1968
                elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
486✔
1969
                    A[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak)
121✔
1970
                else
1971
                    z = (Akp1 - Ak)/(Akp1 + Ak)
120✔
1972
                    if abs(z) > 1
152✔
1973
                        A[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak)
32✔
1974
                    else
1975
                        w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
128✔
1976
                        A[k,k+1] = 2 * A0[k,k+1] * w / (Akp1 - Ak)
88✔
1977
                    end
1978
                end
1979
                k += 2
344✔
1980
            else
1981
                k += 1
1,158✔
1982
            end
1983
        else  # real 2x2 block
1984
            @views _log_diag_block_2x2!(A[k:k+1,k:k+1], A0[k:k+1,k:k+1])
17✔
1985
            k += 2
17✔
1986
        end
1987
    end
768✔
1988
    if k == n  # last 1x1 block
296✔
1989
        @inbounds A[n,n] = log(t(A0[n,n]))
309✔
1990
    end
1991
    return A
296✔
1992
end
1993
# compute A0 = log(A) for 2x2 real matrices A and A0, where A0 is a diagonal 2x2 block
1994
# produced by real Schur decomposition.
1995
# Al-Mohy, Higham and Relton, "Computing the Frechet derivative of the matrix
1996
# logarithm and estimating the condition number", SIAM J. Sci. Comput.,
1997
# 35(4), (2013), C394–C410.
1998
# Eq. 6.1
1999
Base.@propagate_inbounds function _log_diag_block_2x2!(A, A0)
17✔
2000
    a, b, c = A0[1,1], A0[1,2], A0[2,1]
17✔
2001
    # avoid underflow/overflow for large/small b and c
2002
    s = sqrt(abs(b)) * sqrt(abs(c))
17✔
2003
    θ = atan(s, a)
17✔
2004
    t = θ / s
17✔
2005
    au = abs(a)
17✔
2006
    if au > s
17✔
2007
        a1 = log1p((s / au)^2) / 2 + log(au)
7✔
2008
    else
2009
        a1 = log1p((au / s)^2) / 2 + log(s)
10✔
2010
    end
2011
    A[1,1] = a1
17✔
2012
    A[2,1] = c*t
17✔
2013
    A[1,2] = b*t
17✔
2014
    A[2,2] = a1
17✔
2015
    return A
17✔
2016
end
2017

2018
# Used only by powm at the moment
2019
# Repeatedly compute the square roots of A so that in the end its
2020
# eigenvalues are close enough to the positive real line
2021
function invsquaring(A0::UpperTriangular, theta)
58✔
2022
    require_one_based_indexing(theta)
58✔
2023
    # assumes theta is in ascending order
2024
    maxsqrt = 100
58✔
2025
    tmax = size(theta, 1)
58✔
2026
    n = checksquare(A0)
58✔
2027
    A = complex(copy(A0))
58✔
2028
    p = 0
58✔
2029
    m = 0
58✔
2030

2031
    # Compute repeated roots
2032
    d = complex(diag(A))
58✔
2033
    dm1 = d .- 1
116✔
2034
    s = 0
58✔
2035
    while norm(dm1, Inf) > theta[tmax] && s < maxsqrt
206✔
2036
        d .= sqrt.(d)
148✔
2037
        dm1 .= d .- 1
296✔
2038
        s = s + 1
148✔
2039
    end
148✔
2040
    s0 = s
58✔
2041
    for k = 1:min(s, maxsqrt)
102✔
2042
        A = sqrt(A)
148✔
2043
    end
252✔
2044

2045
    AmI = A - I
58✔
2046
    d2 = sqrt(opnorm(AmI^2, 1))
58✔
2047
    d3 = cbrt(opnorm(AmI^3, 1))
58✔
2048
    alpha2 = max(d2, d3)
58✔
2049
    foundm = false
58✔
2050
    if alpha2 <= theta[2]
58✔
2051
        m = alpha2 <= theta[1] ? 1 : 2
×
2052
        foundm = true
×
2053
    end
2054

2055
    while !foundm
186✔
2056
        more = false
186✔
2057
        if s > s0
186✔
2058
            d3 = cbrt(opnorm(AmI^3, 1))
114✔
2059
        end
2060
        d4 = opnorm(AmI^4, 1)^(1/4)
186✔
2061
        alpha3 = max(d3, d4)
186✔
2062
        if alpha3 <= theta[tmax]
186✔
2063
            local j
×
2064
            for outer j = 3:tmax
284✔
2065
                if alpha3 <= theta[j]
610✔
2066
                    break
142✔
2067
                elseif alpha3 / 2 <= theta[5] && p < 2
468✔
2068
                    more = true
116✔
2069
                    p = p + 1
116✔
2070
                end
2071
            end
468✔
2072
            if j <= 6
142✔
2073
                m = j
58✔
2074
                foundm = true
58✔
2075
                break
58✔
2076
            elseif alpha3 / 2 <= theta[5] && p < 2
84✔
2077
                more = true
×
2078
                p = p + 1
×
2079
           end
2080
        end
2081

2082
        if !more
128✔
2083
            d5 = opnorm(AmI^5, 1)^(1/5)
86✔
2084
            alpha4 = max(d4, d5)
86✔
2085
            eta = min(alpha3, alpha4)
86✔
2086
            if eta <= theta[tmax]
86✔
2087
                j = 0
56✔
2088
                for outer j = 6:tmax
112✔
2089
                    if eta <= theta[j]
56✔
2090
                        m = j
14✔
2091
                        break
14✔
2092
                    end
2093
                    break
42✔
2094
                end
×
2095
            end
2096
            if s == maxsqrt
86✔
2097
                m = tmax
×
2098
                break
×
2099
            end
2100
            A = sqrt(A)
86✔
2101
            AmI = A - I
86✔
2102
            s = s + 1
86✔
2103
        end
2104
    end
128✔
2105

2106
    # Compute accurate superdiagonal of T
2107
    p = 1 / 2^s
58✔
2108
    A = complex(A)
58✔
2109
    blockpower!(A, A0, p)
58✔
2110
    return A,m,s
58✔
2111
end
2112

2113
# Compute accurate diagonal and superdiagonal of A = A0^p
2114
function blockpower!(A::UpperTriangular, A0::UpperTriangular, p)
350✔
2115
    n = checksquare(A0)
350✔
2116
    @inbounds for k = 1:n-1
700✔
2117
        Ak = complex(A0[k,k])
924✔
2118
        Akp1 = complex(A0[k+1,k+1])
924✔
2119

2120
        Akp = Ak^p
924✔
2121
        Akp1p = Akp1^p
924✔
2122

2123
        A[k,k] = Akp
924✔
2124
        A[k+1,k+1] = Akp1p
924✔
2125

2126
        if Ak == Akp1
924✔
2127
            A[k,k+1] = p * A0[k,k+1] * Ak^(p-1)
98✔
2128
        elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
1,610✔
2129
            A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
98✔
2130
        else
2131
            logAk = log(Ak)
728✔
2132
            logAkp1 = log(Akp1)
728✔
2133
            z = (Akp1 - Ak)/(Akp1 + Ak)
728✔
2134
            if abs(z) > 1
728✔
2135
                A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
×
2136
            else
2137
                w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
728✔
2138
                dd = 2 * exp(p*(logAk+logAkp1)/2) * sinh(p*w) / (Akp1 - Ak);
1,456✔
2139
                A[k,k+1] = A0[k,k+1] * dd
728✔
2140
            end
2141
        end
2142
    end
924✔
2143
end
2144

2145
# Unwinding number
2146
unw(x::Real) = 0
272✔
2147
unw(x::Number) = ceil((imag(x) - pi) / (2 * pi))
1,974✔
2148

2149
# compute A / B for upper quasi-triangular B, possibly overwriting B
2150
function rdiv_quasitriu!(A, B)
1,692✔
2151
    n = checksquare(A)
1,692✔
2152
    AG = copy(A)
1,692✔
2153
    # use Givens rotations to annihilate 2x2 blocks
2154
    @inbounds for k in 1:(n-1)
3,369✔
2155
        s = B[k+1,k]
12,907✔
2156
        iszero(s) && continue  # 1x1 block
6,551✔
2157
        G = first(givens(B[k+1,k+1], s, k, k+1))
85✔
2158
        rmul!(B, G)
400✔
2159
        rmul!(AG, G)
85✔
2160
    end
11,425✔
2161
    return rdiv!(AG, UpperTriangular(B))
1,692✔
2162
end
2163

2164
# End of auxiliary functions for matrix logarithm and matrix power
2165

2166
sqrt(A::UpperTriangular) = sqrt_quasitriu(A)
582✔
2167
function sqrt(A::UnitUpperTriangular{T}) where T
20✔
2168
    B = A.data
20✔
2169
    n = checksquare(B)
20✔
2170
    t = typeof(sqrt(zero(T)))
20✔
2171
    R = Matrix{t}(I, n, n)
20✔
2172
    tt = typeof(oneunit(t)*oneunit(t))
20✔
2173
    half = inv(R[1,1]+R[1,1]) # for general, algebraic cases. PR#20214
22✔
2174
    @inbounds for j = 1:n
40✔
2175
        for i = j-1:-1:1
264✔
2176
            r::tt = B[i,j]
518✔
2177
            @simd for k = i+1:j-1
640✔
2178
                r -= R[i,k]*R[k,j]
1,180✔
2179
            end
×
2180
            iszero(r) || (R[i,j] = half*r)
1,036✔
2181
        end
914✔
2182
    end
264✔
2183
    return UnitUpperTriangular(R)
20✔
2184
end
2185
sqrt(A::LowerTriangular) = copy(transpose(sqrt(copy(transpose(A)))))
8✔
2186
sqrt(A::UnitLowerTriangular) = copy(transpose(sqrt(copy(transpose(A)))))
8✔
2187

2188
# Auxiliary functions for matrix square root
2189

2190
# square root of upper triangular or real upper quasitriangular matrix
2191
function sqrt_quasitriu(A0; blockwidth = eltype(A0) <: Complex ? 512 : 256)
1,340✔
2192
    n = checksquare(A0)
670✔
2193
    T = eltype(A0)
670✔
2194
    Tr = typeof(sqrt(real(zero(T))))
670✔
2195
    Tc = typeof(sqrt(complex(zero(T))))
670✔
2196
    if isreal(A0)
2,121✔
2197
        is_sqrt_real = true
284✔
2198
        if istriu(A0)
284✔
2199
            for i in 1:n
400✔
2200
                Aii = real(A0[i,i])
594✔
2201
                if Aii < zero(Aii)
594✔
2202
                    is_sqrt_real = false
40✔
2203
                    break
40✔
2204
                end
2205
            end
554✔
2206
        end
2207
        if is_sqrt_real
284✔
2208
            R = zeros(Tr, n, n)
27,010✔
2209
            A = real(A0)
244✔
2210
        else
2211
            R = zeros(Tc, n, n)
347✔
2212
            A = A0
324✔
2213
        end
2214
    else
2215
        A = A0
386✔
2216
        R = zeros(Tc, n, n)
386✔
2217
    end
2218
    _sqrt_quasitriu!(R, A; blockwidth=blockwidth, n=n)
911✔
2219
    Rc = eltype(A0) <: Real ? R : complex(R)
765✔
2220
    if A0 isa UpperTriangular
670✔
2221
        return UpperTriangular(Rc)
582✔
2222
    elseif A0 isa UnitUpperTriangular
88✔
2223
        return UnitUpperTriangular(Rc)
×
2224
    else
2225
        return Rc
88✔
2226
    end
2227
end
2228

2229
# in-place recursive sqrt of upper quasi-triangular matrix A from
2230
# Deadman E., Higham N.J., Ralha R. (2013) Blocked Schur Algorithms for Computing the Matrix
2231
# Square Root. Applied Parallel and Scientific Computing. PARA 2012. Lecture Notes in
2232
# Computer Science, vol 7782. https://doi.org/10.1007/978-3-642-36803-5_12
2233
function _sqrt_quasitriu!(R, A; blockwidth=64, n=checksquare(A))
4,335✔
2234
    if n ≤ blockwidth || !(eltype(R) <: BlasFloat) # base case, perform "point" algorithm
2,199✔
2235
        _sqrt_quasitriu_block!(R, A)
2,136✔
2236
    else  # compute blockwise recursion
2237
        split = div(n, 2)
31✔
2238
        iszero(A[split+1, split]) || (split += 1) # don't split 2x2 diagonal block
39✔
2239
        r1 = 1:split
31✔
2240
        r2 = (split + 1):n
31✔
2241
        n1, n2 = split, n - split
31✔
2242
        A11, A12, A22 = @views A[r1,r1], A[r1,r2], A[r2,r2]
31✔
2243
        R11, R12, R22 = @views R[r1,r1], R[r1,r2], R[r2,r2]
31✔
2244
        # solve diagonal blocks recursively
2245
        _sqrt_quasitriu!(R11, A11; blockwidth=blockwidth, n=n1)
31✔
2246
        _sqrt_quasitriu!(R22, A22; blockwidth=blockwidth, n=n2)
31✔
2247
        # solve off-diagonal block
2248
        R12 .= .- A12
62✔
2249
        _sylvester_quasitriu!(R11, R22, R12; blockwidth=blockwidth, nA=n1, nB=n2, raise=false)
31✔
2250
    end
2251
    return R
2,167✔
2252
end
2253

2254
function _sqrt_quasitriu_block!(R, A)
2,136✔
2255
    _sqrt_quasitriu_diag_block!(R, A)
2,136✔
2256
    _sqrt_quasitriu_offdiag_block!(R, A)
2,136✔
2257
    return R
2,136✔
2258
end
2259

2260
function _sqrt_quasitriu_diag_block!(R, A)
2,136✔
2261
    n = size(R, 1)
2,136✔
2262
    ta = eltype(R) <: Complex ? complex(eltype(A)) : eltype(A)
2,136✔
2263
    i = 1
2,136✔
2264
    @inbounds while i < n
2,136✔
2265
        if iszero(A[i + 1, i])
13,843✔
2266
            R[i, i] = sqrt(ta(A[i, i]))
6,988✔
2267
            i += 1
6,988✔
2268
        else
2269
            # this branch is never reached when A is complex triangular
2270
            @views _sqrt_real_2x2!(R[i:(i + 1), i:(i + 1)], A[i:(i + 1), i:(i + 1)])
736✔
2271
            i += 2
736✔
2272
        end
2273
    end
7,724✔
2274
    if i == n
2,136✔
2275
        R[n, n] = sqrt(ta(A[n, n]))
1,698✔
2276
    end
2277
    return R
2,136✔
2278
end
2279

2280
function _sqrt_quasitriu_offdiag_block!(R, A)
2,136✔
2281
    n = size(R, 1)
2,136✔
2282
    j = 1
2,136✔
2283
    @inbounds while j ≤ n
2,136✔
2284
        jsize_is_2 = j < n && !iszero(A[j + 1, j])
15,541✔
2285
        i = j - 1
9,422✔
2286
        while i > 0
76,013✔
2287
            isize_is_2 = i > 1 && !iszero(A[i, i - 1])
83,842✔
2288
            if isize_is_2
66,591✔
2289
                if jsize_is_2
831✔
2290
                    _sqrt_quasitriu_offdiag_block_2x2!(R, A, i - 1, j)
414✔
2291
                else
2292
                    _sqrt_quasitriu_offdiag_block_2x1!(R, A, i - 1, j)
417✔
2293
                end
2294
                i -= 2
831✔
2295
            else
2296
                if jsize_is_2
65,760✔
2297
                    _sqrt_quasitriu_offdiag_block_1x2!(R, A, i, j)
250✔
2298
                else
2299
                    _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j)
65,512✔
2300
                end
2301
                i -= 1
65,760✔
2302
            end
2303
        end
66,591✔
2304
        j += 2 - !jsize_is_2
9,422✔
2305
    end
9,422✔
2306
    return R
2,136✔
2307
end
2308

2309
# real square root of 2x2 diagonal block of quasi-triangular matrix from real Schur
2310
# decomposition. Eqs 6.8-6.9 and Algorithm 6.5 of
2311
# Higham, 2008, "Functions of Matrices: Theory and Computation", SIAM.
2312
Base.@propagate_inbounds function _sqrt_real_2x2!(R, A)
1,193✔
2313
    # in the real Schur form, A[1, 1] == A[2, 2], and A[2, 1] * A[1, 2] < 0
2314
    θ, a21, a12 = A[1, 1], A[2, 1], A[1, 2]
1,193✔
2315
    # avoid overflow/underflow of μ
2316
    # for real sqrt, |d| ≤ 2 max(|a12|,|a21|)
2317
    μ = sqrt(abs(a12)) * sqrt(abs(a21))
1,193✔
2318
    α = _real_sqrt(θ, μ)
1,307✔
2319
    c = 2α
1,193✔
2320
    R[1, 1] = α
1,193✔
2321
    R[2, 1] = a21 / c
1,193✔
2322
    R[1, 2] = a12 / c
1,193✔
2323
    R[2, 2] = α
1,193✔
2324
    return R
1,193✔
2325
end
2326

2327
# real part of square root of θ+im*μ
2328
@inline function _real_sqrt(θ, μ)
1,193✔
2329
    t = sqrt((abs(θ) + hypot(θ, μ)) / 2)
1,278✔
2330
    return θ ≥ 0 ? t : μ / 2t
1,307✔
2331
end
2332

2333
Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j)
65,510✔
2334
    Rii = R[i, i]
65,510✔
2335
    Rjj = R[j, j]
65,510✔
2336
    iszero(Rii) && iszero(Rjj) && return R
65,510✔
2337
    t = eltype(R)
65,509✔
2338
    tt = typeof(zero(t)*zero(t))
65,509✔
2339
    r = tt(-A[i, j])
65,509✔
2340
    @simd for k in (i + 1):(j - 1)
72,386✔
2341
        r += R[i, k] * R[k, j]
2,991,358✔
2342
    end
×
2343
    iszero(r) && return R
65,509✔
2344
    R[i, j] = sylvester(Rii, Rjj, r)
56,945✔
2345
    return R
56,945✔
2346
end
2347

2348
Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x2!(R, A, i, j)
250✔
2349
    jrange = j:(j + 1)
250✔
2350
    t = eltype(R)
250✔
2351
    tt = typeof(zero(t)*zero(t))
250✔
2352
    r1 = tt(-A[i, j])
250✔
2353
    r2 = tt(-A[i, j + 1])
250✔
2354
    @simd for k in (i + 1):(j - 1)
360✔
2355
        rik = R[i, k]
476✔
2356
        r1 += rik * R[k, j]
476✔
2357
        r2 += rik * R[k, j + 1]
476✔
2358
    end
×
2359
    Rjj = @view R[jrange, jrange]
250✔
2360
    Rij = @view R[i, jrange]
250✔
2361
    Rij[1] = r1
250✔
2362
    Rij[2] = r2
250✔
2363
    _sylvester_1x2!(R[i, i], Rjj, Rij)
250✔
2364
    return R
250✔
2365
end
2366

2367
Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x1!(R, A, i, j)
417✔
2368
    irange = i:(i + 1)
417✔
2369
    t = eltype(R)
417✔
2370
    tt = typeof(zero(t)*zero(t))
417✔
2371
    r1 = tt(-A[i, j])
417✔
2372
    r2 = tt(-A[i + 1, j])
417✔
2373
    @simd for k in (i + 2):(j - 1)
546✔
2374
        rkj = R[k, j]
1,003✔
2375
        r1 += R[i, k] * rkj
1,003✔
2376
        r2 += R[i + 1, k] * rkj
1,003✔
2377
    end
×
2378
    Rii = @view R[irange, irange]
417✔
2379
    Rij = @view R[irange, j]
417✔
2380
    Rij[1] = r1
417✔
2381
    Rij[2] = r2
417✔
2382
    @views _sylvester_2x1!(Rii, R[j, j], Rij)
417✔
2383
    return R
417✔
2384
end
2385

2386
Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x2!(R, A, i, j)
414✔
2387
    irange = i:(i + 1)
414✔
2388
    jrange = j:(j + 1)
414✔
2389
    t = eltype(R)
414✔
2390
    tt = typeof(zero(t)*zero(t))
414✔
2391
    for i′ in irange, j′ in jrange
1,242✔
2392
        Cij = tt(-A[i′, j′])
1,656✔
2393
        @simd for k in (i + 2):(j - 1)
2,332✔
2394
            Cij += R[i′, k] * R[k, j′]
3,464✔
2395
        end
×
2396
        R[i′, j′] = Cij
1,656✔
2397
    end
2,070✔
2398
    Rii = @view R[irange, irange]
414✔
2399
    Rjj = @view R[jrange, jrange]
414✔
2400
    Rij = @view R[irange, jrange]
414✔
2401
    if !iszero(Rij) && !all(isnan, Rij)
414✔
2402
        _sylvester_2x2!(Rii, Rjj, Rij)
411✔
2403
    end
2404
    return R
414✔
2405
end
2406

2407
# solve Sylvester's equation AX + XB = -C using blockwise recursion until the dimension of
2408
# A and B are no greater than blockwidth, based on Algorithm 1 from
2409
# Jonsson I, Kågström B. Recursive blocked algorithms for solving triangular systems—
2410
# Part I: one-sided and coupled Sylvester-type matrix equations. (2002) ACM Trans Math Softw.
2411
# 28(4), https://doi.org/10.1145/592843.592845.
2412
# specify raise=false to avoid breaking the recursion if a LAPACKException is thrown when
2413
# computing one of the blocks.
2414
function _sylvester_quasitriu!(A, B, C; blockwidth=64, nA=checksquare(A), nB=checksquare(B), raise=true)
1,154✔
2415
    if 1 ≤ nA ≤ blockwidth && 1 ≤ nB ≤ blockwidth
539✔
2416
        _sylvester_quasitriu_base!(A, B, C; raise=raise)
398✔
2417
    elseif nA ≥ 2nB ≥ 2
141✔
2418
        _sylvester_quasitriu_split1!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
12✔
2419
    elseif nB ≥ 2nA ≥ 2
129✔
2420
        _sylvester_quasitriu_split2!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
12✔
2421
    else
2422
        _sylvester_quasitriu_splitall!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
117✔
2423
    end
2424
    return C
499✔
2425
end
2426
function _sylvester_quasitriu_base!(A, B, C; raise=true)
796✔
2427
    try
398✔
2428
        _, scale = LAPACK.trsyl!('N', 'N', A, B, C)
398✔
2429
        rmul!(C, -inv(scale))
398✔
2430
    catch e
2431
        if !(e isa LAPACKException) || raise
296✔
2432
            throw(e)
148✔
2433
        end
2434
    end
2435
    return C
382✔
2436
end
2437
function _sylvester_quasitriu_split1!(A, B, C; nA=checksquare(A), kwargs...)
24✔
2438
    iA = div(nA, 2)
12✔
2439
    iszero(A[iA + 1, iA]) || (iA += 1)  # don't split 2x2 diagonal block
12✔
2440
    rA1, rA2 = 1:iA, (iA + 1):nA
12✔
2441
    nA1, nA2 = iA, nA-iA
12✔
2442
    A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2]
12✔
2443
    C1, C2 = @views C[rA1,:], C[rA2,:]
12✔
2444
    _sylvester_quasitriu!(A22, B, C2; nA=nA2, kwargs...)
24✔
2445
    mul!(C1, A12, C2, true, true)
8✔
2446
    _sylvester_quasitriu!(A11, B, C1; nA=nA1, kwargs...)
16✔
2447
    return C
8✔
2448
end
2449
function _sylvester_quasitriu_split2!(A, B, C; nB=checksquare(B), kwargs...)
24✔
2450
    iB = div(nB, 2)
12✔
2451
    iszero(B[iB + 1, iB]) || (iB += 1)  # don't split 2x2 diagonal block
12✔
2452
    rB1, rB2 = 1:iB, (iB + 1):nB
12✔
2453
    nB1, nB2 = iB, nB-iB
12✔
2454
    B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2]
12✔
2455
    C1, C2 = @views C[:,rB1], C[:,rB2]
12✔
2456
    _sylvester_quasitriu!(A, B11, C1; nB=nB1, kwargs...)
24✔
2457
    mul!(C2, C1, B12, true, true)
8✔
2458
    _sylvester_quasitriu!(A, B22, C2; nB=nB2, kwargs...)
16✔
2459
    return C
8✔
2460
end
2461
function _sylvester_quasitriu_splitall!(A, B, C; nA=checksquare(A), nB=checksquare(B), kwargs...)
234✔
2462
    iA = div(nA, 2)
117✔
2463
    iszero(A[iA + 1, iA]) || (iA += 1)  # don't split 2x2 diagonal block
128✔
2464
    iB = div(nB, 2)
117✔
2465
    iszero(B[iB + 1, iB]) || (iB += 1)  # don't split 2x2 diagonal block
136✔
2466
    rA1, rA2 = 1:iA, (iA + 1):nA
117✔
2467
    nA1, nA2 = iA, nA-iA
117✔
2468
    rB1, rB2 = 1:iB, (iB + 1):nB
117✔
2469
    nB1, nB2 = iB, nB-iB
117✔
2470
    A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2]
117✔
2471
    B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2]
117✔
2472
    C11, C21, C12, C22 = @views C[rA1,rB1], C[rA2,rB1], C[rA1,rB2], C[rA2,rB2]
117✔
2473
    _sylvester_quasitriu!(A22, B11, C21; nA=nA2, nB=nB1, kwargs...)
129✔
2474
    mul!(C11, A12, C21, true, true)
101✔
2475
    _sylvester_quasitriu!(A11, B11, C11; nA=nA1, nB=nB1, kwargs...)
109✔
2476
    mul!(C22, C21, B12, true, true)
101✔
2477
    _sylvester_quasitriu!(A22, B22, C22; nA=nA2, nB=nB2, kwargs...)
109✔
2478
    mul!(C12, A12, C22, true, true)
101✔
2479
    mul!(C12, C11, B12, true, true)
101✔
2480
    _sylvester_quasitriu!(A11, B22, C12; nA=nA1, nB=nB2, kwargs...)
109✔
2481
    return C
101✔
2482
end
2483

2484
# End of auxiliary functions for matrix square root
2485

2486
# Generic eigensystems
2487
eigvals(A::AbstractTriangular) = diag(A)
100✔
2488
function eigvecs(A::AbstractTriangular{T}) where T
4✔
2489
    TT = promote_type(T, Float32)
4✔
2490
    if TT <: BlasFloat
4✔
2491
        return eigvecs(convert(AbstractMatrix{TT}, A))
4✔
2492
    else
2493
        throw(ArgumentError("eigvecs type $(typeof(A)) not supported. Please submit a pull request."))
×
2494
    end
2495
end
2496
det(A::UnitUpperTriangular{T}) where {T} = one(T)
7✔
2497
det(A::UnitLowerTriangular{T}) where {T} = one(T)
7✔
2498
logdet(A::UnitUpperTriangular{T}) where {T} = zero(T)
7✔
2499
logdet(A::UnitLowerTriangular{T}) where {T} = zero(T)
7✔
2500
logabsdet(A::UnitUpperTriangular{T}) where {T} = zero(T), one(T)
7✔
2501
logabsdet(A::UnitLowerTriangular{T}) where {T} = zero(T), one(T)
7✔
2502
det(A::UpperTriangular) = prod(diag(A.data))
259✔
2503
det(A::LowerTriangular) = prod(diag(A.data))
7✔
2504
function logabsdet(A::Union{UpperTriangular{T},LowerTriangular{T}}) where T
28✔
2505
    sgn = one(T)
28✔
2506
    abs_det = zero(real(T))
28✔
2507
    @inbounds for i in 1:size(A,1)
56✔
2508
        diag_i = A.data[i,i]
252✔
2509
        sgn *= sign(diag_i)
324✔
2510
        abs_det += log(abs(diag_i))
292✔
2511
    end
476✔
2512
    return abs_det, sgn
28✔
2513
end
2514

2515
eigen(A::AbstractTriangular) = Eigen(eigvals(A), eigvecs(A))
20✔
2516

2517
# Generic singular systems
2518
for func in (:svd, :svd!, :svdvals)
2519
    @eval begin
2520
        ($func)(A::AbstractTriangular; kwargs...) = ($func)(copyto!(similar(parent(A)), A); kwargs...)
112✔
2521
    end
2522
end
2523

2524
factorize(A::AbstractTriangular) = A
28✔
2525

2526
# disambiguation methods: /(Adjoint of AbsVec, <:AbstractTriangular)
2527
/(u::AdjointAbsVec, A::Union{LowerTriangular,UpperTriangular}) = adjoint(adjoint(A) \ u.parent)
×
2528
/(u::AdjointAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = adjoint(adjoint(A) \ u.parent)
×
2529
# disambiguation methods: /(Transpose of AbsVec, <:AbstractTriangular)
2530
/(u::TransposeAbsVec, A::Union{LowerTriangular,UpperTriangular}) = transpose(transpose(A) \ u.parent)
×
2531
/(u::TransposeAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = transpose(transpose(A) \ u.parent)
×
2532
# disambiguation methods: /(Transpose of AbsVec, Adj/Trans of <:AbstractTriangular)
2533
for (tritype, comptritype) in ((:LowerTriangular, :UpperTriangular),
2534
                               (:UnitLowerTriangular, :UnitUpperTriangular),
2535
                               (:UpperTriangular, :LowerTriangular),
2536
                               (:UnitUpperTriangular, :UnitLowerTriangular))
2537
    @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Adjoint}) = transpose($comptritype(conj(parent(parent(A)))) \ u.parent)
×
2538
    @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Transpose}) = transpose(transpose(A) \ u.parent)
×
2539
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc