• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MilesCranmer / SymbolicRegression.jl / 11204590927

06 Oct 2024 07:29PM UTC coverage: 95.808% (+1.2%) from 94.617%
11204590927

Pull #326

github

web-flow
Merge e2b369ea7 into 8f67533b9
Pull Request #326: BREAKING: Change expression types to `DynamicExpressions.Expression` (from `DynamicExpressions.Node`)

466 of 482 new or added lines in 24 files covered. (96.68%)

1 existing line in 1 file now uncovered.

2651 of 2767 relevant lines covered (95.81%)

73863189.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.97
/src/HallOfFame.jl
1
module HallOfFameModule
2

3
using DynamicExpressions: AbstractExpression, string_tree
4
using ..UtilsModule: split_string
5
using ..CoreModule:
6
    MAX_DEGREE, Options, Dataset, DATA_TYPE, LOSS_TYPE, relu, create_expression
7
using ..ComplexityModule: compute_complexity
8
using ..PopMemberModule: PopMember
9
using ..InterfaceDynamicExpressionsModule: format_dimensions, WILDCARD_UNIT_STRING
10
using Printf: @sprintf
11

"""
    HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}

List of the best members seen all time in `.members`, with `.members[c]` being
the best member seen at complexity c. To select only the members which have
actually been set, index with the mask, e.g. `hof.members[hof.exists]`.

# Fields

- `members::Array{PopMember{T,L,N},1}`: List of the best members seen all time.
    These are ordered by complexity, with `.members[1]` the member with complexity 1.
- `exists::Array{Bool,1}`: Whether the member at the given complexity has been set.
"""
struct HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}
    members::Array{PopMember{T,L,N},1}
    exists::Array{Bool,1} #Whether it has been set
end
29
function Base.show(io::IO, mime::MIME"text/plain", hof::HallOfFame{T,L,N}) where {T,L,N}
    # Multi-line display: one `.exists[i]` / `.members[i]` pair per slot.
    indent = " "^4
    println(io, "HallOfFame{...}:")
    for idx in eachindex(hof.members, hof.exists)
        is_set = hof.exists[idx]
        # The member is only rendered when its slot is flagged as set;
        # otherwise a placeholder string is printed instead.
        member_repr = if is_set
            sprint((buf, m) -> show(buf, mime, m), hof.members[idx])
        else
            "undef"
        end
        println(io, indent * ".exists[$idx] = " * (is_set ? "true" : "false"))
        print(io, indent * ".members[$idx] =")
        member_lines = split(strip(member_repr), '\n')
        if length(member_lines) > 1
            # Multi-line representations start on a fresh line, indented one
            # level deeper than the field name.
            println(io)
            for line in member_lines
                println(io, " "^8 * line)
            end
        else
            println(io, " " * member_repr)
        end
    end
    return nothing
end
49

"""
    HallOfFame(options::Options, dataset::Dataset{T,L}) where {T<:DATA_TYPE,L<:LOSS_TYPE}

Create an empty `HallOfFame`. The result holds `options.maxsize + MAX_DEGREE`
slots in `.members`, indexed by size (i.e., `.members[1]` is the constant
solution). Every slot is filled with a placeholder `PopMember` wrapping a copy
of the expression built from `zero(T)`, and every entry of `.exists` is `false`,
marking that no slot has been set yet.

# Arguments
- `options`: Search options; `options.maxsize` and `options.deterministic` are
    read here, and `options` is forwarded to `create_expression` and `PopMember`.
- `dataset`: Dataset containing the input data; fixes the types `T` and `L`.
"""
function HallOfFame(
    options::Options, dataset::Dataset{T,L}
) where {T<:DATA_TYPE,L<:LOSS_TYPE}
    n_slots = options.maxsize + MAX_DEGREE
    template = create_expression(zero(T), options, dataset)

    # Each slot gets its own copy of the template expression.
    placeholders = [
        PopMember(
            copy(template),
            L(0),
            L(Inf),
            options;
            parent=-1,
            deterministic=options.deterministic,
        ) for _ in 1:n_slots
    ]
    unset = fill(false, n_slots)

    return HallOfFame{T,L,typeof(template)}(placeholders, unset)
end
83

84
function Base.copy(hof::HallOfFame)
    # Build a new HallOfFame: `copy` is called on every member, and the
    # `exists` mask is duplicated into a fresh array.
    members = [copy(m) for m in hof.members]
    exists = copy(hof.exists)
    return HallOfFame(members, exists)
end
89

"""
    calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,N}) where {T,L,N}

Return copies of the members of `hallOfFame` on the dominating Pareto curve,
ordered by their index in `.members`. A member is included when its slot is
marked in `.exists` and no existing member at a smaller index has a loss that
is less than or equal to its own.
"""
function calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,N}) where {T,L,N}
    members = hallOfFame.members
    exists = hallOfFame.exists
    frontier = PopMember{T,L,N}[]
    for size in 1:length(members)
        exists[size] || continue
        candidate = members[size]
        # The candidate is rejected as soon as one simpler, existing member is
        # found whose loss is not worse than the candidate's.
        is_dominated = any(1:(size - 1)) do i
            exists[i] && candidate.loss >= members[i].loss
        end
        is_dominated || push!(frontier, copy(candidate))
    end
    return frontier
end
123

124
"""
    string_dominating_pareto_curve(hallOfFame, dataset, options; width=nothing)

Render the dominating Pareto curve of `hallOfFame` as a text table and return
it as a `String`. The table has a "Hall of Fame:" title, a header row, and one
row per entry of `format_hall_of_fame(hallOfFame, options)` listing complexity,
loss, score and the equation, framed by two horizontal rules.

Equations that do not fit in the remaining width are broken into pieces with
`split_string`; every piece but the last ends in `...`, and continuation lines
are padded so they line up under the equation column.

# Arguments
- `hallOfFame`: Passed to `format_hall_of_fame`.
- `dataset`: Supplies `display_variable_names`, `X_sym_units`, `y_sym_units`
    and `y_variable_name` used to print each equation.
- `options`: Forwarded to `format_hall_of_fame` and `string_tree`.

# Keywords
- `width`: Requested table width. `nothing` means 100; values below 100 are
    raised to 100.
"""
function string_dominating_pareto_curve(
    hallOfFame, dataset, options; width::Union{Integer,Nothing}=nothing
)
    twidth = (width === nothing) ? 100 : max(100, width::Integer)
    # TODO: Get user's terminal width.
    rule = "-"^(twidth - 1)
    dots = "..."

    # Width taken by the complexity/loss/score columns. It is the same for
    # every row, so it is measured once on a dummy row.
    base_string_length = length(@sprintf("%-10d  %-8.3e  %8.3e  ", 1, 1.0, 1.0))
    equation_width = (twidth - 1) - base_string_length - length(dots)
    continuation_pad = " "^base_string_length

    # Accumulate into a buffer instead of repeatedly concatenating strings,
    # which would re-copy the whole table for every appended piece.
    io = IOBuffer()
    print(io, "Hall of Fame:\n")
    print(io, rule, "\n")
    print(
        io,
        @sprintf("%-10s  %-8s   %-8s  %-8s\n", "Complexity", "Loss", "Score", "Equation")
    )

    formatted = format_hall_of_fame(hallOfFame, options)

    # The left-hand side (target name plus units) depends only on the dataset,
    # so it is built once rather than per equation.
    y_prefix = dataset.y_variable_name * format_dimensions(dataset.y_sym_units)
    if dataset.y_sym_units === nothing && dataset.X_sym_units !== nothing
        y_prefix *= WILDCARD_UNIT_STRING
    end

    for (tree, score, loss, complexity) in
        zip(formatted.trees, formatted.scores, formatted.losses, formatted.complexities)
        eqn_string =
            y_prefix *
            " = " *
            string_tree(
                tree,
                options;
                display_variable_names=dataset.display_variable_names,
                X_sym_units=dataset.X_sym_units,
                y_sym_units=dataset.y_sym_units,
                raw=false,
            )

        print(io, @sprintf("%-10d  %-8.3e  %-8.3e  ", complexity, loss, score))

        split_eqn = split_string(eqn_string, equation_width)
        # The first piece follows the numeric columns directly; every later
        # piece starts a new line and needs the padding.
        print_pad = false
        while length(split_eqn) > 1
            cur_piece = popfirst!(split_eqn)
            print(io, print_pad ? continuation_pad : "", cur_piece, dots, "\n")
            print_pad = true
        end
        print(io, print_pad ? continuation_pad : "", split_eqn[1], "\n")
    end
    print(io, rule)
    return String(take!(io))
end
173

174
function format_hall_of_fame(hof::HallOfFame{T,L}, options) where {T,L}
    # Collect the Pareto-dominating members of `hof` and return a named tuple
    # `(; trees, scores, losses, complexities)` with one entry per member.
    # Throws a `DomainError` if any dominating member has a negative loss.
    dominating = calculate_pareto_frontier(hof)
    for member in dominating
        member.loss < 0.0 && throw(
            DomainError(
                member.loss,
                "Your loss function must be non-negative. To do this, consider wrapping your loss inside an exponential, which will not affect the search (unless you are using annealing).",
            ),
        )
    end

    # Small offset added inside the logarithm below.
    ZERO_POINT = eps(L)
    prev_loss = typemax(L)
    prev_complexity = 0

    trees = [member.tree for member in dominating]
    losses = [member.loss for member in dominating]
    complexities = [compute_complexity(member, options) for member in dominating]
    scores = Vector{L}(undef, length(dominating))

    # Score of each member: negative log of the loss ratio relative to the
    # previous member on the curve, divided by the increase in complexity,
    # and clamped through `relu`.
    for (i, (loss, complexity)) in enumerate(zip(losses, complexities))
        complexity_step = complexity - prev_complexity
        log_loss_ratio = log(relu(loss / prev_loss) + ZERO_POINT)

        scores[i] = relu(-log_loss_ratio / complexity_step)
        prev_loss = loss
        prev_complexity = complexity
    end
    return (; trees, scores, losses, complexities)
end
209
function format_hall_of_fame(hof::AbstractVector{<:HallOfFame}, options)
    # Format every hall of fame separately, then regroup the results so each
    # field of the returned named tuple holds one entry per hall of fame.
    per_hof = [format_hall_of_fame(single, options) for single in hof]
    trees = [result.trees for result in per_hof]
    scores = [result.scores for result in per_hof]
    losses = [result.losses for result in per_hof]
    complexities = [result.complexities for result in per_hof]
    return (; trees, scores, losses, complexities)
end
218
# TODO: Re-use this in `string_dominating_pareto_curve`
219

220
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc