• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MilesCranmer / SymbolicRegression.jl / 9639805727

24 Jun 2024 05:00AM UTC coverage: 94.475% (-0.1%) from 94.617%
9639805727

Pull #326

github

web-flow
Merge 3ba1556f8 into ceddaa424
Pull Request #326: BREAKING: Change expression types to `DynamicExpressions.Expression` (from `DynamicExpressions.Node`)

239 of 250 new or added lines in 15 files covered. (95.6%)

4 existing lines in 3 files now uncovered.

2548 of 2697 relevant lines covered (94.48%)

46539295.05 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.97
/src/HallOfFame.jl
1
module HallOfFameModule
2

3
using DynamicExpressions:
4
    AbstractExpression, parse_expression, Node, constructorof, string_tree
5
using DynamicExpressions: with_type_parameters
6
using ..UtilsModule: split_string
7
using ..CoreModule:
8
    MAX_DEGREE, Options, Dataset, DATA_TYPE, LOSS_TYPE, relu, create_expression
9
using ..ComplexityModule: compute_complexity
10
using ..PopMemberModule: PopMember
11
using ..LossFunctionsModule: eval_loss
12
using ..InterfaceDynamicExpressionsModule: format_dimensions, WILDCARD_UNIT_STRING
13
using Printf: @sprintf
14

15
"""
16
    HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}
17

18
List of the best members seen all time in `.members`, with `.members[c]` being
19
the best member seen at complexity c. Including only the members which actually
20
have been set, you can run `.members[exists]`.
21

22
# Fields
23

24
- `members::Array{PopMember{T,L,N},1}`: List of the best members seen all time.
25
    These are ordered by complexity, with `.members[1]` the member with complexity 1.
26
- `exists::Array{Bool,1}`: Whether the member at the given complexity has been set.
27
"""
28
struct HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}
    # Best member recorded per complexity; `members[c]` belongs to complexity `c`.
    members::Vector{PopMember{T,L,N}}
    # `exists[c]` tells whether `members[c]` has actually been set.
    exists::Vector{Bool}
end
32
"""
    Base.show(io::IO, mime::MIME"text/plain", hof::HallOfFame)

Print a multi-line listing of `hof`: for every index, the `.exists` flag followed by
the corresponding `.members` entry. Entries whose flag is `false` are printed as
`undef`. A member whose own text spans several lines is printed on the following
lines, indented by eight spaces.
"""
function Base.show(io::IO, mime::MIME"text/plain", hof::HallOfFame{T,L,N}) where {T,L,N}
    indent = " "^4
    println(io, "HallOfFame{...}:")
    for idx in eachindex(hof.members, hof.exists)
        is_set = hof.exists[idx]
        # Only members flagged as existing are rendered; the rest show as "undef".
        member_text = if is_set
            sprint((buf, m) -> show(buf, mime, m), hof.members[idx])
        else
            "undef"
        end
        println(io, indent * ".exists[$idx] = " * (is_set ? "true" : "false"))
        print(io, indent * ".members[$idx] =")
        pieces = split(strip(member_text), '\n')
        if length(pieces) != 1
            # Multi-line member: start on a fresh line and indent every line.
            println(io)
            for piece in pieces
                println(io, " "^8 * piece)
            end
        else
            println(io, " " * member_text)
        end
    end
    return nothing
end
52

53
"""
54
    HallOfFame(options::Options, dataset::Dataset{T,L}) where {T<:DATA_TYPE,L<:LOSS_TYPE}
55

56
Create empty HallOfFame. The HallOfFame stores a list
57
of `PopMember` objects in `.members`, which is enumerated
58
by size (i.e., `.members[1]` is the constant solution).
59
`.exists` is used to determine whether the particular member
60
has been instantiated or not.
61

62
Arguments:
63
- `options`: Options providing `maxsize` and the `deterministic` flag.
64
- `dataset`: Dataset containing the input data.
65
"""
66
function HallOfFame(
    options::Options, dataset::Dataset{T,L}
) where {T<:DATA_TYPE,L<:LOSS_TYPE}
    # Number of slots: `options.maxsize` plus `MAX_DEGREE` additional ones.
    num_slots = options.maxsize + MAX_DEGREE
    # Placeholder expression created from `zero(T)`; every slot gets its own copy.
    placeholder = create_expression(zero(T), options, dataset)
    N = typeof(placeholder)

    members = map(1:num_slots) do _
        PopMember(
            copy(placeholder),
            L(0),
            L(Inf),
            options;
            parent=-1,
            deterministic=options.deterministic,
        )
    end
    # No slot counts as set until a real member is stored in it.
    return HallOfFame{T,L,N}(members, fill(false, num_slots))
end
86

87
"""
    Base.copy(hof::HallOfFame)

Build a new `HallOfFame` in which every member is a `copy` of the corresponding member
of `hof` and the `exists` flags are stored in a fresh vector.
"""
function Base.copy(hof::HallOfFame)
    copied_members = map(copy, hof.members)
    copied_flags = copy(hof.exists)
    return HallOfFame(copied_members, copied_flags)
end
92

93
"""
94
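    calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,N}) where {T,L,N}

Return copies of the members of `hallOfFame` whose loss is strictly lower than the loss
of every simpler member that exists, ordered by increasing complexity.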
95
"""
96
function calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,N}) where {T,L,N}
    members = hallOfFame.members
    exists = hallOfFame.exists
    frontier = PopMember{T,L,N}[]
    for c in 1:length(members)
        exists[c] || continue
        candidate = members[c]
        # The candidate is dominated as soon as one existing, simpler member has a loss
        # that is lower than or equal to the candidate's loss.
        dominated = any(1:(c - 1)) do k
            exists[k] && candidate.loss >= members[k].loss
        end
        dominated || push!(frontier, copy(candidate))
    end
    return frontier
end
126

127
"""
    string_dominating_pareto_curve(hallOfFame, dataset, options; width=nothing)

Render the Pareto frontier of `hallOfFame` as a text table and return it as a `String`.

Each row lists complexity, loss, score and the equation, using the values returned by
`format_hall_of_fame`. The equation text is cut into pieces with `split_string`; every
piece except the last is followed by `...`, and continuation lines are indented to the
start of the equation column.

# Arguments
- `hallOfFame`: passed to `format_hall_of_fame`.
- `dataset`: supplies `y_variable_name`, `display_variable_names`, `X_sym_units` and
  `y_sym_units` for printing.
- `options`: forwarded to `format_hall_of_fame` and `string_tree`.

# Keywords
- `width`: requested width. The table is laid out for `max(100, width)` columns, or
  100 columns when `width` is `nothing`.
"""
function string_dominating_pareto_curve(
    hallOfFame, dataset, options; width::Union{Integer,Nothing}=nothing
)
    twidth = (width === nothing) ? 100 : max(100, width::Integer)
    rule = "-"^(twidth - 1)
    dots = "..."

    # Everything below is identical for all rows, so compute it once up front.
    # `base_string_length` is the width taken by the three numeric columns.
    base_string_length = length(@sprintf("%-10d  %-8.3e  %8.3e  ", 1, 1.0, 1.0))
    equation_width = (twidth - 1) - base_string_length - length(dots)
    continuation_pad = " "^base_string_length

    # Left-hand side of every equation: the target name followed by its units.
    y_prefix = dataset.y_variable_name * format_dimensions(dataset.y_sym_units)
    if dataset.y_sym_units === nothing && dataset.X_sym_units !== nothing
        y_prefix *= WILDCARD_UNIT_STRING
    end

    # Accumulate into a buffer rather than re-allocating a growing string per piece.
    io = IOBuffer()
    print(io, "Hall of Fame:\n")
    # TODO: Get user's terminal width.
    print(io, rule, "\n")
    print(
        io,
        @sprintf("%-10s  %-8s   %-8s  %-8s\n", "Complexity", "Loss", "Score", "Equation")
    )

    formatted = format_hall_of_fame(hallOfFame, options)
    for (tree, score, loss, complexity) in
        zip(formatted.trees, formatted.scores, formatted.losses, formatted.complexities)
        eqn_string = string_tree(
            tree,
            options;
            display_variable_names=dataset.display_variable_names,
            X_sym_units=dataset.X_sym_units,
            y_sym_units=dataset.y_sym_units,
            raw=false,
        )
        eqn_string = y_prefix * " = " * eqn_string

        print(io, @sprintf("%-10d  %-8.3e  %-8.3e  ", complexity, loss, score))

        pieces = split_string(eqn_string, equation_width)
        num_pieces = length(pieces)
        for (k, piece) in enumerate(pieces)
            # The first piece follows the numeric columns; later ones are padded to
            # line up beneath it.
            k > 1 && print(io, continuation_pad)
            print(io, piece)
            # Mark every piece but the last as continued.
            k < num_pieces && print(io, dots)
            print(io, "\n")
        end
    end
    print(io, rule)
    return String(take!(io))
end
176

177
"""
    format_hall_of_fame(hof::HallOfFame, options)

Summarize the Pareto frontier of `hof` (see `calculate_pareto_frontier`) as a named
tuple `(; trees, scores, losses, complexities)` with one entry per frontier member.

Walking the frontier in order, each score is
`relu(-log(relu(loss / previous_loss) + eps(L)) / (complexity - previous_complexity))`,
where the previous loss starts at `typemax(L)` and the previous complexity at `0`.

Throws a `DomainError` if a frontier member has a negative loss.
"""
function format_hall_of_fame(hof::HallOfFame{T,L}, options) where {T,L}
    frontier = calculate_pareto_frontier(hof)
    for member in frontier
        member.loss < 0.0 || continue
        throw(
            DomainError(
                member.loss,
                "Your loss function must be non-negative. To do this, consider wrapping your loss inside an exponential, which will not affect the search (unless you are using annealing).",
            ),
        )
    end

    trees = [member.tree for member in frontier]
    losses = [member.loss for member in frontier]
    complexities = [compute_complexity(member, options) for member in frontier]
    scores = Vector{L}(undef, length(frontier))

    # Added inside the logarithm so that a loss ratio of zero does not give `log(0)`.
    tiny = eps(L)
    prev_loss = typemax(L)
    prev_complexity = 0
    for (i, (loss, complexity)) in enumerate(zip(losses, complexities))
        log_ratio = log(relu(loss / prev_loss) + tiny)
        scores[i] = relu(-log_ratio / (complexity - prev_complexity))
        prev_loss = loss
        prev_complexity = complexity
    end
    return (; trees, scores, losses, complexities)
end
212
"""
    format_hall_of_fame(hof::AbstractVector{<:HallOfFame}, options)

Apply `format_hall_of_fame` to every element of `hof` and return a named tuple
`(; trees, scores, losses, complexities)` in which each field collects the
corresponding field of the per-element results, in order.
"""
function format_hall_of_fame(hof::AbstractVector{<:HallOfFame}, options)
    per_hof = [format_hall_of_fame(single_hof, options) for single_hof in hof]
    trees = [entry.trees for entry in per_hof]
    scores = [entry.scores for entry in per_hof]
    losses = [entry.losses for entry in per_hof]
    complexities = [entry.complexities for entry in per_hof]
    return (; trees, scores, losses, complexities)
end
221
# TODO: Re-use this in `string_dominating_pareto_curve`
222

223
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc