• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MilesCranmer / SymbolicRegression.jl / 9704727222

27 Jun 2024 11:01PM UTC coverage: 95.922% (+1.3%) from 94.617%
9704727222

Pull #326

github

web-flow
Merge 1f104aaf8 into ceddaa424
Pull Request #326: BREAKING: Change expression types to `DynamicExpressions.Expression` (from `DynamicExpressions.Node`)

301 of 307 new or added lines in 17 files covered. (98.05%)

1 existing line in 1 file now uncovered.

2611 of 2722 relevant lines covered (95.92%)

35611300.15 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.97
/src/HallOfFame.jl
1
module HallOfFameModule
2

3
using DynamicExpressions:
4
    AbstractExpression, parse_expression, Node, constructorof, string_tree
5
using DynamicExpressions: with_type_parameters
6
using ..UtilsModule: split_string
7
using ..CoreModule:
8
    MAX_DEGREE, Options, Dataset, DATA_TYPE, LOSS_TYPE, relu, create_expression
9
using ..ComplexityModule: compute_complexity
10
using ..PopMemberModule: PopMember
11
using ..LossFunctionsModule: eval_loss
12
using ..InterfaceDynamicExpressionsModule: format_dimensions, WILDCARD_UNIT_STRING
13
using Printf: @sprintf
14

15
"""
16
    HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}
17

18
List of the best members seen all time in `.members`, with `.members[c]` being
19
the best member seen at complexity c. Including only the members which actually
20
have been set, you can run `.members[exists]`.
21

22
# Fields
23

24
- `members::Array{PopMember{T,L,N},1}`: List of the best members seen all time.
25
    These are ordered by complexity, with `.members[1]` the member with complexity 1.
26
- `exists::Array{Bool,1}`: Whether the member at the given complexity has been set.
27
"""
28
struct HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}
    # Best member recorded per complexity; slot `c` corresponds to complexity `c`.
    members::Vector{PopMember{T,L,N}}
    # `exists[c]` is `true` once `members[c]` has actually been set.
    exists::Vector{Bool}
end
32
"""
    Base.show(io::IO, mime::MIME"text/plain", hof::HallOfFame)

Print a multi-line summary of `hof` to `io`. For every slot, the `.exists` flag
is printed, followed by the member itself (rendered with the same `mime`) when
the slot is set, or `undef` when it is not. Members whose rendering spans several
lines are printed on their own lines with an 8-space indent.
"""
function Base.show(io::IO, mime::MIME"text/plain", hof::HallOfFame{T,L,N}) where {T,L,N}
    println(io, "HallOfFame{...}:")
    indent = " "^4
    for idx in eachindex(hof.members, hof.exists)
        is_set = hof.exists[idx]
        # Only render the member when the slot has really been filled.
        member_repr = if is_set
            sprint((buf, m) -> show(buf, mime, m), hof.members[idx])
        else
            "undef"
        end
        println(io, indent, ".exists[$idx] = $is_set")
        print(io, indent, ".members[$idx] =")
        member_lines = split(strip(member_repr), '\n')
        if length(member_lines) != 1
            # Multi-line rendering: start on a fresh line and indent each line.
            println(io)
            for line in member_lines
                println(io, " "^8 * line)
            end
        else
            println(io, " " * member_repr)
        end
    end
    return nothing
end
52

53
"""
54
    HallOfFame(options::Options, dataset::Dataset{T,L}) where {T<:DATA_TYPE,L<:LOSS_TYPE}
55

56
Create empty HallOfFame. The HallOfFame stores a list
57
of `PopMember` objects in `.members`, which is enumerated
58
by size (i.e., `.members[1]` is the constant solution).
59
`.exists` is used to determine whether the particular member
60
has been instantiated or not.
61

62
Arguments:
63
- `options`: Options providing `maxsize` and the `deterministic` flag.
64
- `dataset`: Dataset containing the input data.
65
"""
66
function HallOfFame(
    options::Options, dataset::Dataset{T,L}
) where {T<:DATA_TYPE,L<:LOSS_TYPE}
    # Number of slots: `options.maxsize` plus `MAX_DEGREE` extra entries.
    num_slots = options.maxsize + MAX_DEGREE
    # Placeholder expression (built from `zero(T)`) stored in every unset slot.
    placeholder = create_expression(zero(T), options, dataset)

    # Each slot receives its own copy of the placeholder so members never alias.
    members = [
        PopMember(
            copy(placeholder),
            L(0),
            L(Inf),
            options;
            parent=-1,
            deterministic=options.deterministic,
        ) for _ in 1:num_slots
    ]
    # No slot has been set yet.
    exists = fill(false, num_slots)

    return HallOfFame{T,L,typeof(placeholder)}(members, exists)
end
86

87
"""
    Base.copy(hof::HallOfFame)

Return a new `HallOfFame` in which every member has been `copy`-ed and the
`exists` flags live in a fresh array.
"""
function Base.copy(hof::HallOfFame)
    members = map(copy, hof.members)
    exists = copy(hof.exists)
    return HallOfFame(members, exists)
end
92

93
"""
94
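    calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,N}) where {T,L,N}

Return copies of the members of `hallOfFame` that exist and whose loss is strictly
lower than the loss of every existing member at a smaller index.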
95
"""
96
function calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,N}) where {T,L,N}
    # Dominating pareto curve - must be better than all simpler equations
    frontier = PopMember{T,L,N}[]
    for c in 1:length(hallOfFame.members)
        hallOfFame.exists[c] || continue
        candidate = hallOfFame.members[c]
        # The candidate is rejected as soon as one existing simpler member has a
        # loss that is less than or equal to the candidate's loss.
        is_beaten = any(1:(c - 1)) do k
            hallOfFame.exists[k] && candidate.loss >= hallOfFame.members[k].loss
        end
        is_beaten || push!(frontier, copy(candidate))
    end
    return frontier
end
126

127
"""
    string_dominating_pareto_curve(hallOfFame, dataset, options; width=nothing)

Build a printable table of the dominating Pareto curve of `hallOfFame`.

The table has one row per entry returned by `format_hall_of_fame`, with the
columns `Complexity`, `Loss`, `Score` and `Equation`. Each equation is prefixed
with `dataset.y_variable_name` (plus its formatted units) and ` = `. Equations
wider than the available space are split with `split_string`; every piece except
the last ends in `...`, and continuation pieces are indented to the equation column.

# Arguments
- `hallOfFame`: passed to `format_hall_of_fame` together with `options`.
- `dataset`: supplies the variable names and units used when printing equations.
- `options`: forwarded to `format_hall_of_fame` and `string_tree`.

# Keywords
- `width`: requested table width. `nothing` or any value below 100 gives 100.

# Returns
- The table as a `String`, without a trailing newline.
"""
function string_dominating_pareto_curve(
    hallOfFame, dataset, options; width::Union{Integer,Nothing}=nothing
)
    # TODO: Get user's terminal width.
    twidth = (width === nothing) ? 100 : max(100, width::Integer)
    rule = "-"^(twidth - 1)
    dots = "..."

    formatted = format_hall_of_fame(hallOfFame, options)

    # Layout values are identical for every row, so compute them once.
    base_string_length = length(@sprintf("%-10d  %-8.3e  %8.3e  ", 1, 1.0, 1.0))
    equation_width = (twidth - 1) - base_string_length - length(dots)
    # Separator between consecutive pieces of a wrapped equation: close the
    # current line with the dots, then pad the next line to the equation column.
    wrap_separator = dots * "\n" * " "^base_string_length

    # The left-hand side of each equation depends only on the dataset.
    y_prefix = dataset.y_variable_name * format_dimensions(dataset.y_sym_units)
    if dataset.y_sym_units === nothing && dataset.X_sym_units !== nothing
        y_prefix *= WILDCARD_UNIT_STRING
    end

    # Accumulate into a buffer instead of repeatedly concatenating strings.
    io = IOBuffer()
    print(io, "Hall of Fame:\n")
    print(io, rule, "\n")
    print(
        io,
        @sprintf("%-10s  %-8s   %-8s  %-8s\n", "Complexity", "Loss", "Score", "Equation")
    )

    for (tree, score, loss, complexity) in
        zip(formatted.trees, formatted.scores, formatted.losses, formatted.complexities)
        eqn_string =
            y_prefix *
            " = " *
            string_tree(
                tree,
                options;
                display_variable_names=dataset.display_variable_names,
                X_sym_units=dataset.X_sym_units,
                y_sym_units=dataset.y_sym_units,
                raw=false,
            )

        print(io, @sprintf("%-10d  %-8.3e  %-8.3e  ", complexity, loss, score))

        pieces = split_string(eqn_string, equation_width)
        print(io, join(pieces, wrap_separator), "\n")
    end
    print(io, rule)
    return String(take!(io))
end
176

177
"""
    format_hall_of_fame(hof::HallOfFame{T,L}, options) where {T,L}

Summarise the dominating Pareto frontier of `hof`.

Returns a named tuple `(; trees, scores, losses, complexities)` with one entry per
member returned by `calculate_pareto_frontier(hof)`. The score of entry `i` is
`relu(-log(relu(loss_i / loss_prev) + eps(L)) / (complexity_i - complexity_prev))`,
where the previous loss starts at `typemax(L)` and the previous complexity at `0`.

Throws a `DomainError` if any frontier member has a negative loss.
"""
function format_hall_of_fame(hof::HallOfFame{T,L}, options) where {T,L}
    dominating = calculate_pareto_frontier(hof)

    # Negative losses are rejected before any score is computed.
    for member in dominating
        member.loss < 0.0 && throw(
            DomainError(
                member.loss,
                "Your loss function must be non-negative. To do this, consider wrapping your loss inside an exponential, which will not affect the search (unless you are using annealing).",
            ),
        )
    end

    # Added inside the logarithm so that a loss ratio of zero stays finite.
    ZERO_POINT = eps(L)

    trees = map(member -> member.tree, dominating)
    losses = map(member -> member.loss, dominating)
    complexities = map(member -> compute_complexity(member, options), dominating)
    scores = Vector{L}(undef, length(dominating))

    prev_loss = typemax(L)
    prev_complexity = 0
    for (i, (loss, complexity)) in enumerate(zip(losses, complexities))
        log_loss_ratio = log(relu(loss / prev_loss) + ZERO_POINT)
        scores[i] = relu(-log_loss_ratio / (complexity - prev_complexity))
        prev_loss = loss
        prev_complexity = complexity
    end

    return (; trees, scores, losses, complexities)
end
212
"""
    format_hall_of_fame(hof::AbstractVector{<:HallOfFame}, options)

Apply `format_hall_of_fame` to every element of `hof` and gather the results.
Returns a named tuple `(; trees, scores, losses, complexities)` in which each
field is a collection holding the corresponding field of every per-element result.
"""
function format_hall_of_fame(hof::AbstractVector{<:HallOfFame}, options)
    outs = map(single -> format_hall_of_fame(single, options), hof)
    trees = map(out -> out.trees, outs)
    scores = map(out -> out.scores, outs)
    losses = map(out -> out.losses, outs)
    complexities = map(out -> out.complexities, outs)
    return (; trees, scores, losses, complexities)
end
221
# TODO: Re-use this in `string_dominating_pareto_curve`
222

223
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc