• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MilesCranmer / SymbolicRegression.jl / 9686354911

26 Jun 2024 08:31PM UTC coverage: 93.22% (-1.4%) from 94.617%
9686354911

Pull #326

github

web-flow
Merge 6f8229c9f into ceddaa424
Pull Request #326: BREAKING: Change expression types to `DynamicExpressions.Expression` (from `DynamicExpressions.Node`)

275 of 296 new or added lines in 17 files covered. (92.91%)

34 existing lines in 5 files now uncovered.

2530 of 2714 relevant lines covered (93.22%)

32081968.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.97
/src/HallOfFame.jl
1
module HallOfFameModule
2

3
using DynamicExpressions:
4
    AbstractExpression, parse_expression, Node, constructorof, string_tree
5
using DynamicExpressions: with_type_parameters
6
using ..UtilsModule: split_string
7
using ..CoreModule:
8
    MAX_DEGREE, Options, Dataset, DATA_TYPE, LOSS_TYPE, relu, create_expression
9
using ..ComplexityModule: compute_complexity
10
using ..PopMemberModule: PopMember
11
using ..LossFunctionsModule: eval_loss
12
using ..InterfaceDynamicExpressionsModule: format_dimensions, WILDCARD_UNIT_STRING
13
using Printf: @sprintf
14

15
"""
16
    HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}
17

18
List of the best members seen all time in `.members`, with `.members[c]` being
19
the best member seen at complexity c. Including only the members which actually
20
have been set, you can run `.members[exists]`.
21

22
# Fields
23

24
- `members::Array{PopMember{T,L,N},1}`: List of the best members seen all time.
25
    These are ordered by complexity, with `.members[1]` the member with complexity 1.
26
- `exists::Array{Bool,1}`: Whether the member at the given complexity has been set.
27
"""
28
struct HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}
    # Best member recorded per complexity; slot `c` corresponds to complexity `c`.
    members::Vector{PopMember{T,L,N}}
    # `exists[c]` is `true` once slot `c` of `members` has actually been set.
    exists::Vector{Bool}
end
32
"""
    Base.show(io::IO, mime::MIME"text/plain", hof::HallOfFame)

Print a multi-line listing of `hof` to `io`. For every slot it writes one
`.exists[i] = ...` line followed by a `.members[i] = ...` entry. Slots that are
not set are shown as `undef`. Members whose own `text/plain` rendering spans
several lines are printed on their own lines, indented by eight spaces.
"""
function Base.show(io::IO, mime::MIME"text/plain", hof::HallOfFame{T,L,N}) where {T,L,N}
    indent = " "^4
    println(io, "HallOfFame{...}:")
    for idx in eachindex(hof.members, hof.exists)
        is_set = hof.exists[idx]
        rendered = if is_set
            sprint((buf, m) -> show(buf, mime, m), hof.members[idx])
        else
            "undef"
        end
        println(io, indent * ".exists[$idx] = $is_set")
        print(io, indent * ".members[$idx] =")
        pieces = split(strip(rendered), '\n')
        if length(pieces) == 1
            # Single-line rendering stays on the same line as the label.
            println(io, " " * rendered)
        else
            println(io)
            for piece in pieces
                println(io, " "^8 * piece)
            end
        end
    end
    return nothing
end
52

53
"""
54
    HallOfFame(options::Options, dataset::Dataset{T,L}) where {T<:DATA_TYPE,L<:LOSS_TYPE}
55

56
Create empty HallOfFame. The HallOfFame stores a list
57
of `PopMember` objects in `.members`, which is enumerated
58
by size (i.e., `.members[1]` is the constant solution).
59
`.exists` is used to determine whether the particular member
60
has been instantiated or not.
61

62
Arguments:
63
- `options`: Options providing `maxsize` (which, together with `MAX_DEGREE`, sets the
    number of slots) and the `deterministic` flag passed on to each `PopMember`.
64
- `dataset`: Dataset containing the input data.
65
"""
66
function HallOfFame(
    options::Options, dataset::Dataset{T,L}
) where {T<:DATA_TYPE,L<:LOSS_TYPE}
    # One slot per complexity, from 1 up to `maxsize + MAX_DEGREE`.
    n_slots = options.maxsize + MAX_DEGREE
    # Placeholder expression built from `zero(T)`; each slot gets its own copy.
    template = create_expression(zero(T), options, dataset)

    members = [
        PopMember(
            copy(template),
            L(0),
            L(Inf),
            options;
            parent=-1,
            deterministic=options.deterministic,
        ) for _ in 1:n_slots
    ]
    # No slot counts as set until it is explicitly filled.
    exists = fill(false, length(members))

    return HallOfFame{T,L,typeof(template)}(members, exists)
end
86

87
"""
    Base.copy(hof::HallOfFame)

Return a new `HallOfFame` whose `members` are individually `copy`-ed and whose
`exists` vector is a fresh copy of `hof.exists`.
"""
function Base.copy(hof::HallOfFame)
    members = map(copy, hof.members)
    exists = copy(hof.exists)
    return HallOfFame(members, exists)
end
92

93
"""
94
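    calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,N}) where {T,L,N}

Return copies of the members of `hallOfFame` that exist and whose loss is strictly
lower than the loss of every existing member of smaller complexity.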
95
"""
96
function calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,N}) where {T,L,N}
    # Dominating pareto curve - must be better than all simpler equations
    frontier = PopMember{T,L,N}[]
    for c in 1:length(hallOfFame.members)
        hallOfFame.exists[c] || continue
        candidate = hallOfFame.members[c]
        # A candidate is dominated as soon as one existing, simpler member has a
        # loss that is less than or equal to the candidate's loss.
        dominated = any(1:(c - 1)) do j
            hallOfFame.exists[j] && candidate.loss >= hallOfFame.members[j].loss
        end
        dominated || push!(frontier, copy(candidate))
    end
    return frontier
end
126

127
"""
    string_dominating_pareto_curve(hallOfFame, dataset, options; width=nothing)

Render the dominating Pareto frontier of `hallOfFame` as a text table and return
it as a `String` (no trailing newline).

The table has a `Hall of Fame:` title, a rule of `twidth - 1` dashes, a header
row, one entry per frontier member (complexity, loss, score, equation) as
produced by `format_hall_of_fame(hallOfFame, options)`, and a closing rule.

Each equation is prefixed by `dataset.y_variable_name`, the formatted
`dataset.y_sym_units`, and `WILDCARD_UNIT_STRING` when `y_sym_units` is `nothing`
but `X_sym_units` is not. Equations are split with `split_string` into pieces of
`equation_width`; every piece except the last is followed by `...`, and
continuation lines are padded to line up under the equation column.

# Keywords
- `width`: table width. `nothing` means 100; integer values are clamped to at
    least 100.
"""
function string_dominating_pareto_curve(
    hallOfFame, dataset, options; width::Union{Integer,Nothing}=nothing
)
    # TODO: Get user's terminal width.
    twidth = (width === nothing) ? 100 : max(100, width::Integer)
    rule = "-"^(twidth - 1)

    # Layout constants are the same for every row, so compute them once.
    dots = "..."
    base_string_length = length(@sprintf("%-10d  %-8.3e  %8.3e  ", 1, 1.0, 1.0))
    equation_width = (twidth - 1) - base_string_length - length(dots)
    continuation_pad = " "^base_string_length

    # Accumulate into a buffer rather than re-allocating a growing string.
    io = IOBuffer()
    print(io, "Hall of Fame:\n")
    print(io, rule, "\n")
    print(
        io,
        @sprintf("%-10s  %-8s   %-8s  %-8s\n", "Complexity", "Loss", "Score", "Equation")
    )

    formatted = format_hall_of_fame(hallOfFame, options)
    for (tree, score, loss, complexity) in
        zip(formatted.trees, formatted.scores, formatted.losses, formatted.complexities)
        eqn_string = string_tree(
            tree,
            options;
            display_variable_names=dataset.display_variable_names,
            X_sym_units=dataset.X_sym_units,
            y_sym_units=dataset.y_sym_units,
            raw=false,
        )
        y_prefix = dataset.y_variable_name
        unit_str = format_dimensions(dataset.y_sym_units)
        y_prefix *= unit_str
        if dataset.y_sym_units === nothing && dataset.X_sym_units !== nothing
            y_prefix *= WILDCARD_UNIT_STRING
        end
        eqn_string = y_prefix * " = " * eqn_string

        print(io, @sprintf("%-10d  %-8.3e  %-8.3e  ", complexity, loss, score))

        split_eqn = split_string(eqn_string, equation_width)
        print_pad = false
        while length(split_eqn) > 1
            cur_piece = popfirst!(split_eqn)
            print_pad && print(io, continuation_pad)
            print(io, cur_piece, dots, "\n")
            print_pad = true
        end
        print_pad && print(io, continuation_pad)
        print(io, split_eqn[1], "\n")
    end
    print(io, rule)
    return String(take!(io))
end
176

177
"""
    format_hall_of_fame(hof::HallOfFame{T,L}, options) where {T,L}

Collect the Pareto frontier of `hof` into a named tuple
`(; trees, scores, losses, complexities)` with one entry per frontier member.

`complexities` come from `compute_complexity(member, options)`. The score of
entry `i` is `relu(-log(relu(loss_i / loss_prev) + eps(L)) / (c_i - c_prev))`,
where the previous loss starts at `typemax(L)` and the previous complexity at `0`.

# Throws
- `DomainError` if any frontier member has a negative loss.
"""
function format_hall_of_fame(hof::HallOfFame{T,L}, options) where {T,L}
    dominating = calculate_pareto_frontier(hof)
    for member in dominating
        member.loss < 0.0 && throw(
            DomainError(
                member.loss,
                "Your loss function must be non-negative. To do this, consider wrapping your loss inside an exponential, which will not affect the search (unless you are using annealing).",
            ),
        )
    end

    # Added inside the logarithm so that a loss ratio of zero stays finite.
    ZERO_POINT = eps(L)

    trees = [member.tree for member in dominating]
    losses = [member.loss for member in dominating]
    complexities = [compute_complexity(member, options) for member in dominating]
    scores = Array{L}(undef, length(dominating))

    prev_loss = typemax(L)
    prev_complexity = 0
    for (i, (complexity, loss)) in enumerate(zip(complexities, losses))
        log_ratio = log(relu(loss / prev_loss) + ZERO_POINT)
        scores[i] = relu(-log_ratio / (complexity - prev_complexity))
        prev_loss = loss
        prev_complexity = complexity
    end
    return (; trees, scores, losses, complexities)
end
212
"""
    format_hall_of_fame(hof::AbstractVector{<:HallOfFame}, options)

Apply `format_hall_of_fame` to every element of `hof` and return a named tuple
`(; trees, scores, losses, complexities)` in which each field holds one entry per
hall of fame.
"""
function format_hall_of_fame(hof::AbstractVector{<:HallOfFame}, options)
    per_hof = [format_hall_of_fame(single, options) for single in hof]
    trees = [result.trees for result in per_hof]
    scores = [result.scores for result in per_hof]
    losses = [result.losses for result in per_hof]
    complexities = [result.complexities for result in per_hof]
    return (; trees, scores, losses, complexities)
end
221
# TODO: Re-use this in `string_dominating_pareto_curve`
222

223
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc