• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MilesCranmer / SymbolicRegression.jl / 9763114573

02 Jul 2024 02:43PM UTC coverage: 96.083% (+1.4%) from 94.697%
9763114573

Pull #326

github

web-flow
Merge 7a70dfb88 into c5ed5d0b9
Pull Request #326: BREAKING: Change expression types to `DynamicExpressions.Expression` (from `DynamicExpressions.Node`)

352 of 357 new or added lines in 19 files covered. (98.6%)

60 existing lines in 12 files now uncovered.

2625 of 2732 relevant lines covered (96.08%)

66999448.2 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.97
/src/HallOfFame.jl
1
module HallOfFameModule
2

3
using DynamicExpressions:
4
    AbstractExpression, parse_expression, Node, constructorof, string_tree
5
using DynamicExpressions: with_type_parameters
6
using ..UtilsModule: split_string
7
using ..CoreModule:
8
    MAX_DEGREE, Options, Dataset, DATA_TYPE, LOSS_TYPE, relu, create_expression
9
using ..ComplexityModule: compute_complexity
10
using ..PopMemberModule: PopMember
11
using ..LossFunctionsModule: eval_loss
12
using ..InterfaceDynamicExpressionsModule: format_dimensions, WILDCARD_UNIT_STRING
13
using Printf: @sprintf
14

15
"""
16
    HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE}
17

18
List of the best members seen all time in `.members`, with `.members[c]` being
19
the best member seen at complexity c. Including only the members which actually
20
have been set, you can run `.members[exists]`.
21

22
# Fields
23

24
- `members::Array{PopMember{T,L},1}`: List of the best members seen all time.
25
    These are ordered by complexity, with `.members[1]` the member with complexity 1.
26
- `exists::Array{Bool,1}`: Whether the member at the given complexity has been set.
27
"""
28
struct HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}
29
    members::Array{PopMember{T,L,N},1}
41,552✔
30
    exists::Array{Bool,1} #Whether it has been set
31
end
32
# Multi-line `text/plain` display of a `HallOfFame`. For every slot, prints the
# `.exists` flag followed by the member itself (or `undef` when the slot is unset).
# Members whose own display spans several lines are printed on the following
# lines with a deeper indent.
function Base.show(io::IO, mime::MIME"text/plain", hof::HallOfFame{T,L,N}) where {T,L,N}
    indent = " "^4
    println(io, "HallOfFame{...}:")
    for idx in eachindex(hof.members, hof.exists)
        is_set = hof.exists[idx]
        rendered = if is_set
            sprint(show, mime, hof.members[idx])
        else
            "undef"
        end
        println(io, indent, ".exists[$idx] = $is_set")
        print(io, indent, ".members[$idx] =")
        rows = split(strip(rendered), '\n')
        if length(rows) > 1
            # Multi-line member: start on a fresh line and indent every row.
            println(io)
            for row in rows
                println(io, " "^8, row)
            end
        else
            println(io, " ", rendered)
        end
    end
    return nothing
end
52

53
"""
54
    HallOfFame(options::Options, dataset::Dataset{T,L}) where {T<:DATA_TYPE,L<:LOSS_TYPE}
55

56
Create empty HallOfFame. The HallOfFame stores a list
57
of `PopMember` objects in `.members`, which is enumerated
58
by size (i.e., `.members[1]` is the constant solution).
59
`.exists` is used to determine whether the particular member
60
has been instantiated or not.
61

62
Arguments:
63
- `options`: Options containing specification about deterministic.
64
- `dataset`: Dataset containing the input data.
65
"""
66
function HallOfFame(
32,411✔
67
    options::Options, dataset::Dataset{T,L}
68
) where {T<:DATA_TYPE,L<:LOSS_TYPE}
69
    actualMaxsize = options.maxsize + MAX_DEGREE
40,229✔
70
    base_tree = create_expression(zero(T), options, dataset)
40,230✔
71

72
    return HallOfFame{T,L,typeof(base_tree)}(
40,227✔
73
        [
74
            PopMember(
75
                copy(base_tree),
76
                L(0),
77
                L(Inf),
78
                options;
79
                parent=-1,
80
                deterministic=options.deterministic,
81
            ) for i in 1:actualMaxsize
82
        ],
83
        [false for i in 1:actualMaxsize],
84
    )
85
end
86

87
# Copy a `HallOfFame`: every member is `copy`-ed individually and the `exists`
# flags are duplicated into a fresh vector.
function Base.copy(hof::HallOfFame)
    cloned_members = map(copy, hof.members)
    cloned_flags = copy(hof.exists)
    return HallOfFame(cloned_members, cloned_flags)
end
92

93
"""
94
    calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,P}) where {T<:DATA_TYPE,L<:LOSS_TYPE}
95
"""
96
function calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,N}) where {T,L,N}
45,113✔
97
    # TODO - remove dataset from args.
98
    P = PopMember{T,L,N}
39,166✔
99
    # Dominating pareto curve - must be better than all simpler equations
100
    dominating = P[]
45,113✔
101
    actualMaxsize = length(hallOfFame.members)
45,113✔
102
    for size in 1:actualMaxsize
61,945✔
103
        if !hallOfFame.exists[size]
981,946✔
104
            continue
218,151✔
105
        end
106
        member = hallOfFame.members[size]
763,795✔
107
        # We check if this member is better than all members which are smaller than it and
108
        # also exist.
109
        betterThanAllSmaller = true
671,820✔
110
        for i in 1:(size - 1)
1,036,185✔
111
            if !hallOfFame.exists[i]
5,920,774✔
112
                continue
499,597✔
113
            end
114
            simpler_member = hallOfFame.members[i]
5,421,177✔
115
            if member.loss >= simpler_member.loss
5,421,177✔
116
                betterThanAllSmaller = false
316,531✔
117
                break
2,948,687✔
118
            end
119
        end
8,172,480✔
120
        if betterThanAllSmaller
763,795✔
121
            push!(dominating, copy(member))
809,057✔
122
        end
123
    end
1,504,491✔
124
    return dominating
45,113✔
125
end
126

127
"""
    string_dominating_pareto_curve(hallOfFame, dataset, options; width=nothing)

Render the dominating Pareto curve of `hallOfFame` as a text table and return it
as a `String`. The table has one row per entry returned by `format_hall_of_fame`,
with columns for complexity, loss, score, and the equation string.

# Arguments
- `hallOfFame`: Passed to `format_hall_of_fame` together with `options`.
- `dataset`: Supplies `y_variable_name`, `display_variable_names`, `X_sym_units`,
    and `y_sym_units` used to print each equation.
- `options`: Forwarded to `format_hall_of_fame` and `string_tree`.

# Keywords
- `width`: Requested table width. The table is never narrower than 100 columns;
    `nothing` selects 100.

Equations are passed through `split_string` with the remaining column width;
when that yields more than one piece, every piece but the last is followed by
`...` and continuation lines are indented to the equation column.
"""
function string_dominating_pareto_curve(
    hallOfFame, dataset, options; width::Union{Integer,Nothing}=nothing
)
    twidth = (width === nothing) ? 100 : max(100, width::Integer)
    # TODO: Get user's terminal width.
    rule = "-"^(twidth - 1)

    # Layout quantities that do not depend on the row; computed once.
    base_string_length = length(@sprintf("%-10d  %-8.3e  %8.3e  ", 1, 1.0, 1.0))
    dots = "..."
    equation_width = (twidth - 1) - base_string_length - length(dots)
    continuation_pad = " "^base_string_length

    formatted = format_hall_of_fame(hallOfFame, options)

    # The left-hand side label depends only on `dataset`, so build it once.
    y_prefix = dataset.y_variable_name * format_dimensions(dataset.y_sym_units)
    if dataset.y_sym_units === nothing && dataset.X_sym_units !== nothing
        y_prefix *= WILDCARD_UNIT_STRING
    end

    # Accumulate into a buffer rather than re-allocating a growing String.
    io = IOBuffer()
    print(io, "Hall of Fame:\n")
    print(io, rule, "\n")
    print(
        io,
        @sprintf("%-10s  %-8s   %-8s  %-8s\n", "Complexity", "Loss", "Score", "Equation"),
    )

    for (tree, score, loss, complexity) in
        zip(formatted.trees, formatted.scores, formatted.losses, formatted.complexities)
        eqn_string = string_tree(
            tree,
            options;
            display_variable_names=dataset.display_variable_names,
            X_sym_units=dataset.X_sym_units,
            y_sym_units=dataset.y_sym_units,
            raw=false,
        )
        eqn_string = y_prefix * " = " * eqn_string

        print(io, @sprintf("%-10d  %-8.3e  %-8.3e  ", complexity, loss, score))

        split_eqn = split_string(eqn_string, equation_width)
        print_pad = false
        while length(split_eqn) > 1
            cur_piece = popfirst!(split_eqn)
            print(io, print_pad ? continuation_pad : "", cur_piece, dots, "\n")
            # Every line after the first is indented to the equation column.
            print_pad = true
        end
        print(io, print_pad ? continuation_pad : "", split_eqn[1], "\n")
    end
    print(io, rule)
    return String(take!(io))
end
176

177
"""
    format_hall_of_fame(hof::HallOfFame{T,L}, options) where {T,L}

Summarize the dominating Pareto curve of `hof` (as given by
`calculate_pareto_frontier`) and return the named tuple
`(; trees, scores, losses, complexities)`, each a vector with one entry per
member of the curve.

The score of entry `i` is
`relu(-log(relu(loss_i / loss_prev) + eps(L)) / (complexity_i - complexity_prev))`,
where the "previous" loss and complexity start at `typemax(L)` and `0`.

Throws a `DomainError` if any member of the curve has a negative loss.
"""
function format_hall_of_fame(hof::HallOfFame{T,L}, options) where {T,L}
    dominating = calculate_pareto_frontier(hof)
    for member in dominating
        member.loss < 0.0 && throw(
            DomainError(
                member.loss,
                "Your loss function must be non-negative. To do this, consider wrapping your loss inside an exponential, which will not affect the search (unless you are using annealing).",
            ),
        )
    end

    # Small offset so the logarithm below never sees an exact zero.
    ZERO_POINT = eps(L)
    prev_loss = typemax(L)
    prev_complexity = 0

    trees = map(member -> member.tree, dominating)
    losses = map(member -> member.loss, dominating)
    complexities = map(member -> compute_complexity(member, options), dominating)
    scores = Array{L}(undef, length(dominating))

    for (i, (complexity, loss)) in enumerate(zip(complexities, losses))
        log_loss_ratio = log(relu(loss / prev_loss) + ZERO_POINT)
        scores[i] = relu(-log_loss_ratio / (complexity - prev_complexity))
        prev_loss = loss
        prev_complexity = complexity
    end
    return (; trees, scores, losses, complexities)
end
212
# Vector form: format every `HallOfFame` in `hof` separately and return a named
# tuple whose fields each hold one entry per hall of fame.
function format_hall_of_fame(hof::AbstractVector{<:HallOfFame}, options)
    per_hof = collect(format_hall_of_fame(h, options) for h in hof)
    trees = collect(result.trees for result in per_hof)
    scores = collect(result.scores for result in per_hof)
    losses = collect(result.losses for result in per_hof)
    complexities = collect(result.complexities for result in per_hof)
    return (; trees, scores, losses, complexities)
end
221
# NOTE: `string_dominating_pareto_curve` already calls `format_hall_of_fame`, so the
# earlier TODO to re-use it there appears to be done.
222

223
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc