• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MilesCranmer / SymbolicRegression.jl / 9686354911

26 Jun 2024 08:31PM UTC coverage: 93.22% (-1.4%) from 94.617%
9686354911

Pull #326

github

web-flow
Merge 6f8229c9f into ceddaa424
Pull Request #326: BREAKING: Change expression types to `DynamicExpressions.Expression` (from `DynamicExpressions.Node`)

275 of 296 new or added lines in 17 files covered. (92.91%)

34 existing lines in 5 files now uncovered.

2530 of 2714 relevant lines covered (93.22%)

32081968.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.46
/src/SingleIteration.jl
1
module SingleIterationModule
2

3
using DynamicExpressions:
4
    AbstractExpression,
5
    Node,
6
    constructorof,
7
    string_tree,
8
    simplify_tree!,
9
    combine_operators,
10
    parse_expression
11
using ..UtilsModule: @threads_if
12
using ..CoreModule: Options, Dataset, RecordType, DATA_TYPE, LOSS_TYPE, create_expression
13
using ..ComplexityModule: compute_complexity
14
using ..PopMemberModule: PopMember, generate_reference
15
using ..PopulationModule: Population, finalize_scores, best_sub_pop
16
using ..HallOfFameModule: HallOfFame
17
using ..AdaptiveParsimonyModule: RunningSearchStatistics
18
using ..RegularizedEvolutionModule: reg_evol_cycle
19
using ..LossFunctionsModule: score_func_batched, batch_sample
20
using ..ConstantOptimizationModule: optimize_constants
21
using ..RecorderModule: @recorder
22

23
# Run `ncycles` passes of regularized evolution over `pop`, lowering the
# temperature linearly from 1.0 to 0.0 across the passes (or holding it at 1.0
# when `options.annealing` is off). After each pass, every member is compared
# with a hall of fame local to this call, which keeps the lowest-scoring member
# seen at each complexity.
#
# Returns `(pop, best_examples_seen, num_evals)`: the evolved population, the
# local hall of fame, and the evaluation count accumulated from `reg_evol_cycle`.
#
# `verbosity` is accepted but not read in this function.
function s_r_cycle(
    dataset::D,
    pop::P,
    ncycles::Int,
    curmaxsize::Int,
    running_search_statistics::RunningSearchStatistics;
    verbosity::Int=0,
    options::Options,
    record::RecordType,
)::Tuple{
    P,HallOfFame{T,L,N},Float64
} where {T,L,D<:Dataset{T,L},N<:AbstractExpression{T},P<:Population{T,L,N}}
    max_temp = 1.0
    # Without annealing, the temperature stays at its maximum for every cycle.
    min_temp = options.annealing ? 0.0 : max_temp
    # `LinRange` rejects a length-1 range whose endpoints differ, so a single
    # cycle runs at `max_temp` instead of raising an `ArgumentError`.
    all_temperatures = if ncycles == 1
        LinRange(max_temp, max_temp, ncycles)
    else
        LinRange(max_temp, min_temp, ncycles)
    end
    best_examples_seen = HallOfFame(options, dataset)
    num_evals = 0.0

    # For evaluating on a fixed batch (for batching)
    idx = options.batching ? batch_sample(dataset, options) : Int[]
    example_tree = create_expression(zero(T), options, dataset)
    # One cache slot per population position: the expression last scored there
    # and the batched score it received.
    loss_cache = [(oid=example_tree, score=zero(L)) for _ in pop.members]
    first_loop = true

    for temperature in all_temperatures
        pop, tmp_num_evals = reg_evol_cycle(
            dataset,
            pop,
            temperature,
            curmaxsize,
            running_search_statistics,
            options,
            record,
        )
        num_evals += tmp_num_evals
        for (i, member) in enumerate(pop.members)
            size = compute_complexity(member, options)
            score = if options.batching
                oid = member.tree
                if loss_cache[i].oid != oid || first_loop
                    # Evaluate on fixed batch so that we can more accurately
                    # compare expressions with a batched loss (though the batch
                    # changes each iteration, and we evaluate on full-batch outside,
                    # so this is not biased).
                    _score, _ = score_func_batched(
                        dataset, member, options; complexity=size, idx=idx
                    )
                    loss_cache[i] = (oid=copy(oid), score=_score)
                    _score
                else
                    # Already evaluated this particular expression, so just use
                    # the cached score
                    loss_cache[i].score
                end
            else
                member.score
            end
            # TODO: Note that this per-population hall of fame only uses the batched
            #       loss, and is therefore inaccurate. Therefore, some expressions
            #       may be lost if a very small batch size is used.
            # - Could have different batch size for different things (smaller for constant opt)
            # - Could just recompute losses here (expensive)
            # - Average over a few batches
            # - Store multiple expressions in hall of fame
            #
            # The size bounds are checked first so the hall of fame is only
            # indexed with a valid complexity.
            if 0 < size <= options.maxsize && (
                !best_examples_seen.exists[size] ||
                score < best_examples_seen.members[size].score
            )
                best_examples_seen.exists[size] = true
                best_examples_seen.members[size] = copy(member)
            end
        end
        first_loop = false
    end

    return (pop, best_examples_seen, num_evals)
end
105

106
# Simplify and tune the members of `pop` in place, then rescore and relabel them.
#
# For each of the `pop.n` members:
# - when `options.should_simplify` is set, the member's tree is passed through
#   `simplify_tree!` (and through `combine_operators` if the result is a `Node`);
# - when `options.should_optimize_constants` is set and the member was drawn at
#   random (probability `options.optimizer_probability`), the member is replaced
#   by the result of `optimize_constants`.
# Scores are then finalized with `finalize_scores`, and every member receives a
# fresh reference whose parent is its previous reference. Inside `@recorder`, a
# "tuning" event and a "death" event are appended to the old reference's record.
#
# Returns `(pop, num_evals)`. `curmaxsize` is not read in this function.
function optimize_and_simplify_population(
    dataset::D, pop::P, options::Options, curmaxsize::Int, record::RecordType
)::Tuple{P,Float64} where {T,L,D<:Dataset{T,L},P<:Population{T,L}}
    evals_per_member = zeros(Float64, pop.n)
    # Decide up front which members get constant optimization.
    selected = rand(pop.n) .< options.optimizer_probability
    @threads_if !(options.deterministic) for j in 1:(pop.n)
        if options.should_simplify
            simplified = simplify_tree!(pop.members[j].tree, options.operators)
            # NOTE(review): operators are only combined when the simplified tree
            # is a bare `Node` — confirm whether expression types should be
            # handled here as well.
            pop.members[j].tree = if simplified isa Node
                combine_operators(simplified, options.operators)
            else
                simplified
            end
        end
        if options.should_optimize_constants && selected[j]
            # TODO: Might want to do full batch optimization here?
            pop.members[j], evals_per_member[j] = optimize_constants(
                dataset, pop.members[j], options
            )
        end
    end
    optimization_evals = sum(evals_per_member)
    pop, rescoring_evals = finalize_scores(dataset, pop, options)
    num_evals = optimization_evals + rescoring_evals

    # Give every member a new reference, keeping the old one as its parent,
    # and optionally record which operations occurred.
    for j in 1:(pop.n)
        old_ref = pop.members[j].ref
        new_ref = generate_reference()
        pop.members[j].parent = old_ref
        pop.members[j].ref = new_ref

        @recorder begin
            # Mirrors the bookkeeping in RegularizedEvolution.jl, but the
            # top-level "mutations" record is required to exist already.
            @assert haskey(record, "mutations")
            mutations = record["mutations"]
            member = pop.members[j]
            member_key = "$(member.ref)"
            if !haskey(mutations, member_key)
                mutations[member_key] = RecordType(
                    "events" => Vector{RecordType}(),
                    "tree" => string_tree(member.tree, options),
                    "score" => member.score,
                    "loss" => member.loss,
                    "parent" => member.parent,
                )
            end
            mutation_kind = if selected[j] && options.should_optimize_constants
                "simplification_and_optimization"
            else
                "simplification"
            end
            tuning_event = RecordType(
                "type" => "tuning",
                "time" => time(),
                "child" => new_ref,
                "mutation" => RecordType("type" => mutation_kind),
            )
            death_event = RecordType("type" => "death", "time" => time())

            # Both events are attached to the member's previous reference.
            old_events = mutations["$(old_ref)"]["events"]
            push!(old_events, tuning_event)
            push!(old_events, death_event)
        end
    end
    return (pop, num_evals)
end
174

175
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc