• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MilesCranmer / SymbolicRegression.jl / 9686354911

26 Jun 2024 08:31PM UTC coverage: 93.22% (-1.4%) from 94.617%
9686354911

Pull #326

github

web-flow
Merge 6f8229c9f into ceddaa424
Pull Request #326: BREAKING: Change expression types to `DynamicExpressions.Expression` (from `DynamicExpressions.Node`)

275 of 296 new or added lines in 17 files covered. (92.91%)

34 existing lines in 5 files now uncovered.

2530 of 2714 relevant lines covered (93.22%)

32081968.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.48
/src/Mutate.jl
1
module MutateModule
2

3
using DynamicExpressions:
4
    AbstractExpression,
5
    ParametricExpression,
6
    Node,
7
    with_contents,
8
    get_tree,
9
    preserve_sharing,
10
    copy_node,
11
    count_nodes,
12
    count_constants,
13
    simplify_tree!,
14
    combine_operators
15
using ..CoreModule:
16
    Options, MutationWeights, Dataset, RecordType, sample_mutation, DATA_TYPE, LOSS_TYPE
17
using ..ComplexityModule: compute_complexity
18
using ..LossFunctionsModule: score_func, score_func_batched
19
using ..CheckConstraintsModule: check_constraints
20
using ..AdaptiveParsimonyModule: RunningSearchStatistics
21
using ..PopMemberModule: PopMember
22
using ..MutationFunctionsModule:
23
    gen_random_tree_fixed_size,
24
    mutate_constant,
25
    mutate_operator,
26
    swap_operands,
27
    append_random_op,
28
    prepend_random_op,
29
    insert_random_op,
30
    delete_random_op!,
31
    crossover_trees,
32
    form_random_connection!,
33
    break_random_connection!
34
using ..ConstantOptimizationModule: optimize_constants
35
using ..RecorderModule: @recorder
36

37
function condition_mutation_weights!(
63,247,963✔
38
    weights::MutationWeights, member::PopMember, options::Options, curmaxsize::Int
39
)
40
    tree = get_tree(member.tree)
63,246,692✔
41
    if !preserve_sharing(typeof(member.tree))
63,248,668✔
42
        weights.form_connection = 0.0
63,256,999✔
43
        weights.break_connection = 0.0
63,257,317✔
44
    end
45
    if tree.degree == 0
63,257,239✔
46
        # If equation is too small, don't delete operators
47
        # or simplify
48
        weights.mutate_operator = 0.0
29,406✔
49
        weights.swap_operands = 0.0
29,406✔
50
        weights.delete_node = 0.0
29,406✔
51
        weights.simplify = 0.0
29,406✔
52
        if !tree.constant
29,406✔
53
            weights.optimize = 0.0
9,834✔
54
            weights.mutate_constant = 0.0
9,834✔
55
        end
56
        return nothing
29,406✔
57
    end
58

59
    if !any(node -> node.degree == 2, tree)
151,888,369✔
60
        # swap is implemented only for binary ops
61
        weights.swap_operands = 0.0
456,753✔
62
    end
63

64
    if !(member.tree isa ParametricExpression)  # TODO: HACK
63,231,438✔
65
        #More constants => more likely to do constant mutation
66
        let n_constants = count_constants(member.tree)
62,219,935✔
67
            weights.mutate_constant *= min(8, n_constants) / 8.0
62,228,777✔
68
        end
69
    end
70
    complexity = compute_complexity(member, options)
86,280,929✔
71

72
    if complexity >= curmaxsize
63,235,061✔
73
        # If equation is too big, don't add new operators
74
        weights.add_node = 0.0
8,878,091✔
75
        weights.insert_node = 0.0
8,878,093✔
76
    end
77

78
    if !options.should_simplify
63,234,759✔
79
        weights.simplify = 0.0
3,961,178✔
80
    end
81

82
    return nothing
63,234,551✔
83
end
84

85
# Go through one simulated options.annealing mutation cycle
86
#  exp(-delta/T) defines probability of accepting a change
87
function next_generation(
114,780,491✔
88
    dataset::D,
89
    member::P,
90
    temperature,
91
    curmaxsize::Int,
92
    running_search_statistics::RunningSearchStatistics,
93
    options::Options;
94
    tmp_recorder::RecordType,
95
)::Tuple{
96
    P,Bool,Float64
97
} where {T,L,D<:Dataset{T,L},N<:AbstractExpression{T},P<:PopMember{T,L,N}}
98
    parent_ref = member.ref
74,971,585✔
99
    mutation_accepted = false
63,232,234✔
100
    num_evals = 0.0
63,232,764✔
101

102
    #TODO - reconsider this
103
    beforeScore, beforeLoss = if options.batching
63,256,889✔
104
        num_evals += (options.batch_size / dataset.n)
25,618,235✔
105
        score_func_batched(dataset, member, options)
41,397,116✔
106
    else
107
        member.score, member.loss
100,892,547✔
108
    end
109

110
    nfeatures = dataset.nfeatures
63,261,773✔
111

112
    weights = copy(options.mutation_weights)
63,261,257✔
113

114
    condition_mutation_weights!(weights, member, options, curmaxsize)
92,172,168✔
115

116
    mutation_choice = sample_mutation(weights)
63,263,841✔
117

118
    successful_mutation = false
63,239,541✔
119
    #TODO: Currently we dont take this \/ into account
120
    is_success_always_possible = true
63,239,470✔
121
    attempts = 0
63,239,695✔
122
    max_attempts = 10
63,239,523✔
123

124
    #############################################
125
    # Mutations
126
    #############################################
127
    local tree
×
128
    while (!successful_mutation) && attempts < max_attempts
131,426,542✔
129
        tree = copy_node(member.tree)
72,794,536✔
130
        successful_mutation = true
72,770,025✔
131
        if mutation_choice == :mutate_constant
72,794,498✔
132
            tree = mutate_constant(tree, temperature, options)
13,117,845✔
133
            @recorder tmp_recorder["type"] = "constant"
7,826,456✔
134
            is_success_always_possible = true
7,826,384✔
135
            # Mutating a constant shouldn't invalidate an already-valid function
136
        elseif mutation_choice == :mutate_operator
64,967,852✔
137
            tree = mutate_operator(tree, options)
10,718,129✔
138
            @recorder tmp_recorder["type"] = "operator"
6,523,250✔
139
            is_success_always_possible = true
6,523,211✔
140
            # Can always mutate to the same operator
141

142
        elseif mutation_choice == :swap_operands
58,445,689✔
143
            tree = swap_operands(tree)
1,365,010✔
144
            @recorder tmp_recorder["type"] = "swap_operands"
832,541✔
145
            is_success_always_possible = true
832,544✔
146

147
        elseif mutation_choice == :add_node
57,613,219✔
148
            if rand() < 0.5
5,596,639✔
149
                tree = append_random_op(tree, options, nfeatures)
4,098,798✔
150
                @recorder tmp_recorder["type"] = "append_op"
2,796,555✔
151
            else
152
                tree = prepend_random_op(tree, options, nfeatures)
4,105,990✔
153
                @recorder tmp_recorder["type"] = "prepend_op"
2,800,131✔
154
            end
155
            is_success_always_possible = false
5,596,634✔
156
            # Can potentially have a situation without success
157
        elseif mutation_choice == :insert_node
52,017,497✔
158
            tree = insert_random_op(tree, options, nfeatures)
34,602,256✔
159
            @recorder tmp_recorder["type"] = "insert_op"
23,428,752✔
160
            is_success_always_possible = false
23,428,273✔
161
        elseif mutation_choice == :delete_node
28,592,131✔
162
            tree = delete_random_op!(tree, options, nfeatures)
29,282,176✔
163
            @recorder tmp_recorder["type"] = "delete_op"
19,979,509✔
164
            is_success_always_possible = true
19,979,365✔
165
        elseif mutation_choice == :simplify
8,613,708✔
166
            @assert options.should_simplify
52,563✔
167
            simplify_tree!(tree, options.operators)
52,563✔
168
            tree = combine_operators(tree, options.operators)
52,563✔
169
            @recorder tmp_recorder["type"] = "partial_simplify"
52,563✔
170
            mutation_accepted = true
49,808✔
171
            return (
52,563✔
172
                PopMember(
173
                    tree,
174
                    beforeScore,
175
                    beforeLoss,
176
                    options;
177
                    parent=parent_ref,
178
                    deterministic=options.deterministic,
179
                ),
180
                mutation_accepted,
181
                num_evals,
182
            )
183

184
            is_success_always_possible = true
×
185
            # Simplification shouldn't hurt complexity; unless some non-symmetric constraint
186
            # to commutative operator...
187

188
        elseif mutation_choice == :randomize
8,561,138✔
189
            # We select a random size, though the generated tree
190
            # may have fewer nodes than we request.
191
            tree_size_to_generate = rand(1:curmaxsize)
3,989,812✔
192
            tree = with_contents(
5,770,036✔
193
                tree,
194
                gen_random_tree_fixed_size(tree_size_to_generate, options, nfeatures, T),
195
            )
196
            @recorder tmp_recorder["type"] = "regenerate"
3,989,865✔
197

198
            is_success_always_possible = true
3,989,850✔
199
        elseif mutation_choice == :optimize
4,571,482✔
200
            cur_member = PopMember(
8,360✔
201
                tree,
202
                beforeScore,
203
                beforeLoss,
204
                options,
205
                compute_complexity(member, options);
206
                parent=parent_ref,
207
                deterministic=options.deterministic,
208
            )
209
            cur_member, new_num_evals = optimize_constants(dataset, cur_member, options)
15,342✔
210
            num_evals += new_num_evals
8,123✔
211
            @recorder tmp_recorder["type"] = "optimize"
8,123✔
212
            mutation_accepted = true
8,123✔
213
            return (cur_member, mutation_accepted, num_evals)
8,123✔
214

215
            is_success_always_possible = true
×
216
        elseif mutation_choice == :do_nothing
4,563,350✔
217
            @recorder begin
4,563,359✔
218
                tmp_recorder["type"] = "identity"
8,199✔
219
                tmp_recorder["result"] = "accept"
8,199✔
220
                tmp_recorder["reason"] = "identity"
8,199✔
221
            end
222
            mutation_accepted = true
4,560,189✔
223
            return (
4,563,355✔
224
                PopMember(
225
                    tree,
226
                    beforeScore,
227
                    beforeLoss,
228
                    options,
229
                    compute_complexity(member, options);
230
                    parent=parent_ref,
231
                    deterministic=options.deterministic,
232
                ),
233
                mutation_accepted,
234
                num_evals,
235
            )
236
        elseif mutation_choice == :form_connection
×
237
            tree = form_random_connection!(tree)
×
238
            @recorder tmp_recorder["type"] = "form_connection"
×
239
            is_success_always_possible = true
×
240
        elseif mutation_choice == :break_connection
×
241
            tree = break_random_connection!(tree)
×
242
            @recorder tmp_recorder["type"] = "break_connection"
×
243
            is_success_always_possible = true
×
244
        else
245
            error("Unknown mutation choice: $mutation_choice")
×
246
        end
247

248
        successful_mutation =
68,172,702✔
249
            successful_mutation && check_constraints(tree, options, curmaxsize)
250

251
        attempts += 1
68,169,553✔
252
    end
55,790,715✔
253
    #############################################
254
    if !(tree isa AbstractExpression)
58,634,895✔
NEW
255
        @error "Failed after $(mutation_choice)"
×
NEW
256
        error()
×
257
    end
258

259
    if !successful_mutation
58,633,965✔
260
        @recorder begin
663,131✔
261
            tmp_recorder["result"] = "reject"
37,547✔
262
            tmp_recorder["reason"] = "failed_constraint_check"
37,546✔
263
        end
264
        mutation_accepted = false
663,130✔
265
        return (
663,131✔
266
            PopMember(
267
                copy_node(member.tree),
268
                beforeScore,
269
                beforeLoss,
270
                options,
271
                compute_complexity(member, options);
272
                parent=parent_ref,
273
                deterministic=options.deterministic,
274
            ),
275
            mutation_accepted,
276
            num_evals,
277
        )
278
    end
279

280
    if options.batching
57,969,250✔
281
        afterScore, afterLoss = score_func_batched(dataset, tree, options)
32,070,862✔
282
        num_evals += (options.batch_size / dataset.n)
22,660,281✔
283
    else
284
        afterScore, afterLoss = score_func(dataset, tree, options)
58,625,110✔
285
        num_evals += 1
35,315,078✔
286
    end
287

288
    if isnan(afterScore)
57,974,246✔
289
        @recorder begin
×
290
            tmp_recorder["result"] = "reject"
×
291
            tmp_recorder["reason"] = "nan_loss"
×
292
        end
293
        mutation_accepted = false
×
294
        return (
×
295
            PopMember(
296
                copy_node(member.tree),
297
                beforeScore,
298
                beforeLoss,
299
                options,
300
                compute_complexity(member, options);
301
                parent=parent_ref,
302
                deterministic=options.deterministic,
303
            ),
304
            mutation_accepted,
305
            num_evals,
306
        )
307
    end
308

309
    probChange = 1.0
57,955,088✔
310
    if options.annealing
57,972,280✔
311
        delta = afterScore - beforeScore
38,534,116✔
312
        probChange *= exp(-delta / (temperature * options.alpha))
38,533,851✔
313
    end
314
    newSize = -1
57,973,558✔
315
    if options.use_frequency
57,973,866✔
316
        oldSize = compute_complexity(member, options)
58,694,007✔
317
        newSize = compute_complexity(tree, options)
49,953,390✔
318
        old_frequency = if (0 < oldSize <= options.maxsize)
42,686,353✔
319
            running_search_statistics.normalized_frequencies[oldSize]
42,686,333✔
320
        else
321
            1e-6
42,686,357✔
322
        end
323
        new_frequency = if (0 < newSize <= options.maxsize)
42,686,392✔
324
            running_search_statistics.normalized_frequencies[newSize]
42,686,313✔
325
        else
326
            1e-6
42,685,710✔
327
        end
328
        probChange *= old_frequency / new_frequency
42,685,508✔
329
    end
330

331
    if probChange < rand()
57,975,534✔
332
        @recorder begin
29,134,568✔
333
            tmp_recorder["result"] = "reject"
86,443✔
334
            tmp_recorder["reason"] = "annealing_or_frequency"
86,443✔
335
        end
336
        mutation_accepted = false
29,131,092✔
337
        return (
29,133,978✔
338
            PopMember(
339
                copy_node(member.tree),
340
                beforeScore,
341
                beforeLoss,
342
                options,
343
                compute_complexity(member, options);
344
                parent=parent_ref,
345
                deterministic=options.deterministic,
346
            ),
347
            mutation_accepted,
348
            num_evals,
349
        )
350
    else
351
        @recorder begin
28,841,720✔
352
            tmp_recorder["result"] = "accept"
194,427✔
353
            tmp_recorder["reason"] = "pass"
194,425✔
354
        end
355
        mutation_accepted = true
28,826,213✔
356
        return (
28,841,395✔
357
            PopMember(
358
                tree,
359
                afterScore,
360
                afterLoss,
361
                options,
362
                newSize;
363
                parent=parent_ref,
364
                deterministic=options.deterministic,
365
            ),
366
            mutation_accepted,
367
            num_evals,
368
        )
369
    end
370
end
371

372
"""Generate a generation via crossover of two members."""
373
function crossover_generation(
1,457,710✔
374
    member1::P, member2::P, dataset::D, curmaxsize::Int, options::Options
375
)::Tuple{P,P,Bool,Float64} where {T,L,D<:Dataset{T,L},N,P<:PopMember{T,L,N}}
376
    tree1 = member1.tree
1,457,710✔
377
    tree2 = member2.tree
1,457,711✔
378
    crossover_accepted = false
1,456,003✔
379

380
    # We breed these until constraints are no longer violated:
381
    child_tree1, child_tree2 = crossover_trees(tree1, tree2)
2,181,790✔
382
    num_tries = 1
1,456,006✔
383
    max_tries = 10
1,456,006✔
384
    num_evals = 0.0
1,455,999✔
385
    afterSize1 = -1
1,456,000✔
386
    afterSize2 = -1
1,456,000✔
387
    while true
2,453,997✔
388
        afterSize1 = compute_complexity(child_tree1, options)
2,848,332✔
389
        afterSize2 = compute_complexity(child_tree2, options)
2,848,341✔
390
        # Both trees satisfy constraints
391
        if check_constraints(child_tree1, options, curmaxsize, afterSize1) &&
2,454,007✔
392
            check_constraints(child_tree2, options, curmaxsize, afterSize2)
393
            break
1,447,817✔
394
        end
395
        if num_tries > max_tries
1,006,191✔
396
            crossover_accepted = false
9,884✔
397
            return member1, member2, crossover_accepted, num_evals  # Fail.
9,898✔
398
        end
399
        child_tree1, child_tree2 = crossover_trees(tree1, tree2)
1,492,496✔
400
        num_tries += 1
996,297✔
401
    end
821,232✔
402
    if options.batching
1,447,812✔
403
        afterScore1, afterLoss1 = score_func_batched(
×
404
            dataset, child_tree1, options; complexity=afterSize1
405
        )
406
        afterScore2, afterLoss2 = score_func_batched(
×
407
            dataset, child_tree2, options; complexity=afterSize2
408
        )
409
        num_evals += 2 * (options.batch_size / dataset.n)
×
410
    else
411
        afterScore1, afterLoss1 = score_func(
2,167,098✔
412
            dataset, child_tree1, options; complexity=afterSize1
413
        )
414
        afterScore2, afterLoss2 = score_func(
2,167,079✔
415
            dataset, child_tree2, options; complexity=afterSize2
416
        )
417
        num_evals += options.batch_size / dataset.n
1,447,806✔
418
    end
419

420
    baby1 = PopMember(
1,447,808✔
421
        child_tree1,
422
        afterScore1,
423
        afterLoss1,
424
        options,
425
        afterSize1;
426
        parent=member1.ref,
427
        deterministic=options.deterministic,
428
    )::P
429
    baby2 = PopMember(
1,447,799✔
430
        child_tree2,
431
        afterScore2,
432
        afterLoss2,
433
        options,
434
        afterSize2;
435
        parent=member2.ref,
436
        deterministic=options.deterministic,
437
    )::P
438

439
    crossover_accepted = true
1,446,106✔
440
    return baby1, baby2, crossover_accepted, num_evals
1,447,801✔
441
end
442

443
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc