• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MilesCranmer / SymbolicRegression.jl / 9639805727

24 Jun 2024 05:00AM UTC coverage: 94.475% (-0.1%) from 94.617%
9639805727

Pull #326

github

web-flow
Merge 3ba1556f8 into ceddaa424
Pull Request #326: BREAKING: Change expression types to `DynamicExpressions.Expression` (from `DynamicExpressions.Node`)

239 of 250 new or added lines in 15 files covered. (95.6%)

4 existing lines in 3 files now uncovered.

2548 of 2697 relevant lines covered (94.48%)

46539295.05 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.02
/src/Mutate.jl
1
module MutateModule
2

3
using DynamicExpressions:
4
    AbstractExpression,
5
    ParametricExpression,
6
    Node,
7
    with_contents,
8
    get_tree,
9
    preserve_sharing,
10
    copy_node,
11
    count_nodes,
12
    count_constants,
13
    simplify_tree!,
14
    combine_operators
15
using ..CoreModule:
16
    Options, MutationWeights, Dataset, RecordType, sample_mutation, DATA_TYPE, LOSS_TYPE
17
using ..ComplexityModule: compute_complexity
18
using ..LossFunctionsModule: score_func, score_func_batched
19
using ..CheckConstraintsModule: check_constraints
20
using ..AdaptiveParsimonyModule: RunningSearchStatistics
21
using ..PopMemberModule: PopMember
22
using ..MutationFunctionsModule:
23
    gen_random_tree_fixed_size,
24
    mutate_constant,
25
    mutate_operator,
26
    swap_operands,
27
    append_random_op,
28
    prepend_random_op,
29
    insert_random_op,
30
    delete_random_op!,
31
    crossover_trees,
32
    form_random_connection!,
33
    break_random_connection!
34
using ..ConstantOptimizationModule: optimize_constants
35
using ..RecorderModule: @recorder
36

37
function condition_mutation_weights!(
93,575,942✔
38
    weights::MutationWeights, member::PopMember, options::Options, curmaxsize::Int
39
)
40
    tree = get_tree(member.tree)
93,573,732✔
41
    if !preserve_sharing(typeof(member.tree))
93,576,785✔
42
        weights.form_connection = 0.0
93,590,336✔
43
        weights.break_connection = 0.0
93,590,894✔
44
    end
45
    if tree.degree == 0
93,590,798✔
46
        # If equation is too small, don't delete operators
47
        # or simplify
48
        weights.mutate_operator = 0.0
278,128✔
49
        weights.swap_operands = 0.0
278,128✔
50
        weights.delete_node = 0.0
278,128✔
51
        weights.simplify = 0.0
278,128✔
52
        if !tree.constant
278,128✔
53
            weights.optimize = 0.0
158,272✔
54
            weights.mutate_constant = 0.0
158,272✔
55
        end
56
        return nothing
278,125✔
57
    end
58

59
    if !any(node -> node.degree == 2, tree)
222,289,422✔
60
        # swap is implemented only for binary ops
61
        weights.swap_operands = 0.0
737,383✔
62
    end
63

64
    if !(member.tree isa ParametricExpression)  # TODO: HACK
93,318,920✔
65
        #More constants => more likely to do constant mutation
66
        let n_constants = count_constants(member.tree)
89,538,843✔
67
            weights.mutate_constant *= min(8, n_constants) / 8.0
89,549,170✔
68
        end
69
    end
70
    complexity = compute_complexity(member, options)
126,207,758✔
71

72
    if complexity >= curmaxsize
93,325,584✔
73
        # If equation is too big, don't add new operators
74
        weights.add_node = 0.0
8,803,989✔
75
        weights.insert_node = 0.0
8,803,986✔
76
    end
77

78
    if !options.should_simplify
93,325,284✔
79
        weights.simplify = 0.0
4,237,964✔
80
    end
81

82
    return nothing
93,324,995✔
83
end
84

85
# Go through one simulated options.annealing mutation cycle
86
#  exp(-delta/T) defines probability of accepting a change
87
function next_generation(
170,479,562✔
88
    dataset::D,
89
    member::P,
90
    temperature,
91
    curmaxsize::Int,
92
    running_search_statistics::RunningSearchStatistics,
93
    options::Options;
94
    tmp_recorder::RecordType,
95
)::Tuple{
96
    P,Bool,Float64
97
} where {T,L,D<:Dataset{T,L},N<:AbstractExpression{T},P<:PopMember{T,L,N}}
98
    parent_ref = member.ref
110,258,235✔
99
    mutation_accepted = false
93,564,113✔
100
    num_evals = 0.0
93,564,873✔
101

102
    #TODO - reconsider this
103
    beforeScore, beforeLoss = if options.batching
93,588,632✔
104
        num_evals += (options.batch_size / dataset.n)
26,369,639✔
105
        score_func_batched(dataset, member, options)
44,666,926✔
106
    else
107
        member.score, member.loss
160,810,370✔
108
    end
109

110
    nfeatures = dataset.nfeatures
93,598,857✔
111

112
    weights = copy(options.mutation_weights)
93,599,010✔
113

114
    condition_mutation_weights!(weights, member, options, curmaxsize)
138,263,256✔
115

116
    mutation_choice = sample_mutation(weights)
93,602,811✔
117

118
    successful_mutation = false
93,576,490✔
119
    #TODO: Currently we dont take this \/ into account
120
    is_success_always_possible = true
93,577,063✔
121
    attempts = 0
93,577,409✔
122
    max_attempts = 10
93,577,297✔
123

124
    #############################################
125
    # Mutations
126
    #############################################
127
    local tree
×
128
    while (!successful_mutation) && attempts < max_attempts
218,575,317✔
129
        tree = copy_node(member.tree)
130,502,517✔
130
        successful_mutation = true
130,467,826✔
131
        if mutation_choice == :mutate_constant
130,494,469✔
132
            tree = mutate_constant(tree, temperature, options)
12,401,740✔
133
            @recorder tmp_recorder["type"] = "constant"
7,460,899✔
134
            is_success_always_possible = true
7,460,833✔
135
            # Mutating a constant shouldn't invalidate an already-valid function
136
        elseif mutation_choice == :mutate_operator
123,030,385✔
137
            tree = mutate_operator(tree, options)
13,969,372✔
138
            @recorder tmp_recorder["type"] = "operator"
8,334,207✔
139
            is_success_always_possible = true
8,334,166✔
140
            # Can always mutate to the same operator
141

142
        elseif mutation_choice == :swap_operands
114,698,077✔
143
            tree = swap_operands(tree)
2,000,802✔
144
            @recorder tmp_recorder["type"] = "swap_operands"
1,211,242✔
145
            is_success_always_possible = true
1,211,239✔
146

147
        elseif mutation_choice == :add_node
113,487,431✔
148
            if rand() < 0.5
12,121,498✔
149
                tree = append_random_op(tree, options, nfeatures)
9,091,419✔
150
                @recorder tmp_recorder["type"] = "append_op"
6,060,620✔
151
            else
152
                tree = prepend_random_op(tree, options, nfeatures)
9,091,489✔
153
                @recorder tmp_recorder["type"] = "prepend_op"
6,061,094✔
154
            end
155
            is_success_always_possible = false
12,121,509✔
156
            # Can potentially have a situation without success
157
        elseif mutation_choice == :insert_node
101,372,357✔
158
            tree = insert_random_op(tree, options, nfeatures)
98,045,406✔
159
            @recorder tmp_recorder["type"] = "insert_op"
65,378,523✔
160
            is_success_always_possible = false
65,376,168✔
161
        elseif mutation_choice == :delete_node
36,004,175✔
162
            tree = delete_random_op!(tree, options, nfeatures)
39,676,451✔
163
            @recorder tmp_recorder["type"] = "delete_op"
26,449,685✔
164
            is_success_always_possible = true
26,449,512✔
165
        elseif mutation_choice == :simplify
9,555,531✔
166
            @assert options.should_simplify
60,018✔
167
            simplify_tree!(tree, options.operators)
61,005✔
168
            if tree isa Node
57,257✔
UNCOV
169
                tree = combine_operators(tree, options.operators)
×
170
            end
171
            @recorder tmp_recorder["type"] = "partial_simplify"
60,018✔
172
            mutation_accepted = true
57,257✔
173
            return (
60,018✔
174
                PopMember(
175
                    tree,
176
                    beforeScore,
177
                    beforeLoss,
178
                    options;
179
                    parent=parent_ref,
180
                    deterministic=options.deterministic,
181
                ),
182
                mutation_accepted,
183
                num_evals,
184
            )
185

186
            is_success_always_possible = true
×
187
            # Simplification shouldn't hurt complexity; unless some non-symmetric constraint
188
            # to commutative operator...
189

190
        elseif mutation_choice == :randomize
9,495,493✔
191
            # We select a random size, though the generated tree
192
            # may have fewer nodes than we request.
193
            tree_size_to_generate = rand(1:curmaxsize)
4,051,139✔
194
            tree = with_contents(
6,074,737✔
195
                tree,
196
                gen_random_tree_fixed_size(tree_size_to_generate, options, nfeatures, T),
197
            )
198
            @recorder tmp_recorder["type"] = "regenerate"
4,051,179✔
199

200
            is_success_always_possible = true
4,051,168✔
201
        elseif mutation_choice == :optimize
5,444,447✔
202
            cur_member = PopMember(
9,237✔
203
                tree,
204
                beforeScore,
205
                beforeLoss,
206
                options,
207
                compute_complexity(member, options);
208
                parent=parent_ref,
209
                deterministic=options.deterministic,
210
            )
211
            cur_member, new_num_evals = optimize_constants(dataset, cur_member, options)
16,374✔
212
            num_evals += new_num_evals
8,731✔
213
            @recorder tmp_recorder["type"] = "optimize"
8,731✔
214
            mutation_accepted = true
8,731✔
215
            return (cur_member, mutation_accepted, num_evals)
8,731✔
216

217
            is_success_always_possible = true
×
218
        elseif mutation_choice == :do_nothing
5,435,713✔
219
            @recorder begin
5,435,718✔
220
                tmp_recorder["type"] = "identity"
8,145✔
221
                tmp_recorder["result"] = "accept"
8,145✔
222
                tmp_recorder["reason"] = "identity"
8,145✔
223
            end
224
            mutation_accepted = true
5,432,660✔
225
            return (
5,435,715✔
226
                PopMember(
227
                    tree,
228
                    beforeScore,
229
                    beforeLoss,
230
                    options,
231
                    compute_complexity(member, options);
232
                    parent=parent_ref,
233
                    deterministic=options.deterministic,
234
                ),
235
                mutation_accepted,
236
                num_evals,
237
            )
238
        elseif mutation_choice == :form_connection
×
239
            tree = form_random_connection!(tree)
×
240
            @recorder tmp_recorder["type"] = "form_connection"
×
241
            is_success_always_possible = true
×
242
        elseif mutation_choice == :break_connection
×
243
            tree = break_random_connection!(tree)
×
244
            @recorder tmp_recorder["type"] = "break_connection"
×
245
            is_success_always_possible = true
×
246
        else
247
            error("Unknown mutation choice: $mutation_choice")
×
248
        end
249

250
        successful_mutation =
124,992,483✔
251
            successful_mutation && check_constraints(tree, options, curmaxsize)
252

253
        attempts += 1
124,995,077✔
254
    end
103,610,568✔
255
    #############################################
256
    if !(tree isa AbstractExpression)
88,087,430✔
NEW
257
        @error "Failed after $(mutation_choice)"
×
NEW
258
        error()
×
259
    end
260

261
    if !successful_mutation
88,086,184✔
262
        @recorder begin
3,717,494✔
263
            tmp_recorder["result"] = "reject"
48,800✔
264
            tmp_recorder["reason"] = "failed_constraint_check"
48,799✔
265
        end
266
        mutation_accepted = false
3,717,494✔
267
        return (
3,717,505✔
268
            PopMember(
269
                copy_node(member.tree),
270
                beforeScore,
271
                beforeLoss,
272
                options,
273
                compute_complexity(member, options);
274
                parent=parent_ref,
275
                deterministic=options.deterministic,
276
            ),
277
            mutation_accepted,
278
            num_evals,
279
        )
280
    end
281

282
    if options.batching
84,366,842✔
283
        afterScore, afterLoss = score_func_batched(dataset, tree, options)
35,002,687✔
284
        num_evals += (options.batch_size / dataset.n)
23,329,106✔
285
    else
286
        afterScore, afterLoss = score_func(dataset, tree, options)
101,323,308✔
287
        num_evals += 1
61,051,324✔
288
    end
289

290
    if isnan(afterScore)
84,378,073✔
291
        @recorder begin
×
292
            tmp_recorder["result"] = "reject"
×
293
            tmp_recorder["reason"] = "nan_loss"
×
294
        end
295
        mutation_accepted = false
×
296
        return (
×
297
            PopMember(
298
                copy_node(member.tree),
299
                beforeScore,
300
                beforeLoss,
301
                options,
302
                compute_complexity(member, options);
303
                parent=parent_ref,
304
                deterministic=options.deterministic,
305
            ),
306
            mutation_accepted,
307
            num_evals,
308
        )
309
    end
310

311
    probChange = 1.0
84,358,038✔
312
    if options.annealing
84,375,926✔
313
        delta = afterScore - beforeScore
38,899,148✔
314
        probChange *= exp(-delta / (temperature * options.alpha))
38,898,774✔
315
    end
316
    newSize = -1
84,375,475✔
317
    if options.use_frequency
84,375,952✔
318
        oldSize = compute_complexity(member, options)
93,891,952✔
319
        newSize = compute_complexity(tree, options)
79,163,520✔
320
        old_frequency = if (0 < oldSize <= options.maxsize)
69,219,955✔
321
            running_search_statistics.normalized_frequencies[oldSize]
69,220,182✔
322
        else
323
            1e-6
69,220,781✔
324
        end
325
        new_frequency = if (0 < newSize <= options.maxsize)
69,220,667✔
326
            running_search_statistics.normalized_frequencies[newSize]
69,220,721✔
327
        else
328
            1e-6
69,218,765✔
329
        end
330
        probChange *= old_frequency / new_frequency
69,218,851✔
331
    end
332

333
    if probChange < rand()
84,379,733✔
334
        @recorder begin
35,184,711✔
335
            tmp_recorder["result"] = "reject"
98,707✔
336
            tmp_recorder["reason"] = "annealing_or_frequency"
98,709✔
337
        end
338
        mutation_accepted = false
35,181,467✔
339
        return (
35,184,233✔
340
            PopMember(
341
                copy_node(member.tree),
342
                beforeScore,
343
                beforeLoss,
344
                options,
345
                compute_complexity(member, options);
346
                parent=parent_ref,
347
                deterministic=options.deterministic,
348
            ),
349
            mutation_accepted,
350
            num_evals,
351
        )
352
    else
353
        @recorder begin
49,195,236✔
354
            tmp_recorder["result"] = "accept"
170,968✔
355
            tmp_recorder["reason"] = "pass"
170,966✔
356
        end
357
        mutation_accepted = true
49,179,060✔
358
        return (
49,194,543✔
359
            PopMember(
360
                tree,
361
                afterScore,
362
                afterLoss,
363
                options,
364
                newSize;
365
                parent=parent_ref,
366
                deterministic=options.deterministic,
367
            ),
368
            mutation_accepted,
369
            num_evals,
370
        )
371
    end
372
end
373

374
"""Generate a generation via crossover of two members."""
375
function crossover_generation(
3,570,203✔
376
    member1::P, member2::P, dataset::D, curmaxsize::Int, options::Options
377
)::Tuple{P,P,Bool,Float64} where {T,L,D<:Dataset{T,L},N,P<:PopMember{T,L,N}}
378
    tree1 = member1.tree
3,570,201✔
379
    tree2 = member2.tree
3,570,203✔
380
    crossover_accepted = false
3,568,472✔
381

382
    # We breed these until constraints are no longer violated:
383
    child_tree1, child_tree2 = crossover_trees(tree1, tree2)
5,353,495✔
384
    num_tries = 1
3,568,467✔
385
    max_tries = 10
3,568,466✔
386
    num_evals = 0.0
3,568,460✔
387
    afterSize1 = -1
3,568,463✔
388
    afterSize2 = -1
3,568,460✔
389
    while true
5,543,899✔
390
        afterSize1 = compute_complexity(child_tree1, options)
6,450,546✔
391
        afterSize2 = compute_complexity(child_tree2, options)
6,450,561✔
392
        # Both trees satisfy constraints
393
        if check_constraints(child_tree1, options, curmaxsize, afterSize1) &&
5,543,898✔
394
            check_constraints(child_tree2, options, curmaxsize, afterSize2)
395
            break
3,555,514✔
396
        end
397
        if num_tries > max_tries
1,988,420✔
398
            crossover_accepted = false
14,687✔
399
            return member1, member2, crossover_accepted, num_evals  # Fail.
14,699✔
400
        end
401
        child_tree1, child_tree2 = crossover_trees(tree1, tree2)
2,968,118✔
402
        num_tries += 1
1,973,734✔
403
    end
1,640,830✔
404
    if options.batching
3,555,507✔
405
        afterScore1, afterLoss1 = score_func_batched(
×
406
            dataset, child_tree1, options; complexity=afterSize1
407
        )
408
        afterScore2, afterLoss2 = score_func_batched(
×
409
            dataset, child_tree2, options; complexity=afterSize2
410
        )
411
        num_evals += 2 * (options.batch_size / dataset.n)
×
412
    else
413
        afterScore1, afterLoss1 = score_func(
5,331,322✔
414
            dataset, child_tree1, options; complexity=afterSize1
415
        )
416
        afterScore2, afterLoss2 = score_func(
5,331,287✔
417
            dataset, child_tree2, options; complexity=afterSize2
418
        )
419
        num_evals += options.batch_size / dataset.n
3,555,492✔
420
    end
421

422
    baby1 = PopMember(
3,555,486✔
423
        child_tree1,
424
        afterScore1,
425
        afterLoss1,
426
        options,
427
        afterSize1;
428
        parent=member1.ref,
429
        deterministic=options.deterministic,
430
    )::P
431
    baby2 = PopMember(
3,555,455✔
432
        child_tree2,
433
        afterScore2,
434
        afterLoss2,
435
        options,
436
        afterSize2;
437
        parent=member2.ref,
438
        deterministic=options.deterministic,
439
    )::P
440

441
    crossover_accepted = true
3,553,725✔
442
    return baby1, baby2, crossover_accepted, num_evals
3,555,439✔
443
end
444

445
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc