• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

griffithlab / pVACtools / 17241309602

26 Aug 2025 02:29PM UTC coverage: 86.333% (-0.1%) from 86.435%
17241309602

Pull #1293

github

web-flow
Merge 46a96b376 into 72136b7c1
Pull Request #1293: Added new parameter to handle filtering of incomplete CDS transcripts

8970 of 10390 relevant lines covered (86.33%)

0.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.54
/pvactools/lib/post_processor.py
1
import tempfile
1✔
2
import shutil
1✔
3

4
from pvactools.lib.identify_problematic_amino_acids import IdentifyProblematicAminoAcids
1✔
5
from pvactools.lib.mark_genes_of_interest import MarkGenesOfInterest
1✔
6
from pvactools.lib.aggregate_all_epitopes import PvacseqAggregateAllEpitopes, PvacfuseAggregateAllEpitopes, PvacbindAggregateAllEpitopes, PvacspliceAggregateAllEpitopes
1✔
7
from pvactools.lib.binding_filter import BindingFilter
1✔
8
from pvactools.lib.filter import Filter, FilterCriterion
1✔
9
from pvactools.lib.top_score_filter import PvacseqTopScoreFilter, PvacfuseTopScoreFilter, PvacbindTopScoreFilter, PvacspliceTopScoreFilter
1✔
10
from pvactools.lib.calculate_manufacturability import CalculateManufacturability
1✔
11
from pvactools.lib.calculate_reference_proteome_similarity import CalculateReferenceProteomeSimilarity
1✔
12
from pvactools.lib.update_tiers import PvacseqUpdateTiers, PvacbindUpdateTiers, PvacfuseUpdateTiers, PvacspliceUpdateTiers
1✔
13
from pvactools.lib.net_chop import NetChop
1✔
14
from pvactools.lib.netmhc_stab import NetMHCStab
1✔
15

16
class PostProcessor:
    """Chain the post-processing steps that turn an epitope report into a filtered report.

    Every keyword argument passed to the constructor is stored as an instance
    attribute of the same name; the individual steps read their settings from
    those attributes (for example ``input_file``, ``prediction_algorithms``,
    ``binding_threshold``, ``run_net_chop`` ...).  Each step writes its output
    to a dedicated temporary file that the following step uses as input.
    """

    # Attribute names of every temporary file created in ``__init__``.  Keeping
    # them in one place guarantees ``close_filehandles`` releases all of them.
    _TEMPFILE_ATTRIBUTES = (
        'identify_problematic_amino_acids_fh',
        'mark_genes_of_interest_fh',
        'binding_filter_fh',
        'coverage_filter_fh',
        'transcript_support_level_filter_fh',
        'top_score_filter_fh',
        'net_chop_fh',
        'netmhc_stab_fh',
        'manufacturability_fh',
        'reference_similarity_fh',
    )

    def __init__(self, **kwargs):
        """Store the settings and create the temporary files used between steps.

        Args:
            **kwargs: Arbitrary settings; each one becomes an instance
                attribute.  ``input_file`` and ``prediction_algorithms`` are
                read here and must be supplied.  ``file_type``, ``fasta`` and
                ``net_chop_fasta`` default to ``None`` when absent.  If
                ``flurry_state`` is supplied it is used as-is, otherwise it is
                derived by ``get_flurry_state`` (which may modify
                ``prediction_algorithms`` in place).
        """
        for (k, v) in kwargs.items():
            setattr(self, k, v)
        self.aggregate_report = self.input_file.replace('.tsv', '.aggregated.tsv')
        for attribute in self._TEMPFILE_ATTRIBUTES:
            # The reference similarity output needs a ``.tsv`` suffix because
            # the name of its metrics file is derived by replacing that suffix
            # (see ``calculate_reference_proteome_similarity``).
            suffix = '.tsv' if attribute == 'reference_similarity_fh' else None
            setattr(self, attribute, tempfile.NamedTemporaryFile(suffix=suffix))
        self.file_type = kwargs.get('file_type', None)
        self.fasta = kwargs.get('fasta', None)
        self.net_chop_fasta = kwargs.get('net_chop_fasta', None)
        if not hasattr(self, 'flurry_state'):
            self.flurry_state = self.get_flurry_state()
        # NOTE: an empty algorithm list also yields ``el_only == True``.
        self.el_only = all(self.is_el(a) for a in self.prediction_algorithms)

    def get_flurry_state(self):
        """Classify which MHCflurry modes were requested.

        Side effect: ``self.prediction_algorithms`` is modified in place so
        that at most a single ``'MHCflurry'`` entry remains ( ``'MHCflurryEL'``
        is removed when both are present, or renamed to ``'MHCflurry'`` when
        it is the only one).  The list is the object passed by the caller.

        Returns:
            ``'both'``, ``'BA_only'``, ``'EL_only'`` or ``None`` when neither
            MHCflurry entry is present.
        """
        has_ba = 'MHCflurry' in self.prediction_algorithms
        has_el = 'MHCflurryEL' in self.prediction_algorithms
        if has_ba and has_el:
            self.prediction_algorithms.remove('MHCflurryEL')
            return 'both'
        elif has_ba:
            return 'BA_only'
        elif has_el:
            pred_idx = self.prediction_algorithms.index('MHCflurryEL')
            self.prediction_algorithms[pred_idx] = 'MHCflurry'
            return 'EL_only'
        else:
            return None

    def is_el(self, algorithm):
        """Return True when ``algorithm`` is treated as an elution-style (non binding affinity) predictor.

        ``'MHCflurry'`` counts only when ``self.flurry_state`` is
        ``'EL_only'``; the other names are a fixed list.
        """
        if algorithm == 'MHCflurry' and self.flurry_state == 'EL_only':
            return True
        if algorithm in ['NetMHCIIpanEL', 'NetMHCpanEL', 'BigMHC_EL', 'BigMHC_IM', 'DeepImmuno']:
            return True
        return False

    def execute(self):
        """Run every post-processing step in order and write the filtered report.

        The output of the NetMHCStab step is copied to
        ``self.filtered_report_file``.  The temporary files are closed whether
        or not a step raises, so a failing run does not leak them.
        """
        try:
            self.identify_problematic_amino_acids()
            self.mark_genes_of_interests()
            self.aggregate_all_epitopes()
            self.calculate_manufacturability()
            self.execute_binding_filter()
            self.execute_coverage_filter()
            self.execute_transcript_support_level_filter()
            self.execute_top_score_filter()
            self.call_net_chop()
            self.call_netmhc_stab()
            self.calculate_reference_proteome_similarity()
            shutil.copy(self.netmhc_stab_fh.name, self.filtered_report_file)
        finally:
            self.close_filehandles()
        print("\nDone: Pipeline finished successfully. File {} contains list of filtered putative neoantigens.\n".format(self.filtered_report_file))

    def identify_problematic_amino_acids(self):
        """Run IdentifyProblematicAminoAcids when ``self.problematic_amino_acids`` is set.

        The result is copied back over ``self.input_file``.
        """
        if self.problematic_amino_acids:
            print("Identifying peptides with problematic amino acids")
            IdentifyProblematicAminoAcids(self.input_file, self.identify_problematic_amino_acids_fh.name, self.problematic_amino_acids, file_type=self.file_type).execute()
            shutil.copy(self.identify_problematic_amino_acids_fh.name, self.input_file)
            print("Completed")

    def mark_genes_of_interests(self):
        """Run MarkGenesOfInterest for every file type except pVACbind.

        The result is copied back over ``self.input_file``.
        """
        if self.file_type != 'pVACbind':
            print("Marking genes of interest")
            MarkGenesOfInterest(self.input_file, self.mark_genes_of_interest_fh.name, self.genes_of_interest_file, file_type=self.file_type).execute()
            shutil.copy(self.mark_genes_of_interest_fh.name, self.input_file)
            print("Completed")

    def aggregate_all_epitopes(self):
        """Create the aggregated report with the aggregator matching ``self.file_type``.

        Skipped (with a warning) when only elution-style algorithms were
        requested.  For pVACseq and pVACsplice the aggregator's ``vaf_clonal``
        is kept on the instance for the later tier update.
        """
        if self.el_only:
            print("WARNING: No binding affinity algorithm(s) specified, skipping aggregated report creation.")
            return
        print("Creating aggregated report")
        if self.file_type == 'pVACseq':
            aggregator = PvacseqAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                tumor_purity=self.tumor_purity,
                binding_threshold=self.binding_threshold,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                trna_vaf=self.trna_vaf,
                trna_cov=self.trna_cov,
                expn_val=self.expn_val,
                transcript_prioritization_strategy=self.transcript_prioritization_strategy,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
                top_score_metric=self.top_score_metric,
                top_score_metric2=self.top_score_metric2,
                allele_specific_anchors=self.allele_specific_anchors,
                allow_incomplete_transcripts=self.allow_incomplete_transcripts,
                anchor_contribution_threshold=self.anchor_contribution_threshold,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
            )
            aggregator.execute()
            self.vaf_clonal = aggregator.vaf_clonal
        elif self.file_type == 'pVACfuse':
            PvacfuseAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                top_score_metric=self.top_score_metric,
                top_score_metric2=self.top_score_metric2,
                read_support=self.read_support,
                expn_val=self.expn_val,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
            ).execute()
        elif self.file_type == 'pVACbind':
            PvacbindAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                top_score_metric=self.top_score_metric,
                top_score_metric2=self.top_score_metric2,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
            ).execute()
        elif self.file_type == 'pVACsplice':
            aggregator = PvacspliceAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                tumor_purity=self.tumor_purity,
                binding_threshold=self.binding_threshold,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
                top_score_metric=self.top_score_metric,
                top_score_metric2=self.top_score_metric2,
                trna_vaf=self.trna_vaf,
                trna_cov=self.trna_cov,
                expn_val=self.expn_val,
                transcript_prioritization_strategy=self.transcript_prioritization_strategy,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
                allow_incomplete_transcripts=self.allow_incomplete_transcripts,
            )
            aggregator.execute()
            self.vaf_clonal = aggregator.vaf_clonal
        print("Completed")

    def calculate_manufacturability(self):
        """Run CalculateManufacturability when ``self.run_manufacturability_metrics`` is set.

        The result is copied back over ``self.input_file``.
        """
        if self.run_manufacturability_metrics:
            print("Calculating Manufacturability Metrics")
            CalculateManufacturability(self.input_file, self.manufacturability_fh.name, self.file_type).execute()
            shutil.copy(self.manufacturability_fh.name, self.input_file)
            print("Completed")

    def execute_binding_filter(self):
        """Run BindingFilter on the input file, or pass the file through unchanged in EL-only mode."""
        if self.el_only:
            shutil.copy(self.input_file, self.binding_filter_fh.name)
            return
        print("Running Binding Filters")
        BindingFilter(
            self.input_file,
            self.binding_filter_fh.name,
            self.binding_threshold,
            self.minimum_fold_change,
            self.top_score_metric,
            self.top_score_metric2,
            self.exclude_NAs,
            self.allele_specific_binding_thresholds,
            self.percentile_threshold,
            self.percentile_threshold_strategy,
            self.file_type,
        ).execute()
        print("Completed")

    def execute_coverage_filter(self):
        """Apply the coverage/expression criteria for ``self.file_type``.

        When ``self.run_coverage_filter`` is false the binding filter output is
        passed through unchanged.  File types without specific criteria (e.g.
        pVACbind) run the filter with an empty criteria list.
        """
        if not self.run_coverage_filter:
            shutil.copy(self.binding_filter_fh.name, self.coverage_filter_fh.name)
            return

        print("Running Coverage Filters")
        if self.file_type in ('pVACseq', 'pVACsplice'):
            thresholds = [
                ("Normal Depth", '>=', self.normal_cov),
                ("Normal VAF", '<=', self.normal_vaf),
                ("Tumor DNA Depth", '>=', self.tdna_cov),
                ("Tumor DNA VAF", '>=', self.tdna_vaf),
                ("Tumor RNA Depth", '>=', self.trna_cov),
                ("Tumor RNA VAF", '>=', self.trna_vaf),
                ("Gene Expression", '>=', self.expn_val),
            ]
            # excluding transcript expression filter for pvacsplice
            if self.file_type == 'pVACseq':
                thresholds.append(("Transcript Expression", '>=', self.expn_val))
        elif self.file_type == 'pVACfuse':
            thresholds = [
                ("Read Support", '>=', self.read_support),
                ("Expression", '>=', self.expn_val),
            ]
        else:
            thresholds = []
        filter_criteria = [
            FilterCriterion(column, operator, threshold, exclude_nas=self.exclude_NAs)
            for (column, operator, threshold) in thresholds
        ]
        Filter(self.binding_filter_fh.name, self.coverage_filter_fh.name, filter_criteria).execute()
        print("Completed")

    def execute_transcript_support_level_filter(self):
        """Filter on 'Transcript Support Level', or pass the coverage filter output through unchanged."""
        if self.run_transcript_support_level_filter:
            print("Running Transcript Support Level Filter")
            filter_criteria = [FilterCriterion('Transcript Support Level', '<=', self.maximum_transcript_support_level, exclude_nas=True, skip_value='Not Supported')]
            Filter(
                self.coverage_filter_fh.name,
                self.transcript_support_level_filter_fh.name,
                filter_criteria,
                ['Transcript Support Level'],
            ).execute()
            print("Complete")
        else:
            shutil.copy(self.coverage_filter_fh.name, self.transcript_support_level_filter_fh.name)

    def execute_top_score_filter(self):
        """Run the top score filter matching ``self.file_type``; pass through unchanged in EL-only mode."""
        if self.el_only:
            shutil.copy(self.transcript_support_level_filter_fh.name, self.top_score_filter_fh.name)
            return
        print("Running Top Score Filter")
        if self.file_type == 'pVACseq':
            PvacseqTopScoreFilter(
                self.transcript_support_level_filter_fh.name,
                self.top_score_filter_fh.name,
                top_score_metric=self.top_score_metric,
                top_score_metric2=self.top_score_metric2,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
                allele_specific_anchors=self.allele_specific_anchors,
                anchor_contribution_threshold=self.anchor_contribution_threshold,
                allow_incomplete_transcripts=self.allow_incomplete_transcripts,
            ).execute()
        elif self.file_type == 'pVACfuse':
            PvacfuseTopScoreFilter(
                self.transcript_support_level_filter_fh.name,
                self.top_score_filter_fh.name,
                top_score_metric=self.top_score_metric,
                top_score_metric2=self.top_score_metric2,
            ).execute()
        elif self.file_type == 'pVACbind':
            PvacbindTopScoreFilter(
                self.transcript_support_level_filter_fh.name,
                self.top_score_filter_fh.name,
                top_score_metric=self.top_score_metric,
                top_score_metric2=self.top_score_metric2,
            ).execute()
        elif self.file_type == 'pVACsplice':
            PvacspliceTopScoreFilter(
                self.transcript_support_level_filter_fh.name,
                self.top_score_filter_fh.name,
                top_score_metric=self.top_score_metric,
                top_score_metric2=self.top_score_metric2,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
                allow_incomplete_transcripts=self.allow_incomplete_transcripts,
            ).execute()
        print("Completed")

    def call_net_chop(self):
        """Run NetChop on the top score filter output, or pass it through unchanged."""
        if self.run_net_chop:
            print("Submitting remaining epitopes to NetChop")
            NetChop(self.top_score_filter_fh.name, self.net_chop_fasta, self.net_chop_fh.name, self.net_chop_method, str(self.net_chop_threshold), self.file_type).execute()
            print("Completed")
        else:
            shutil.copy(self.top_score_filter_fh.name, self.net_chop_fh.name)

    def call_netmhc_stab(self):
        """Run NetMHCStab on the NetChop step output, or pass it through unchanged."""
        if self.run_netmhc_stab:
            print("Running NetMHCStabPan")
            NetMHCStab(self.net_chop_fh.name, self.netmhc_stab_fh.name, self.file_type, self.top_score_metric, self.top_score_metric2).execute()
            print("Completed")
        else:
            shutil.copy(self.net_chop_fh.name, self.netmhc_stab_fh.name)

    def calculate_reference_proteome_similarity(self):
        """Annotate the aggregated report with reference proteome similarity and refresh its tiers.

        Does nothing in EL-only mode (no aggregated report exists then).  When
        the step is enabled, the aggregated report is replaced by the annotated
        version, the tiers are recomputed, and the ``.reference_matches`` file
        is moved next to the aggregated report.  For pVACseq the metrics JSON
        is additionally passed in and its regenerated copy moved back.
        """
        if self.el_only:
            return
        if not self.run_reference_proteome_similarity:
            shutil.copy(self.aggregate_report, self.reference_similarity_fh.name)
            return

        print("Calculating Reference Proteome Similarity")
        is_pvacseq = self.file_type == 'pVACseq'
        similarity_options = dict(
            species=self.species,
            file_type=self.file_type,
            n_threads=self.n_threads,
            blastp_path=self.blastp_path,
            blastp_db=self.blastp_db,
            peptide_fasta=self.peptide_fasta,
        )
        if is_pvacseq:
            # Only the pVACseq run hands the metrics file to the similarity step.
            aggregate_metrics_file = self.aggregate_report.replace('.tsv', '.metrics.json')
            similarity_options['aggregate_metrics_file'] = aggregate_metrics_file
        CalculateReferenceProteomeSimilarity(
            self.aggregate_report,
            self.fasta,
            self.reference_similarity_fh.name,
            **similarity_options
        ).execute()
        if is_pvacseq:
            aggregate_metrics_output_file = self.reference_similarity_fh.name.replace('.tsv', '.metrics.json')
            shutil.move(aggregate_metrics_output_file, aggregate_metrics_file)
        shutil.copy(self.reference_similarity_fh.name, self.aggregate_report)

        self._update_aggregate_report_tiers()
        shutil.move("{}.reference_matches".format(self.reference_similarity_fh.name), "{}.reference_matches".format(self.aggregate_report))
        print("Completed")

    def _update_aggregate_report_tiers(self):
        """Re-run the tier update matching ``self.file_type`` on the aggregated report."""
        if self.file_type == 'pVACseq':
            PvacseqUpdateTiers(
                self.aggregate_report,
                self.vaf_clonal,
                binding_threshold=self.binding_threshold,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                trna_vaf=self.trna_vaf,
                trna_cov=self.trna_cov,
                expn_val=self.expn_val,
                transcript_prioritization_strategy=self.transcript_prioritization_strategy,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
                allele_specific_anchors=self.allele_specific_anchors,
                anchor_contribution_threshold=self.anchor_contribution_threshold,
            ).execute()
        elif self.file_type == 'pVACbind':
            PvacbindUpdateTiers(
                self.aggregate_report,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
            ).execute()
        elif self.file_type == 'pVACfuse':
            PvacfuseUpdateTiers(
                self.aggregate_report,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                read_support=self.read_support,
                expn_val=self.expn_val,
            ).execute()
        elif self.file_type == 'pVACsplice':
            PvacspliceUpdateTiers(
                self.aggregate_report,
                self.vaf_clonal,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                trna_vaf=self.trna_vaf,
                trna_cov=self.trna_cov,
                expn_val=self.expn_val,
                transcript_prioritization_strategy=self.transcript_prioritization_strategy,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
            ).execute()

    def close_filehandles(self):
        """Close every temporary file created in ``__init__``.

        Closing a ``NamedTemporaryFile`` also removes it from disk, so this
        must only be called once the final report has been copied out.
        """
        for attribute in self._TEMPFILE_ATTRIBUTES:
            getattr(self, attribute).close()
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc