• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

griffithlab / pVACtools / 13393614980

18 Feb 2025 03:03PM UTC coverage: 83.006% (-0.5%) from 83.463%
13393614980

Pull #1203

github

web-flow
Merge 4ca77dac7 into e899319d0
Pull Request #1203: Integrated pVACcompare into pVACtools

422 of 588 new or added lines in 19 files covered. (71.77%)

88 existing lines in 12 files now uncovered.

7864 of 9474 relevant lines covered (83.01%)

4.14 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.67
/pvactools/lib/post_processor.py
1
import tempfile
5✔
2
import shutil
5✔
3

4
from pvactools.lib.identify_problematic_amino_acids import IdentifyProblematicAminoAcids
5✔
5
from pvactools.lib.aggregate_all_epitopes import PvacseqAggregateAllEpitopes, PvacfuseAggregateAllEpitopes, PvacbindAggregateAllEpitopes, PvacspliceAggregateAllEpitopes
5✔
6
from pvactools.lib.binding_filter import BindingFilter
5✔
7
from pvactools.lib.filter import Filter, FilterCriterion
5✔
8
from pvactools.lib.top_score_filter import PvacseqTopScoreFilter, PvacfuseTopScoreFilter, PvacbindTopScoreFilter, PvacspliceTopScoreFilter
5✔
9
from pvactools.lib.calculate_manufacturability import CalculateManufacturability
5✔
10
from pvactools.lib.calculate_reference_proteome_similarity import CalculateReferenceProteomeSimilarity
5✔
11
from pvactools.lib.net_chop import NetChop
5✔
12
from pvactools.lib.netmhc_stab import NetMHCStab
5✔
13

14
class PostProcessor:
    """Chain the post-processing steps that turn an epitope report into the
    aggregated and filtered reports.

    Every keyword argument passed to the constructor is stored verbatim as an
    instance attribute; the individual steps read their settings from those
    attributes. The constructor itself requires ``input_file`` and
    ``prediction_algorithms``; ``execute`` additionally requires
    ``filtered_report_file`` plus the per-step options read by the methods
    below.

    Intermediate results are passed from step to step through named temporary
    files, which are released by ``close_filehandles``.
    """

    # Algorithm names that ``is_el`` always reports as EL (non binding-affinity)
    # predictors, independent of the MHCflurry state.
    _EL_ALGORITHMS = ('NetMHCIIpanEL', 'NetMHCpanEL', 'BigMHC_EL', 'BigMHC_IM', 'DeepImmuno')

    def __init__(self, **kwargs):
        """Store all keyword arguments as attributes and create the temp files.

        ``file_type``, ``fasta`` and ``net_chop_fasta`` default to ``None``
        when not supplied. ``flurry_state`` is derived from
        ``prediction_algorithms`` unless the caller passed it explicitly.
        """
        for (k, v) in kwargs.items():
            setattr(self, k, v)
        self.aggregate_report = self.input_file.replace('.tsv', '.aggregated.tsv')
        self.identify_problematic_amino_acids_fh = tempfile.NamedTemporaryFile()
        self.binding_filter_fh = tempfile.NamedTemporaryFile()
        self.coverage_filter_fh = tempfile.NamedTemporaryFile()
        self.transcript_support_level_filter_fh = tempfile.NamedTemporaryFile()
        self.top_score_filter_fh = tempfile.NamedTemporaryFile()
        self.net_chop_fh = tempfile.NamedTemporaryFile()
        self.netmhc_stab_fh = tempfile.NamedTemporaryFile()
        self.manufacturability_fh = tempfile.NamedTemporaryFile()
        # The '.tsv' suffix matters: calculate_reference_proteome_similarity
        # derives the metrics file name by replacing it.
        self.reference_similarity_fh = tempfile.NamedTemporaryFile(suffix='.tsv')
        self.file_type = kwargs.get('file_type')
        self.fasta = kwargs.get('fasta')
        self.net_chop_fasta = kwargs.get('net_chop_fasta')
        if not hasattr(self, 'flurry_state'):
            self.flurry_state = self.get_flurry_state()
        # Note: an empty prediction_algorithms list yields el_only == True.
        self.el_only = all(self.is_el(a) for a in self.prediction_algorithms)

    def get_flurry_state(self):
        """Classify which MHCflurry variants were requested.

        Returns ``'both'``, ``'BA_only'``, ``'EL_only'`` or ``None``.

        Side effect: ``self.prediction_algorithms`` is modified in place so
        that it contains at most a single ``'MHCflurry'`` entry -
        ``'MHCflurryEL'`` is removed when both are present and renamed to
        ``'MHCflurry'`` when it is the only one. The list is the same object
        the caller passed in, so the caller sees the change as well.
        """
        if 'MHCflurry' in self.prediction_algorithms and 'MHCflurryEL' in self.prediction_algorithms:
            self.prediction_algorithms.remove('MHCflurryEL')
            return 'both'
        elif 'MHCflurry' in self.prediction_algorithms:
            return 'BA_only'
        elif 'MHCflurryEL' in self.prediction_algorithms:
            pred_idx = self.prediction_algorithms.index('MHCflurryEL')
            self.prediction_algorithms[pred_idx] = 'MHCflurry'
            return 'EL_only'
        else:
            return None

    def is_el(self, algorithm):
        """Return True if ``algorithm`` is treated as an EL-only predictor.

        ``'MHCflurry'`` counts as EL only when ``self.flurry_state`` is
        ``'EL_only'``; the names in ``_EL_ALGORITHMS`` always count.
        """
        if algorithm == 'MHCflurry' and self.flurry_state == 'EL_only':
            return True
        return algorithm in self._EL_ALGORITHMS

    def execute(self):
        """Run every post-processing step in order and write the final reports.

        The temporary files are closed even when a step raises, so a failed
        run does not leave open handles behind; the exception still
        propagates to the caller.
        """
        try:
            self.identify_problematic_amino_acids()
            self.aggregate_all_epitopes()
            self.calculate_manufacturability()
            self.execute_binding_filter()
            self.execute_coverage_filter()
            self.execute_transcript_support_level_filter()
            self.execute_top_score_filter()
            self.call_net_chop()
            self.call_netmhc_stab()
            self.calculate_reference_proteome_similarity()
            # No aggregated report exists in the EL-only case (see
            # aggregate_all_epitopes), so there is nothing to copy back.
            if not self.el_only:
                shutil.copy(self.reference_similarity_fh.name, self.aggregate_report)
            shutil.copy(self.netmhc_stab_fh.name, self.filtered_report_file)
        finally:
            self.close_filehandles()
        print("\nDone: Pipeline finished successfully. File {} contains list of filtered putative neoantigens.\n".format(self.filtered_report_file))

    def identify_problematic_amino_acids(self):
        """Annotate the input file in place when ``problematic_amino_acids`` is set."""
        if self.problematic_amino_acids:
            print("Identifying peptides with problematic amino acids")
            IdentifyProblematicAminoAcids(self.input_file, self.identify_problematic_amino_acids_fh.name, self.problematic_amino_acids, file_type=self.file_type).execute()
            # The step writes to the temp file; copy it back over the input.
            shutil.copy(self.identify_problematic_amino_acids_fh.name, self.input_file)
            print("Completed")

    def aggregate_all_epitopes(self):
        """Create ``self.aggregate_report`` with the aggregator for ``file_type``.

        Skipped with a warning when only EL algorithms were requested. A
        ``file_type`` other than the four handled below creates no report.
        """
        if self.el_only:
            print("WARNING: No binding affinity algorithm(s) specified, skipping aggregated report creation.")
            return
        print("Creating aggregated report")
        if self.file_type == 'pVACseq':
            PvacseqAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                tumor_purity=self.tumor_purity,
                binding_threshold=self.binding_threshold,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                trna_vaf=self.trna_vaf,
                trna_cov=self.trna_cov,
                expn_val=self.expn_val,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
                top_score_metric=self.top_score_metric,
                allele_specific_anchors=self.allele_specific_anchors,
                anchor_contribution_threshold=self.anchor_contribution_threshold,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
            ).execute()
        elif self.file_type == 'pVACfuse':
            PvacfuseAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                top_score_metric=self.top_score_metric,
                read_support=self.read_support,
                expn_val=self.expn_val,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
            ).execute()
        elif self.file_type == 'pVACbind':
            PvacbindAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                top_score_metric=self.top_score_metric,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
            ).execute()
        elif self.file_type == 'pVACsplice':
            PvacspliceAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                tumor_purity=self.tumor_purity,
                binding_threshold=self.binding_threshold,
                percentile_threshold=self.percentile_threshold,
                percentile_threshold_strategy=self.percentile_threshold_strategy,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
                top_score_metric=self.top_score_metric,
                trna_vaf=self.trna_vaf,
                trna_cov=self.trna_cov,
                expn_val=self.expn_val,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
            ).execute()
        print("Completed")

    def calculate_manufacturability(self):
        """Add manufacturability metrics to the input file in place, if enabled."""
        if self.run_manufacturability_metrics:
            print("Calculating Manufacturability Metrics")
            CalculateManufacturability(self.input_file, self.manufacturability_fh.name, self.file_type).execute()
            shutil.copy(self.manufacturability_fh.name, self.input_file)
            print("Completed")

    def execute_binding_filter(self):
        """Write the binding-filtered input to ``binding_filter_fh``.

        In the EL-only case the input file is passed through unfiltered.
        """
        if self.el_only:
            shutil.copy(self.input_file, self.binding_filter_fh.name)
            return
        print("Running Binding Filters")
        BindingFilter(
            self.input_file,
            self.binding_filter_fh.name,
            self.binding_threshold,
            self.minimum_fold_change,
            self.top_score_metric,
            self.exclude_NAs,
            self.allele_specific_binding_thresholds,
            self.percentile_threshold,
            self.percentile_threshold_strategy,
            self.file_type,
        ).execute()
        print("Completed")

    def execute_coverage_filter(self):
        """Apply the coverage/expression filters for ``file_type``.

        Reads ``binding_filter_fh`` and writes ``coverage_filter_fh``. When
        the filter is disabled the data is copied through unchanged. For a
        ``file_type`` without criteria (e.g. pVACbind) the filter still runs,
        with an empty criteria list.
        """
        if not self.run_coverage_filter:
            shutil.copy(self.binding_filter_fh.name, self.coverage_filter_fh.name)
            return
        print("Running Coverage Filters")
        # (column, operator, threshold) triples; attributes are only read for
        # the file type that actually uses them.
        thresholds = []
        if self.file_type in ('pVACseq', 'pVACsplice'):
            thresholds = [
                ("Normal Depth", '>=', self.normal_cov),
                ("Normal VAF", '<=', self.normal_vaf),
                ("Tumor DNA Depth", '>=', self.tdna_cov),
                ("Tumor DNA VAF", '>=', self.tdna_vaf),
                ("Tumor RNA Depth", '>=', self.trna_cov),
                ("Tumor RNA VAF", '>=', self.trna_vaf),
                ("Gene Expression", '>=', self.expn_val),
            ]
            # excluding transcript expression filter for pvacsplice
            if self.file_type == 'pVACseq':
                thresholds.append(("Transcript Expression", '>=', self.expn_val))
        elif self.file_type == 'pVACfuse':
            thresholds = [
                ("Read Support", '>=', self.read_support),
                ("Expression", '>=', self.expn_val),
            ]
        filter_criteria = [
            FilterCriterion(column, operator, threshold, exclude_nas=self.exclude_NAs)
            for (column, operator, threshold) in thresholds
        ]
        Filter(self.binding_filter_fh.name, self.coverage_filter_fh.name, filter_criteria).execute()
        print("Completed")

    def execute_transcript_support_level_filter(self):
        """Filter on 'Transcript Support Level', or copy through when disabled.

        Reads ``coverage_filter_fh`` and writes
        ``transcript_support_level_filter_fh``.
        """
        if self.run_transcript_support_level_filter:
            print("Running Transcript Support Level Filter")
            filter_criteria = [FilterCriterion('Transcript Support Level', '<=', self.maximum_transcript_support_level, exclude_nas=True, skip_value='Not Supported')]
            Filter(
                self.coverage_filter_fh.name,
                self.transcript_support_level_filter_fh.name,
                filter_criteria,
                ['Transcript Support Level'],
            ).execute()
            print("Complete")
        else:
            shutil.copy(self.coverage_filter_fh.name, self.transcript_support_level_filter_fh.name)

    def execute_top_score_filter(self):
        """Run the top score filter for ``file_type``.

        Reads ``transcript_support_level_filter_fh`` and writes
        ``top_score_filter_fh``. In the EL-only case the data is copied
        through unchanged.
        """
        if self.el_only:
            shutil.copy(self.transcript_support_level_filter_fh.name, self.top_score_filter_fh.name)
            return
        print("Running Top Score Filter")
        if self.file_type == 'pVACseq':
            PvacseqTopScoreFilter(
                self.transcript_support_level_filter_fh.name,
                self.top_score_filter_fh.name,
                top_score_metric=self.top_score_metric,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
                allele_specific_anchors=self.allele_specific_anchors,
                anchor_contribution_threshold=self.anchor_contribution_threshold,
            ).execute()
        elif self.file_type == 'pVACfuse':
            PvacfuseTopScoreFilter(
                self.transcript_support_level_filter_fh.name,
                self.top_score_filter_fh.name,
                top_score_metric=self.top_score_metric,
            ).execute()
        elif self.file_type == 'pVACbind':
            PvacbindTopScoreFilter(
                self.transcript_support_level_filter_fh.name,
                self.top_score_filter_fh.name,
                top_score_metric=self.top_score_metric,
            ).execute()
        elif self.file_type == 'pVACsplice':
            PvacspliceTopScoreFilter(
                self.transcript_support_level_filter_fh.name,
                self.top_score_filter_fh.name,
                top_score_metric=self.top_score_metric,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
            ).execute()
        print("Completed")

    def call_net_chop(self):
        """Run NetChop on the top-score output, or copy through when disabled."""
        if self.run_net_chop:
            print("Submitting remaining epitopes to NetChop")
            NetChop(self.top_score_filter_fh.name, self.net_chop_fasta, self.net_chop_fh.name, self.net_chop_method, str(self.net_chop_threshold), self.file_type).execute()
            print("Completed")
        else:
            shutil.copy(self.top_score_filter_fh.name, self.net_chop_fh.name)

    def call_netmhc_stab(self):
        """Run NetMHCStab on the NetChop output, or copy through when disabled."""
        if self.run_netmhc_stab:
            print("Running NetMHCStabPan")
            NetMHCStab(self.net_chop_fh.name, self.netmhc_stab_fh.name, self.file_type, self.top_score_metric).execute()
            print("Completed")
        else:
            shutil.copy(self.net_chop_fh.name, self.netmhc_stab_fh.name)

    def calculate_reference_proteome_similarity(self):
        """Run the reference proteome similarity step on the aggregated report.

        The result is written to ``reference_similarity_fh``; ``execute``
        copies it back over ``self.aggregate_report``. Nothing happens in the
        EL-only case. When the step is disabled the aggregated report is
        copied through unchanged.
        """
        if self.el_only:
            return
        if self.run_reference_proteome_similarity:
            print("Calculating Reference Proteome Similarity")
            if self.file_type == 'pVACseq':
                aggregate_metrics_file = self.aggregate_report.replace('.tsv', '.metrics.json')
                CalculateReferenceProteomeSimilarity(
                    self.aggregate_report,
                    self.fasta,
                    self.reference_similarity_fh.name,
                    species=self.species,
                    file_type=self.file_type,
                    n_threads=self.n_threads,
                    blastp_path=self.blastp_path,
                    blastp_db=self.blastp_db,
                    peptide_fasta=self.peptide_fasta,
                    aggregate_metrics_file=aggregate_metrics_file,
                ).execute()
                # Move the metrics file found next to the temp output over
                # the aggregated report's metrics file.
                aggregate_metrics_output_file = self.reference_similarity_fh.name.replace('.tsv', '.metrics.json')
                shutil.move(aggregate_metrics_output_file, aggregate_metrics_file)
            else:
                CalculateReferenceProteomeSimilarity(
                    self.aggregate_report,
                    self.fasta,
                    self.reference_similarity_fh.name,
                    species=self.species,
                    file_type=self.file_type,
                    n_threads=self.n_threads,
                    blastp_path=self.blastp_path,
                    blastp_db=self.blastp_db,
                    peptide_fasta=self.peptide_fasta,
                ).execute()
            shutil.move("{}.reference_matches".format(self.reference_similarity_fh.name), "{}.reference_matches".format(self.aggregate_report))
            print("Completed")
        else:
            shutil.copy(self.aggregate_report, self.reference_similarity_fh.name)

    def close_filehandles(self):
        """Close every temporary file created in ``__init__``.

        Includes ``identify_problematic_amino_acids_fh``, which was
        previously left open.
        """
        self.identify_problematic_amino_acids_fh.close()
        self.binding_filter_fh.close()
        self.coverage_filter_fh.close()
        self.transcript_support_level_filter_fh.close()
        self.top_score_filter_fh.close()
        self.net_chop_fh.close()
        self.netmhc_stab_fh.close()
        self.manufacturability_fh.close()
        self.reference_similarity_fh.close()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc