• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

griffithlab / pVACtools / 13416806118

10 Feb 2025 06:35PM UTC coverage: 83.463% (-0.2%) from 83.7%
13416806118

Pull #1200

github

web-flow
Merge 0285cbfb4 into 0eea407be
Pull Request #1200: Fix pVACvector bug that would result in not all junctional epitopes getting tested on clipping

2 of 2 new or added lines in 1 file covered. (100.0%)

203 existing lines in 19 files now uncovered.

7424 of 8895 relevant lines covered (83.46%)

4.16 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.67
/pvactools/lib/post_processor.py
1
import tempfile
5✔
2
import shutil
5✔
3

4
from pvactools.lib.identify_problematic_amino_acids import IdentifyProblematicAminoAcids
5✔
5
from pvactools.lib.aggregate_all_epitopes import PvacseqAggregateAllEpitopes, PvacfuseAggregateAllEpitopes, PvacbindAggregateAllEpitopes, PvacspliceAggregateAllEpitopes
5✔
6
from pvactools.lib.binding_filter import BindingFilter
5✔
7
from pvactools.lib.filter import Filter, FilterCriterion
5✔
8
from pvactools.lib.top_score_filter import PvacseqTopScoreFilter, PvacfuseTopScoreFilter, PvacbindTopScoreFilter, PvacspliceTopScoreFilter
5✔
9
from pvactools.lib.calculate_manufacturability import CalculateManufacturability
5✔
10
from pvactools.lib.calculate_reference_proteome_similarity import CalculateReferenceProteomeSimilarity
5✔
11
from pvactools.lib.net_chop import NetChop
5✔
12
from pvactools.lib.netmhc_stab import NetMHCStab
5✔
13

14
class PostProcessor:
    """Run the post-processing steps on a prediction report TSV.

    Every keyword argument passed to the constructor is stored as an
    instance attribute of the same name; the individual steps read their
    settings (thresholds, ``run_*`` flags, file paths, ...) from those
    attributes. ``input_file`` and ``prediction_algorithms`` are required
    at construction time; the remaining attributes are only needed by the
    steps that read them.

    Each step writes into its own named temporary file and the next step
    reads from the previous step's file. :meth:`execute` copies the final
    results to ``aggregate_report`` / ``filtered_report_file`` and removes
    the temporary files.
    """

    # Algorithm names that is_el() always treats as EL. When every selected
    # algorithm is EL, the steps that need binding-affinity data are skipped.
    _EL_ALGORITHMS = ('NetMHCIIpanEL', 'NetMHCpanEL', 'BigMHC_EL', 'BigMHC_IM', 'DeepImmuno')

    # Attribute names of every temporary file created in __init__; used by
    # close_filehandles() so that no handle is forgotten.
    _TEMPFILE_ATTRIBUTES = (
        'identify_problematic_amino_acids_fh',
        'binding_filter_fh',
        'coverage_filter_fh',
        'transcript_support_level_filter_fh',
        'top_score_filter_fh',
        'net_chop_fh',
        'netmhc_stab_fh',
        'manufacturability_fh',
        'reference_similarity_fh',
    )

    # (report column, comparison operator, attribute holding the threshold)
    # shared by the pVACseq and pVACsplice coverage filters.
    _SHARED_COVERAGE_CRITERIA = (
        ("Normal Depth", '>=', 'normal_cov'),
        ("Normal VAF", '<=', 'normal_vaf'),
        ("Tumor DNA Depth", '>=', 'tdna_cov'),
        ("Tumor DNA VAF", '>=', 'tdna_vaf'),
        ("Tumor RNA Depth", '>=', 'trna_cov'),
        ("Tumor RNA VAF", '>=', 'trna_vaf'),
        ("Gene Expression", '>=', 'expn_val'),
    )

    # Coverage filter criteria per file type. pVACsplice deliberately omits
    # the transcript expression filter; file types that are not listed
    # (e.g. pVACbind) get no criteria at all.
    _COVERAGE_CRITERIA = {
        'pVACseq': _SHARED_COVERAGE_CRITERIA + (("Transcript Expression", '>=', 'expn_val'),),
        'pVACsplice': _SHARED_COVERAGE_CRITERIA,
        'pVACfuse': (
            ("Read Support", '>=', 'read_support'),
            ("Expression", '>=', 'expn_val'),
        ),
    }

    def __init__(self, **kwargs):
        """Store the settings and create one temporary file per step.

        Args:
            **kwargs: Settings for the pipeline; each one becomes an
                instance attribute. ``input_file`` and
                ``prediction_algorithms`` must be present. ``file_type``,
                ``fasta`` and ``net_chop_fasta`` default to ``None`` when
                omitted. ``flurry_state`` is derived from
                ``prediction_algorithms`` when not supplied.

        Raises:
            AttributeError: If ``input_file`` or ``prediction_algorithms``
                was not supplied.
        """
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.aggregate_report = self.input_file.replace('.tsv', '.aggregated.tsv')
        self.identify_problematic_amino_acids_fh = tempfile.NamedTemporaryFile()
        self.binding_filter_fh = tempfile.NamedTemporaryFile()
        self.coverage_filter_fh = tempfile.NamedTemporaryFile()
        self.transcript_support_level_filter_fh = tempfile.NamedTemporaryFile()
        self.top_score_filter_fh = tempfile.NamedTemporaryFile()
        self.net_chop_fh = tempfile.NamedTemporaryFile()
        self.netmhc_stab_fh = tempfile.NamedTemporaryFile()
        self.manufacturability_fh = tempfile.NamedTemporaryFile()
        # The '.tsv' suffix matters: calculate_reference_proteome_similarity()
        # derives the metrics file name by replacing it.
        self.reference_similarity_fh = tempfile.NamedTemporaryFile(suffix='.tsv')
        # Guarantee these optional attributes exist even when not supplied.
        self.file_type = kwargs.get('file_type')
        self.fasta = kwargs.get('fasta')
        self.net_chop_fasta = kwargs.get('net_chop_fasta')
        if not hasattr(self, 'flurry_state'):
            self.flurry_state = self.get_flurry_state()
        self.el_only = all(self.is_el(algorithm) for algorithm in self.prediction_algorithms)

    def get_flurry_state(self):
        """Classify how MHCflurry was requested and normalise the algorithm list.

        NOTE: this mutates ``self.prediction_algorithms`` in place (the same
        list object the caller passed in) so that at most a single
        ``'MHCflurry'`` entry remains.

        Returns:
            ``'both'`` if both ``'MHCflurry'`` and ``'MHCflurryEL'`` were
            listed (``'MHCflurryEL'`` is removed), ``'BA_only'`` if only
            ``'MHCflurry'`` was listed, ``'EL_only'`` if only
            ``'MHCflurryEL'`` was listed (it is renamed to ``'MHCflurry'``),
            otherwise ``None``.
        """
        algorithms = self.prediction_algorithms
        has_ba = 'MHCflurry' in algorithms
        has_el = 'MHCflurryEL' in algorithms
        if has_ba and has_el:
            algorithms.remove('MHCflurryEL')
            return 'both'
        if has_ba:
            return 'BA_only'
        if has_el:
            algorithms[algorithms.index('MHCflurryEL')] = 'MHCflurry'
            return 'EL_only'
        return None

    def is_el(self, algorithm):
        """Return True if ``algorithm`` counts as an EL algorithm.

        ``'MHCflurry'`` counts only when ``flurry_state`` is ``'EL_only'``;
        the names in ``_EL_ALGORITHMS`` always count.
        """
        if algorithm == 'MHCflurry' and self.flurry_state == 'EL_only':
            return True
        return algorithm in self._EL_ALGORITHMS

    def execute(self):
        """Run every post-processing step in order and write the reports.

        The temporary files are closed (and thereby deleted) even when a
        step raises, so a failed run does not leave them behind.
        """
        try:
            self.identify_problematic_amino_acids()
            self.aggregate_all_epitopes()
            self.calculate_manufacturability()
            self.execute_binding_filter()
            self.execute_coverage_filter()
            self.execute_transcript_support_level_filter()
            self.execute_top_score_filter()
            self.call_net_chop()
            self.call_netmhc_stab()
            self.calculate_reference_proteome_similarity()
            # No aggregated report is produced in EL-only mode.
            if not self.el_only:
                shutil.copy(self.reference_similarity_fh.name, self.aggregate_report)
            shutil.copy(self.netmhc_stab_fh.name, self.filtered_report_file)
        finally:
            self.close_filehandles()
        print("\nDone: Pipeline finished successfully. File {} contains list of filtered putative neoantigens.\n".format(self.filtered_report_file))

    def identify_problematic_amino_acids(self):
        """Annotate ``input_file`` in place when ``problematic_amino_acids`` is set."""
        if self.problematic_amino_acids:
            print("Identifying peptides with problematic amino acids")
            IdentifyProblematicAminoAcids(self.input_file, self.identify_problematic_amino_acids_fh.name, self.problematic_amino_acids, file_type=self.file_type).execute()
            # The result replaces the input file so later steps see it.
            shutil.copy(self.identify_problematic_amino_acids_fh.name, self.input_file)
            print("Completed")

    def aggregate_all_epitopes(self):
        """Create ``aggregate_report`` with the aggregator matching ``file_type``.

        Skipped with a warning in EL-only mode. No aggregator is run for a
        ``file_type`` other than pVACseq, pVACfuse, pVACbind or pVACsplice.
        """
        if self.el_only:
            print("WARNING: No binding affinity algorithm(s) specified, skipping aggregated report creation.")
            return
        print("Creating aggregated report")
        if self.file_type == 'pVACseq':
            PvacseqAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                tumor_purity=self.tumor_purity,
                binding_threshold=self.binding_threshold,
                percentile_threshold=self.percentile_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                trna_vaf=self.trna_vaf,
                trna_cov=self.trna_cov,
                expn_val=self.expn_val,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
                top_score_metric=self.top_score_metric,
                allele_specific_anchors=self.allele_specific_anchors,
                anchor_contribution_threshold=self.anchor_contribution_threshold,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
            ).execute()
        elif self.file_type == 'pVACfuse':
            PvacfuseAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                percentile_threshold=self.percentile_threshold,
                top_score_metric=self.top_score_metric,
                read_support=self.read_support,
                expn_val=self.expn_val,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
            ).execute()
        elif self.file_type == 'pVACbind':
            PvacbindAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                percentile_threshold=self.percentile_threshold,
                top_score_metric=self.top_score_metric,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
            ).execute()
        elif self.file_type == 'pVACsplice':
            PvacspliceAggregateAllEpitopes(
                self.input_file,
                self.aggregate_report,
                tumor_purity=self.tumor_purity,
                binding_threshold=self.binding_threshold,
                percentile_threshold=self.percentile_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold,
                aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit,
                top_score_metric=self.top_score_metric,
                trna_vaf=self.trna_vaf,
                trna_cov=self.trna_cov,
                expn_val=self.expn_val,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
            ).execute()
        print("Completed")

    def calculate_manufacturability(self):
        """Add manufacturability metrics to ``input_file`` in place when enabled."""
        if self.run_manufacturability_metrics:
            print("Calculating Manufacturability Metrics")
            CalculateManufacturability(self.input_file, self.manufacturability_fh.name, self.file_type).execute()
            shutil.copy(self.manufacturability_fh.name, self.input_file)
            print("Completed")

    def execute_binding_filter(self):
        """Filter ``input_file`` into the binding filter temp file.

        In EL-only mode the input is passed through unfiltered.
        """
        if self.el_only:
            shutil.copy(self.input_file, self.binding_filter_fh.name)
            return
        print("Running Binding Filters")
        BindingFilter(
            self.input_file,
            self.binding_filter_fh.name,
            self.binding_threshold,
            self.minimum_fold_change,
            self.top_score_metric,
            self.exclude_NAs,
            self.allele_specific_binding_thresholds,
            self.percentile_threshold,
            self.file_type,
        ).execute()
        print("Completed")

    def _coverage_filter_criteria(self):
        """Build the coverage FilterCriterion list for the current ``file_type``.

        Threshold attributes are looked up lazily so that only the ones
        relevant to the current file type have to exist on the instance.
        """
        return [
            FilterCriterion(column, operator, getattr(self, attribute), exclude_nas=self.exclude_NAs)
            for column, operator, attribute in self._COVERAGE_CRITERIA.get(self.file_type, ())
        ]

    def execute_coverage_filter(self):
        """Apply the coverage filters, or pass the data through when disabled."""
        if not self.run_coverage_filter:
            shutil.copy(self.binding_filter_fh.name, self.coverage_filter_fh.name)
            return
        print("Running Coverage Filters")
        Filter(self.binding_filter_fh.name, self.coverage_filter_fh.name, self._coverage_filter_criteria()).execute()
        print("Completed")

    def execute_transcript_support_level_filter(self):
        """Apply the transcript support level filter, or pass through when disabled."""
        if not self.run_transcript_support_level_filter:
            shutil.copy(self.coverage_filter_fh.name, self.transcript_support_level_filter_fh.name)
            return
        print("Running Transcript Support Level Filter")
        filter_criteria = [FilterCriterion('Transcript Support Level', '<=', self.maximum_transcript_support_level, exclude_nas=True, skip_value='Not Supported')]
        Filter(
            self.coverage_filter_fh.name,
            self.transcript_support_level_filter_fh.name,
            filter_criteria,
            ['Transcript Support Level'],
        ).execute()
        # Same completion message as every other step.
        print("Completed")

    def execute_top_score_filter(self):
        """Run the top score filter matching ``file_type``.

        In EL-only mode the data is passed through unfiltered. No filter is
        run for a ``file_type`` other than pVACseq, pVACfuse, pVACbind or
        pVACsplice.
        """
        source = self.transcript_support_level_filter_fh.name
        destination = self.top_score_filter_fh.name
        if self.el_only:
            shutil.copy(source, destination)
            return
        print("Running Top Score Filter")
        if self.file_type == 'pVACseq':
            PvacseqTopScoreFilter(
                source,
                destination,
                top_score_metric=self.top_score_metric,
                binding_threshold=self.binding_threshold,
                allele_specific_binding_thresholds=self.allele_specific_binding_thresholds,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
                allele_specific_anchors=self.allele_specific_anchors,
                anchor_contribution_threshold=self.anchor_contribution_threshold,
            ).execute()
        elif self.file_type == 'pVACfuse':
            PvacfuseTopScoreFilter(
                source,
                destination,
                top_score_metric=self.top_score_metric,
            ).execute()
        elif self.file_type == 'pVACbind':
            PvacbindTopScoreFilter(
                source,
                destination,
                top_score_metric=self.top_score_metric,
            ).execute()
        elif self.file_type == 'pVACsplice':
            PvacspliceTopScoreFilter(
                source,
                destination,
                top_score_metric=self.top_score_metric,
                maximum_transcript_support_level=self.maximum_transcript_support_level,
            ).execute()
        print("Completed")

    def call_net_chop(self):
        """Run NetChop on the top score filter output, or pass through when disabled."""
        if not self.run_net_chop:
            shutil.copy(self.top_score_filter_fh.name, self.net_chop_fh.name)
            return
        print("Submitting remaining epitopes to NetChop")
        NetChop(self.top_score_filter_fh.name, self.net_chop_fasta, self.net_chop_fh.name, self.net_chop_method, str(self.net_chop_threshold), self.file_type).execute()
        print("Completed")

    def call_netmhc_stab(self):
        """Run NetMHCStab on the NetChop output, or pass through when disabled."""
        if not self.run_netmhc_stab:
            shutil.copy(self.net_chop_fh.name, self.netmhc_stab_fh.name)
            return
        print("Running NetMHCStabPan")
        NetMHCStab(self.net_chop_fh.name, self.netmhc_stab_fh.name, self.file_type, self.top_score_metric).execute()
        print("Completed")

    def calculate_reference_proteome_similarity(self):
        """Run the reference proteome similarity step on the aggregated report.

        Does nothing in EL-only mode (there is no aggregated report). When
        the step is disabled the aggregated report is copied through
        unchanged. For pVACseq the aggregated report's metrics file is
        passed along and the metrics file produced next to the temporary
        output is moved back over it.
        """
        if self.el_only:
            return
        if not self.run_reference_proteome_similarity:
            shutil.copy(self.aggregate_report, self.reference_similarity_fh.name)
            return
        print("Calculating Reference Proteome Similarity")
        output_file = self.reference_similarity_fh.name
        options = dict(
            species=self.species,
            file_type=self.file_type,
            n_threads=self.n_threads,
            blastp_path=self.blastp_path,
            blastp_db=self.blastp_db,
            peptide_fasta=self.peptide_fasta,
        )
        is_pvacseq = self.file_type == 'pVACseq'
        if is_pvacseq:
            aggregate_metrics_file = self.aggregate_report.replace('.tsv', '.metrics.json')
            options['aggregate_metrics_file'] = aggregate_metrics_file
        CalculateReferenceProteomeSimilarity(
            self.aggregate_report,
            self.fasta,
            output_file,
            **options
        ).execute()
        if is_pvacseq:
            shutil.move(output_file.replace('.tsv', '.metrics.json'), aggregate_metrics_file)
        shutil.move("{}.reference_matches".format(output_file), "{}.reference_matches".format(self.aggregate_report))
        print("Completed")

    def close_filehandles(self):
        """Close every temporary file created in ``__init__``.

        Closing a ``NamedTemporaryFile`` also deletes it. Handles that were
        never created (partially initialised instance) are skipped, and
        closing an already closed handle is harmless.
        """
        for attribute in self._TEMPFILE_ATTRIBUTES:
            handle = getattr(self, attribute, None)
            if handle is not None:
                handle.close()
5✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc