• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

griffithlab / pVACtools / 18038500289

26 Sep 2025 01:01PM UTC coverage: 82.716% (-0.8%) from 83.548%
18038500289

Pull #1307

github

web-flow
Merge bab58f8f7 into 9aa720e9d
Pull Request #1307: Add support for MixMHCpred and PRIME prediction algorithms

199 of 307 new or added lines in 2 files covered. (64.82%)

39 existing lines in 2 files now uncovered.

8997 of 10877 relevant lines covered (82.72%)

2.48 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.08
/pvactools/lib/output_parser.py
1
from abc import ABCMeta, abstractmethod
3✔
2
import sys
3✔
3
import csv
3✔
4
import re
3✔
5
import operator
3✔
6
import os
3✔
7
import pandas as pd
3✔
8
from math import ceil, inf
3✔
9
from statistics import median
3✔
10
import yaml
3✔
11

12
from pvactools.lib.prediction_class import PredictionClass
3✔
13

14
# Raise the csv module's per-field size limit to sys.maxsize at import time
# (presumably so that very long TSV fields can be parsed -- confirm against the inputs).
csv.field_size_limit(sys.maxsize)
3✔
15

16
class OutputParser(metaclass=ABCMeta):
3✔
17
    def __init__(self, **kwargs):
3✔
18
        self.input_iedb_files        = kwargs['input_iedb_files']
3✔
19
        self.input_tsv_file          = kwargs['input_tsv_file']
3✔
20
        self.key_file                = kwargs['key_file']
3✔
21
        self.output_file             = kwargs['output_file']
3✔
22
        self.sample_name             = kwargs['sample_name']
3✔
23
        self.add_sample_name         = kwargs.get('add_sample_name_column')
3✔
24
        self.flurry_state            = kwargs.get('flurry_state')
3✔
25

26
    def parse_input_tsv_file(self):
3✔
27
        with open(self.input_tsv_file, 'r') as reader:
3✔
28
            tsv_reader = csv.DictReader(reader, delimiter='\t')
3✔
29
            tsv_entries = {}
3✔
30
            for line in tsv_reader:
3✔
31
                if line['index'] in tsv_entries:
3✔
32
                    sys.exit('Duplicate TSV indexes')
×
33
                tsv_entries[line['index']] = line
3✔
34
            return tsv_entries
3✔
35

36
    def min_match_count(self, peptide_length):
3✔
37
        return ceil(peptide_length / 2)
3✔
38

39
    def determine_consecutive_matches_from_left(self, mt_epitope_seq, wt_epitope_seq):
3✔
40
        consecutive_matches = 0
3✔
41
        for a, b in zip(mt_epitope_seq, wt_epitope_seq):
3✔
42
            if a == b:
3✔
43
                consecutive_matches += 1
3✔
44
            else:
45
                break
3✔
46
        return consecutive_matches
3✔
47

48
    def determine_consecutive_matches_from_right(self, mt_epitope_seq, wt_epitope_seq):
3✔
49
        consecutive_matches = 0
3✔
50
        for a, b in zip(reversed(mt_epitope_seq), reversed(wt_epitope_seq)):
3✔
51
            if a == b:
3✔
52
                consecutive_matches += 1
3✔
53
            else:
54
                break
3✔
55
        return consecutive_matches
3✔
56

57
    def determine_total_matches(self, mt_epitope_seq, wt_epitope_seq):
3✔
58
        matches = 0
3✔
59
        for a, b in zip(mt_epitope_seq, wt_epitope_seq):
3✔
60
            if a == b:
3✔
61
                matches += 1
3✔
62
        return matches
3✔
63

64
    def find_first_mutation_position(self, wt_epitope_seq, mt_epitope_seq):
3✔
65
        for i,(wt_aa,mt_aa) in enumerate(zip(wt_epitope_seq,mt_epitope_seq)):
×
66
            if wt_aa != mt_aa:
×
67
                return i+1
×
68

69
    def find_mutation_positions(self, wt_epitope_seq, mt_epitope_seq):
3✔
70
        mutated_positions = []
3✔
71
        for i,(wt_aa,mt_aa) in enumerate(zip(wt_epitope_seq,mt_epitope_seq)):
3✔
72
            if wt_aa != mt_aa:
3✔
73
                mutated_positions.append(i+1)
3✔
74
        if len(mutated_positions) == 0:
3✔
75
            return "NA"
3✔
76
        else:
77
            return ", ".join([str(x) for x in mutated_positions])
3✔
78

79
    def aa_ins_change_len(self, aa_change):
3✔
80
        aac = aa_change.split('/')
×
81
        if aac[0][0] == aac[1][0]:
×
82
            return len(aac[1]) - len(aac[0])
×
83
        else:
84
            return len(aac[1]) - len(aac[0]) + 1
×
85

86
    def determine_ins_mut_position_from_previous_result(self, previous_result, mt_epitope_seq, result):
3✔
87
        previous_mutation_position = self.position_to_tuple(previous_result['mutation_position'])
×
88
        aa_ins_change_len = self.aa_ins_change_len(result['amino_acid_change'])
×
89
        if len(previous_mutation_position) == 2:
×
90
            if previous_mutation_position[1] == len(mt_epitope_seq)+1 and aa_ins_change_len > previous_mutation_position[1]-previous_mutation_position[0]+1:
×
91
                return '{}-{}'.format(previous_mutation_position[0]-1, previous_mutation_position[1])
×
92
            elif previous_mutation_position[0] > 1:
×
93
                return '{}-{}'.format(previous_mutation_position[0]-1, previous_mutation_position[1]-1)
×
94
            else:
95
                if previous_mutation_position[1] > 2:
×
96
                    return '1-{}'.format(previous_mutation_position[1]-1)
×
97
                else:
98
                    return '1' # choose '1' over '1-1' format
×
99
        else:
100
            if previous_mutation_position[0] > 1:
×
101
                if aa_ins_change_len > 1:
×
102
                    end = (previous_mutation_position[0] - 1) + (aa_ins_change_len - 1)
×
103
                    if end > len(mt_epitope_seq):
×
104
                        end = len(mt_epitope_seq)
×
105
                    return '{}-{}'.format(previous_mutation_position[0]-1, end)
×
106
                else:
107
                    return '{}'.format(previous_mutation_position[0]-1)
×
108
            else:
109
                return '1'
×
110

111
    def find_ins_mut_position(self, wt_epitope_seq, mt_epitope_seq, aa_change, match_direction):
3✔
112
        if mt_epitope_seq == wt_epitope_seq:
×
113
            return None
×
114
        aal = self.aa_ins_change_len(aa_change)
×
115
        mt_start_pos = None
×
116
        mt_end_pos = None
×
117
        if match_direction == 'left':
×
118
            for i,(wt_aa,mt_aa) in enumerate(zip(wt_epitope_seq,mt_epitope_seq)):
×
119
                if wt_aa != mt_aa:
×
120
                    mt_start_pos = i+1
×
121
                    break
×
122
            mt_end_pos = mt_start_pos + aal - 1
×
123
        elif match_direction == 'right':
×
124
            for i,(wt_aa,mt_aa) in enumerate(zip(wt_epitope_seq[::-1],mt_epitope_seq[::-1])):
×
125
                if wt_aa != mt_aa:
×
126
                    mt_end_pos = i+1
×
127
                    break
×
128
            mt_start_pos = mt_end_pos - aal + 1
×
129

130
        if mt_end_pos > len(mt_epitope_seq):
×
131
            mt_end_pos = len(mt_epitope_seq)
×
132
        if mt_start_pos < 1:
×
133
            mt_start_pos = 1
×
134
        if mt_start_pos == mt_end_pos:
×
135
            return (mt_start_pos,)
×
136
        else:
137
            return (mt_start_pos, mt_end_pos)
×
138

139
    def get_percentiles(self, line, method):
3✔
UNCOV
140
        if method.lower() == 'mhcflurry':
×
UNCOV
141
            if self.flurry_state == 'both':
×
UNCOV
142
                percentiles = {
×
143
                    'percentile': line['percentile'],
144
                    'mhcflurry_presentation_percentile': line['mhcflurry_presentation_percentile'],
145
                }
UNCOV
146
            elif self.flurry_state == 'EL_only':
×
147
                percentiles = {'mhcflurry_presentation_percentile': line['mhcflurry_presentation_percentile']}
×
148
            else:
UNCOV
149
                percentiles = {'percentile': line['percentile']}
×
UNCOV
150
        elif 'percentile' in line:
×
UNCOV
151
            percentiles = {'percentile': line['percentile']}
×
UNCOV
152
        elif 'percentile_rank' in line:
×
UNCOV
153
            percentiles = {'percentile': line['percentile_rank']}
×
UNCOV
154
        elif 'rank' in line:
×
UNCOV
155
            percentiles = {'percentile': line['rank']}
×
156
        else:
UNCOV
157
            return {'percentile': 'NA'}
×
158

UNCOV
159
        return dict((k, float(v)) if v != 'None' and v is not None and v != "" else (k, 'NA') for k, v in percentiles.items())
×
160

161
    def transform_empty_percentiles(self,p):
3✔
162
        return float(p) if p != 'None' and p is not None and p != "" else 'NA'
3✔
163

164
    def get_scores(self, line, method):
3✔
165
        if method.lower() == 'mhcflurry':
3✔
166
            if self.flurry_state == 'both':
3✔
167
                return {
3✔
168
                   'MHCflurry': {
169
                       'ic50': float(line['ic50']),
170
                       'percentile': self.transform_empty_percentiles(line['percentile']),
171
                   },
172
                   'MHCflurryEL Processing': {
173
                       'presentation': float(line['mhcflurry_processing_score']),
174
                   },
175
                   'MHCflurryEL Presentation': {
176
                       'presentation': float(line['mhcflurry_presentation_score']),
177
                       'percentile': self.transform_empty_percentiles(line['mhcflurry_presentation_percentile']),
178
                   }
179
                }
180
            elif self.flurry_state == 'EL_only':
3✔
181
                return {
×
182
                   'MHCflurryEL Processing': {
183
                       'presentation': float(line['mhcflurry_processing_score']),
184
                   },
185
                   'MHCflurryEL Presentation': {
186
                       'presentation': float(line['mhcflurry_presentation_score']),
187
                       'percentile': self.transform_empty_percentiles(line['mhcflurry_presentation_percentile']),
188
                   }
189
               }
190
            else:
191
                return {
3✔
192
                   'MHCflurry': {
193
                       'ic50': float(line['ic50']),
194
                       'percentile': self.transform_empty_percentiles(line['percentile']),
195
                   }
196
               }
197
        elif method.lower() == 'deepimmuno':
3✔
198
            return {
3✔
199
                'DeepImmuno': {
200
                    'immunogenicity': float(line['immunogenicity']),
201
                }
202
            }
203
        elif method.lower() == 'bigmhc_el':
3✔
204
            return {
3✔
205
                'BigMHC_EL': {
206
                    'presentation': float(line['BigMHC_EL']),
207
                }
208
            }
209
        elif method.lower() == 'bigmhc_im':
3✔
210
            return {
3✔
211
                'BigMHC_IM': {
212
                    'immunogenicity': float(line['BigMHC_IM']),
213
                }
214
            }
215
        elif method.lower() == 'netmhcpan_el':
3✔
NEW
216
            return {
×
217
                'NetMHCpanEL': {
218
                    'presentation': float(line['score']),
219
                    'percentile': self.transform_empty_percentiles(line['rank'])
220
                }
221
            }
222
        elif 'netmhciipan_el' in method.lower():
3✔
223
            if 'score' in line:
3✔
224
                presentation = float(line['score'])
3✔
225
            elif 'ic50' in line:
×
NEW
226
                presentation = float(line['ic50'])
×
227
            else:
NEW
228
                 raise Exception("Missing expected columns: 'score' or 'ic50' in NetMHCIIpanEL output")
×
229
            if 'percentile_rank' in line:
3✔
230
                percentile = self.transform_empty_percentiles(line['percentile_rank'])
3✔
NEW
231
            elif 'rank' in line:
×
NEW
232
                percentile = self.transform_empty_percentiles(line['rank'])
×
233
            else:
NEW
234
                 raise Exception("Missing expected columns: 'rank' or 'percentile_rank' in NetMHCIIpanEL output")
×
235
            return {
3✔
236
                'NetMHCIIpanEL': {
237
                    'presentation': presentation,
238
                    'percentile': percentile,
239
                }
240
            }
241
        elif method == 'MixMHCpred':
3✔
242
            return {
3✔
243
                method: {
244
                    'presentation': float(line['score']),
245
                    'percentile': self.transform_empty_percentiles(line['percentile'])
246
                }
247
            }
248
        elif method == 'PRIME':
3✔
249
            return {
3✔
250
                method: {
251
                    'immunogenicity': float(line['score']),
252
                    'percentile': self.transform_empty_percentiles(line['percentile'])
253
                }
254
            }
255
        else:
256
            pretty_method = PredictionClass.prediction_class_name_for_iedb_prediction_method(method)
3✔
257
            if 'percentile' in line:
3✔
258
                percentile = line['percentile']
3✔
259
            elif 'percentile_rank' in line:
3✔
260
                percentile = line['percentile_rank']
3✔
261
            elif 'rank' in line:
3✔
262
                percentile = line['rank']
3✔
263
            else:
264
                percentile = ''
3✔
265
            return {
3✔
266
                pretty_method: {
267
                    'ic50': float(line['ic50']),
268
                    'percentile': self.transform_empty_percentiles(percentile)
269
                }
270
            }
271

272
    def format_match_na(self, result, metric):
3✔
273
        return {method: {field: 'NA' for field in fields.keys()} for method, fields in result[f'mt_{metric}s'].items()}
3✔
274

275
    def position_to_tuple(self, mut_pos):
3✔
276
        # '#-#' -> (#, #); '#' -> (#); or 'NA' -> 'NA'
277
        if mut_pos == 'NA':
×
278
            return mut_pos
×
279
        elif '-' in mut_pos:
×
280
            d_ind = mut_pos.index('-')
×
281
            return (int(mut_pos[0:d_ind]), int(mut_pos[d_ind+1:]))
×
282
        else:
283
            return (int(mut_pos),)
×
284

285
    def match_wildtype_and_mutant_entry_for_missense(self, result, mt_position, wt_results, previous_result):
3✔
286
        #The WT epitope at the same position is the match
287
        match_position = mt_position
3✔
288
        mt_epitope_seq = result['mt_epitope_seq']
3✔
289
        try:
3✔
290
            wt_result      = wt_results[match_position]
3✔
291
        except:
×
292
            import pdb
×
293
            pdb.set_trace()
×
294
        wt_epitope_seq = wt_result['wt_epitope_seq']
3✔
295
        result['wt_epitope_position'] = match_position
3✔
296
        total_matches  = self.determine_total_matches(mt_epitope_seq, wt_epitope_seq)
3✔
297
        if total_matches >= self.min_match_count(int(result['peptide_length'])):
3✔
298
            result['wt_epitope_seq'] = wt_epitope_seq
3✔
299
            result['wt_scores']      = wt_result['wt_scores']
3✔
300
            result['mutation_position'] = self.find_mutation_positions(wt_epitope_seq, mt_epitope_seq)
3✔
301
        else:
302
            result['wt_epitope_seq'] = 'NA'
3✔
303
            result['wt_scores']      = self.format_match_na(result, 'score')
3✔
304
            result['mutation_position'] = 'NA'
3✔
305

306
    def match_wildtype_and_mutant_entry_for_frameshift(self, result, mt_position, wt_results, previous_result):
3✔
307
        #vars for later use
308
        peptide_length = int(result['peptide_length'])
3✔
309
        #The WT epitope at the same position is the match
310
        match_position = mt_position
3✔
311
        #Since the MT sequence is longer than the WT sequence, not all MT epitopes have a match
312
        if match_position not in wt_results:
3✔
313
            result['wt_epitope_seq'] = 'NA'
3✔
314
            result['wt_scores']      = self.format_match_na(result, 'score')
3✔
315
            result['wt_epitope_position'] = 'NA'
3✔
316
            result['mutation_position'] = 'NA'
3✔
317
            return
3✔
318

319
        mt_epitope_seq = result['mt_epitope_seq']
3✔
320
        wt_result      = wt_results[match_position]
3✔
321
        wt_epitope_seq = wt_result['wt_epitope_seq']
3✔
322
        if mt_epitope_seq == wt_epitope_seq:
3✔
323
            #The MT epitope does not overlap the frameshift mutation
324
            result['wt_epitope_seq']    = wt_epitope_seq
3✔
325
            result['wt_scores']         = wt_result['wt_scores']
3✔
326
            result['mutation_position'] = 'NA'
3✔
327
            result['wt_epitope_position'] = 'NA'
3✔
328
        else:
329
            #Determine how many amino acids are the same between the MT epitope and its matching WT epitope
330
            total_matches = self.determine_total_matches(mt_epitope_seq, wt_epitope_seq)
3✔
331
            if total_matches >= self.min_match_count(peptide_length):
3✔
332
                #The minimum amino acid match count is met
333
                result['wt_epitope_seq'] = wt_result['wt_epitope_seq']
3✔
334
                result['wt_scores']      = wt_result['wt_scores']
3✔
335
                result['mutation_position'] = self.find_mutation_positions(wt_epitope_seq, mt_epitope_seq)
3✔
336
            else:
337
                #The minimum amino acid match count is not met
338
                #Even though there is a matching WT epitope there are not enough overlapping amino acids
339
                #We don't include the matching WT epitope in the output
340
                result['wt_epitope_seq'] = 'NA'
3✔
341
                result['wt_scores']      = self.format_match_na(result, 'score')
3✔
342
                result['wt_epitope_seq'] = 'NA'
3✔
343
            result['wt_epitope_position'] = match_position
3✔
344

345
    def match_wildtype_and_mutant_entry_for_inframe_indel(self, result, mt_position, wt_results, previous_result, iedb_results_for_wt_iedb_result_key):
        """Pick the wildtype epitope that best matches an in-frame indel mutant epitope.

        Updates ``result`` in place with 'wt_epitope_seq', 'wt_scores',
        'wt_epitope_position', 'mutation_position' and 'match_direction'
        ('left' or 'right'); nothing is returned.

        Args:
            result: mutant entry; reads 'mt_epitope_seq', 'peptide_length',
                'variant_type' and (via format_match_na) 'mt_scores'.
            mt_position: position key of the mutant epitope; used as-is to
                index ``wt_results`` and converted with int() for arithmetic.
            wt_results: mapping of position key -> wildtype entry with
                'wt_epitope_seq' and 'wt_scores'. Computed positions are
                looked up as str(position).
            previous_result: entry of the preceding mutant position, or None.
                Its 'match_direction' and 'wt_epitope_position' are read.
            iedb_results_for_wt_iedb_result_key: the mutant entries belonging
                to the same variant; only the number of entries is used, to
                derive the indel length from the difference to len(wt_results).

        NOTE(review): if ``previous_result`` is given and its
        'match_direction' is neither 'right' nor 'left', execution falls
        through the last ``if`` without setting any key -- confirm this cannot
        happen for the callers.
        """
        mt_epitope_seq = result['mt_epitope_seq']
        #If the previous WT epitope was matched "from the right" we can just use that position to infer the mutation position and match direction
        if previous_result is not None and previous_result['match_direction'] == 'right':
            # The "right" branches store wt_epitope_position as an int, so + 1 is an integer increment here
            best_match_position           = previous_result['wt_epitope_position'] + 1
            result['wt_epitope_position'] = best_match_position
            result['match_direction']     = 'right'

            #We need to ensure that the matched WT epitope has enough overlapping amino acids with the MT epitope
            best_match_wt_result = wt_results[str(best_match_position)]
            total_matches = self.determine_total_matches(result['mt_epitope_seq'], best_match_wt_result['wt_epitope_seq'])
            if total_matches and total_matches >= self.min_match_count(int(result['peptide_length'])):
                #The minimum amino acid match count is met
                result['wt_epitope_seq'] = best_match_wt_result['wt_epitope_seq']
                result['wt_scores']      = best_match_wt_result['wt_scores']
                result['mutation_position'] = self.find_mutation_positions(mt_epitope_seq, best_match_wt_result['wt_epitope_seq'])
            else:
                #The minimum amino acid match count is not met
                #Even though there is a matching WT epitope there are not enough overlapping amino acids
                #We don't include the matching WT epitope in the output
                result['wt_epitope_seq'] = 'NA'
                result['wt_scores']      = self.format_match_na(result, 'score')
                result['mutation_position'] = 'NA'
            return

        #In all other cases the WT epitope at the same position is used as the baseline match
        baseline_best_match_position = mt_position

        #For an inframe insertion the MT sequence is longer than the WT sequence
        #In this case not all MT epitopes might have a baseline match
        if baseline_best_match_position not in wt_results:
            # The indel length is taken as the difference between the number of MT and WT epitopes
            insertion_length = len(iedb_results_for_wt_iedb_result_key.keys()) - len(wt_results.keys())
            best_match_position = int(baseline_best_match_position) - insertion_length
            best_match_wt_result = wt_results[str(best_match_position)]
            result['match_direction'] = 'right'
            result['wt_epitope_position'] = best_match_position
            total_matches = self.determine_total_matches(mt_epitope_seq, best_match_wt_result['wt_epitope_seq'])
            if total_matches and total_matches >= self.min_match_count(int(result['peptide_length'])):
                #The minimum amino acid match count is met
                result['wt_epitope_seq'] = best_match_wt_result['wt_epitope_seq']
                result['wt_scores']      = best_match_wt_result['wt_scores']
                result['mutation_position'] = self.find_mutation_positions(best_match_wt_result['wt_epitope_seq'], mt_epitope_seq)
            else:
                #The minimum amino acid match count is not met
                #Even though there is a matching WT epitope there are not enough overlapping amino acids
                #We don't include the matching WT epitope in the output
                result['wt_epitope_seq'] = 'NA'
                result['wt_scores']      = self.format_match_na(result, 'score')
                result['mutation_position'] = 'NA'
            return

        baseline_best_match_wt_result      = wt_results[baseline_best_match_position]
        baseline_best_match_wt_epitope_seq = baseline_best_match_wt_result['wt_epitope_seq']
        #The MT epitope does not overlap the indel mutation
        if baseline_best_match_wt_epitope_seq == mt_epitope_seq:
            result['wt_epitope_seq']      = baseline_best_match_wt_result['wt_epitope_seq']
            result['wt_scores']           = baseline_best_match_wt_result['wt_scores']
            result['wt_epitope_position'] = int(baseline_best_match_position)
            result['mutation_position']   = 'NA'
            result['match_direction']     = 'left'
            return

        #If there is no previous result or the previous WT epitope was matched "from the left" we start by comparing to the baseline match
        if previous_result is None or previous_result['match_direction'] == 'left':
            best_match_count  = self.determine_consecutive_matches_from_left(mt_epitope_seq, baseline_best_match_wt_epitope_seq)
            #The alternate best match candidate "from the right" is inferred from the baseline best match position and the indel length
            if result['variant_type'] == 'inframe_ins':
                insertion_length              = len(iedb_results_for_wt_iedb_result_key.keys()) - len(wt_results.keys())
                alternate_best_match_position = int(baseline_best_match_position) - insertion_length
            elif result['variant_type'] == 'inframe_del':
                deletion_length                 = len(wt_results.keys()) - len(iedb_results_for_wt_iedb_result_key.keys())
                alternate_best_match_position   = int(baseline_best_match_position) + deletion_length
            # A non-positive alternate position has no WT epitope, so only the baseline match is available
            if alternate_best_match_position > 0:
                alternate_best_match_wt_result      = wt_results[str(alternate_best_match_position)]
                alternate_best_match_wt_epitope_seq = alternate_best_match_wt_result['wt_epitope_seq']
                consecutive_matches_from_right      = self.determine_consecutive_matches_from_right(mt_epitope_seq, alternate_best_match_wt_epitope_seq)
                #We then check if the alternate best match epitope has more matching amino acids than the baseline best match epitope
                #If it does, we pick it as the best match
                if consecutive_matches_from_right > best_match_count:
                    match_direction      = 'right'
                    best_match_position  = alternate_best_match_position
                    best_match_wt_result = alternate_best_match_wt_result
                else:
                    match_direction      = 'left'
                    best_match_position  = baseline_best_match_position
                    best_match_wt_result = baseline_best_match_wt_result
            else:
                match_direction      = 'left'
                best_match_position  = baseline_best_match_position
                best_match_wt_result = baseline_best_match_wt_result

            #Now that we have found the matching WT epitope we still need to ensure that it has enough overlapping amino acids
            total_matches = self.determine_total_matches(mt_epitope_seq, best_match_wt_result['wt_epitope_seq'])
            if total_matches and total_matches >= self.min_match_count(int(result['peptide_length'])):
                #The minimum amino acid match count is met
                result['wt_epitope_seq'] = best_match_wt_result['wt_epitope_seq']
                result['wt_scores']      = best_match_wt_result['wt_scores']
                result['mutation_position'] = self.find_mutation_positions(best_match_wt_result['wt_epitope_seq'], mt_epitope_seq)
            else:
                #The minimum amino acid match count is not met
                #Even though there is a matching WT epitope there are not enough overlapping amino acids
                #We don't include the matching WT epitope in the output
                result['wt_epitope_seq'] = 'NA'
                result['wt_scores']      = self.format_match_na(result, 'score')
                result['mutation_position'] = 'NA'

            # For a 'left' match this stores mt_position exactly as passed in; for a 'right' match an int
            result['match_direction']     = match_direction
            result['wt_epitope_position'] = best_match_position
3✔
453

454
    def match_wildtype_and_mutant_entries(self, iedb_results, wt_iedb_results):
3✔
455
        for key in sorted(iedb_results.keys(), key = lambda x: int(x.split('|')[-1])):
3✔
456
            result = iedb_results[key]
3✔
457
            (wt_iedb_result_key, mt_position) = key.split('|', 1)
3✔
458
            previous_mt_position = str(int(mt_position)-1)
3✔
459
            previous_key = '|'.join([wt_iedb_result_key, previous_mt_position])
3✔
460
            if previous_key in iedb_results:
3✔
461
                previous_result = iedb_results[previous_key]
3✔
462
            else:
463
                previous_result = None
3✔
464
            wt_results = wt_iedb_results[wt_iedb_result_key]
3✔
465
            if result['variant_type'] == 'missense':
3✔
466
                self.match_wildtype_and_mutant_entry_for_missense(result, mt_position, wt_results, previous_result)
3✔
467
            elif result['variant_type'] == 'FS':
3✔
468
                self.match_wildtype_and_mutant_entry_for_frameshift(result, mt_position, wt_results, previous_result)
3✔
469
            elif result['variant_type'] == 'inframe_ins' or result['variant_type'] == 'inframe_del':
3✔
470
                iedb_results_for_wt_iedb_result_key = dict([(key,value) for key, value in iedb_results.items() if key.startswith(wt_iedb_result_key)])
3✔
471
                self.match_wildtype_and_mutant_entry_for_inframe_indel(result, mt_position, wt_results, previous_result, iedb_results_for_wt_iedb_result_key)
3✔
472

473
        return iedb_results
3✔
474

475
    @abstractmethod
3✔
476
    def parse_iedb_file(self, tsv_entries):
3✔
477
        pass
×
478

479
    def get_values_for_summary_metrics(self, result, metric, epitope_type):
3✔
480
        metric_values = dict()
3✔
481
        if metric in ['ic50', 'percentile']:
3✔
482
            for (method, values) in result['{}_scores'.format(epitope_type)].items():
3✔
483
                metric_values[method] = {field: score for field, score in values.items() if field == metric and score != 'NA'}
3✔
484
                if not metric_values[method]:
3✔
485
                    del metric_values[method]
3✔
486
        else:
487
            field = metric.replace("_percentile", "")
3✔
488
            result_subset = {method: scores for method, scores in result['{}_scores'.format(epitope_type)].items() if field in scores}
3✔
489
            for (method, values) in result_subset.items():
3✔
490
                metric_values[method] = {field: score for field, score in values.items() if field == 'percentile' and score != 'NA'}
3✔
491
                if not metric_values[method]:
3✔
492
                    del metric_values[method]
3✔
493
        return metric_values
3✔
494

495
    def add_summary_metrics(self, iedb_results):
3✔
496
        iedb_results_with_metrics = {}
3✔
497
        for key, result in iedb_results.items():
3✔
498
            for metric in ['ic50', 'ic50_percentile', 'immunogenicity_percentile', 'presentation_percentile', 'percentile']:
3✔
499
                mt_values = self.get_values_for_summary_metrics(result, metric, 'mt')
3✔
500
                if not mt_values:
3✔
501
                    result['best_mt_{}'.format(metric)]          = 'NA'
3✔
502
                    result['corresponding_wt_{}'.format(metric)] = 'NA'
3✔
503
                    result['best_mt_{}_method'.format(metric)]   = 'NA'
3✔
504
                    result['median_mt_{}'.format(metric)]        = 'NA'
3✔
505
                else:
506
                    best_mt_value = sys.maxsize
3✔
507
                    for method in sorted(mt_values.keys()):
3✔
508
                        for value in mt_values[method].values():
3✔
509
                            if value < best_mt_value:
3✔
510
                                best_mt_value = value
3✔
511
                                best_mt_value_method = method
3✔
512
                    result['best_mt_{}'.format(metric)] = best_mt_value
3✔
513
                    result['best_mt_{}_method'.format(metric)]   = best_mt_value_method
3✔
514

515
                    if metric == 'ic50':
3✔
516
                        result['corresponding_wt_{}'.format(metric)] = result['wt_scores'][best_mt_value_method]['ic50']
3✔
517
                        result['median_mt_{}'.format(metric)] = median([score['ic50'] for score in mt_values.values()])
3✔
518
                    else:
519
                        result['corresponding_wt_{}'.format(metric)] = result['wt_scores'][best_mt_value_method]['percentile']
3✔
520
                        result['median_mt_{}'.format(metric)] = median([score['percentile'] for score in mt_values.values()])
3✔
521

522
                wt_values = self.get_values_for_summary_metrics(result, metric, 'wt')
3✔
523
                if not wt_values:
3✔
524
                    result['median_wt_{}'.format(metric)] = 'NA'
3✔
525
                else:
526
                    if metric == 'ic50':
3✔
527
                        result['median_wt_{}'.format(metric)] = median([score['ic50'] for score in wt_values.values()])
3✔
528
                    else:
529
                        result['median_wt_{}'.format(metric)] = median([score['percentile'] for score in wt_values.values()])
3✔
530

531
                iedb_results_with_metrics[key]  = result
3✔
532

533
        return iedb_results_with_metrics
3✔
534

535
    def process_input_iedb_file(self, tsv_entries):
3✔
536
        iedb_results = self.parse_iedb_file(tsv_entries)
3✔
537
        iedb_results_with_metrics = self.add_summary_metrics(iedb_results)
3✔
538
        return iedb_results_with_metrics
3✔
539

540
    def base_headers(self):
        """Return the fixed leading columns of the report, in output order.

        A fresh list is built on every call; ``output_headers`` appends the
        per-algorithm columns to it.
        """
        # Columns describing the variant and its transcript annotation.
        variant_columns = [
            'Chromosome',
            'Start',
            'Stop',
            'Reference',
            'Variant',
            'Transcript',
            'Transcript Support Level',
            'Transcript Length',
            'Canonical',
            'MANE Select',
            'Biotype',
            'Transcript CDS Flags',
            'Ensembl Gene ID',
            'Variant Type',
            'Mutation',
            'Protein Position',
            'Gene Name',
            'HGVSc',
            'HGVSp',
        ]
        # Columns describing the epitope itself.
        epitope_columns = [
            'HLA Allele',
            'Peptide Length',
            'Sub-peptide Position',
            'Mutation Position',
            'MT Epitope Seq',
            'WT Epitope Seq',
        ]
        # "Best" summary columns, one triple (quadruple for IC50) per metric.
        best_columns = [
            'Best MT IC50 Score Method',
            'Best MT IC50 Score',
            'Corresponding WT IC50 Score',
            'Corresponding Fold Change',
            'Best MT Percentile Method',
            'Best MT Percentile',
            'Corresponding WT Percentile',
            'Best MT IC50 Percentile Method',
            'Best MT IC50 Percentile',
            'Corresponding WT IC50 Percentile',
            'Best MT Immunogenicity Percentile Method',
            'Best MT Immunogenicity Percentile',
            'Corresponding WT Immunogenicity Percentile',
            'Best MT Presentation Percentile Method',
            'Best MT Presentation Percentile',
            'Corresponding WT Presentation Percentile',
        ]
        # Depth, VAF and expression columns.
        coverage_columns = [
            'Tumor DNA Depth',
            'Tumor DNA VAF',
            'Tumor RNA Depth',
            'Tumor RNA VAF',
            'Normal Depth',
            'Normal VAF',
            'Gene Expression',
            'Transcript Expression',
        ]
        # "Median" summary columns.
        median_columns = [
            'Median MT IC50 Score',
            'Median WT IC50 Score',
            'Median Fold Change',
            'Median MT Percentile',
            'Median WT Percentile',
            'Median MT IC50 Percentile',
            'Median WT IC50 Percentile',
            'Median MT Immunogenicity Percentile',
            'Median WT Immunogenicity Percentile',
            'Median MT Presentation Percentile',
            'Median WT Presentation Percentile',
        ]
        return variant_columns + epitope_columns + best_columns + coverage_columns + median_columns
3✔
604

605
    def output_headers(self):
        """Return the full, ordered list of column names for the output TSV.

        Starts from ``base_headers()``, appends the WT/MT columns for every
        method reported by ``prediction_methods()``, then the optional
        "Sample Name" column, and finally "Index".
        """
        columns = self.base_headers()
        for method in self.prediction_methods():
            if method.lower() == 'mhcflurry' and self.flurry_state in ('EL_only', 'both'):
                self.flurry_headers(columns)
                if self.flurry_state == 'EL_only':
                    # Only the EL columns are wanted; skip the IC50 columns below.
                    continue

            pretty_method = PredictionClass.prediction_class_name_for_iedb_prediction_method(method)
            if method in ['BigMHC_EL', 'netmhciipan_el', 'netmhcpan_el', 'MixMHCpred']:
                templates = ["%s WT Presentation Score", "%s MT Presentation Score"]
                if method in ['netmhcpan_el', 'netmhciipan_el', 'MixMHCpred']:
                    templates += ["%s WT Percentile", "%s MT Percentile"]
            elif method in ['BigMHC_IM', 'DeepImmuno', 'PRIME']:
                templates = ["%s WT Immunogenicity Score", "%s MT Immunogenicity Score"]
                if method in ['PRIME']:
                    templates += ["%s WT Percentile", "%s MT Percentile"]
            else:
                templates = [
                    "%s WT IC50 Score",
                    "%s MT IC50 Score",
                    "%s WT Percentile",
                    "%s MT Percentile",
                ]
            for template in templates:
                columns.append(template % pretty_method)

        if self.add_sample_name:
            columns.append("Sample Name")
        columns.append("Index")

        return columns
3✔
638

639
    def flurry_headers(self, headers):
        """Append the six MHCflurryEL WT/MT columns to ``headers`` in place."""
        headers.extend([
            "MHCflurryEL Processing WT Score",
            "MHCflurryEL Processing MT Score",
            "MHCflurryEL Presentation WT Score",
            "MHCflurryEL Presentation MT Score",
            "MHCflurryEL Presentation WT Percentile",
            "MHCflurryEL Presentation MT Percentile",
        ])
3✔
646

647
    def prediction_methods(self):
        """Return the sorted, de-duplicated method names found in the input file names.

        The method name is the token following ``<sample_name>.`` in each file's
        basename, optionally including a ``-<major>.<minor>`` version suffix.
        """
        method_pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
        basenames = (os.path.basename(path) for path in self.input_iedb_files)
        found = {method_pattern.match(basename).group(1) for basename in basenames}
        return sorted(found)
3✔
658

659
    def add_prediction_scores(self, row, mt_scores, wt_scores):
        """Fill the per-algorithm MT/WT score columns of ``row`` and return it.

        Args:
            row: dict for one output line; it is modified in place.
            mt_scores: mapping of pretty method name to that method's score
                dict for the mutant epitope.
            wt_scores: the same mapping for the wildtype epitope.

        Returns:
            The same ``row`` object. A method absent from a score mapping
            yields 'NA' (see ``score_or_na``).
        """
        for method in self.prediction_methods():
            pretty_method = PredictionClass.prediction_class_name_for_iedb_prediction_method(method)
            if pretty_method == 'MHCflurry':
                if self.flurry_state == 'EL_only' or self.flurry_state == 'both':
                    row['MHCflurryEL Processing MT Score'] = self.score_or_na(mt_scores, 'MHCflurryEL Processing', 'presentation')
                    row['MHCflurryEL Processing WT Score'] = self.score_or_na(wt_scores, 'MHCflurryEL Processing', 'presentation')
                    row['MHCflurryEL Presentation MT Score'] = self.score_or_na(mt_scores, 'MHCflurryEL Presentation', 'presentation')
                    row['MHCflurryEL Presentation MT Percentile'] = self.score_or_na(mt_scores, 'MHCflurryEL Presentation', 'percentile')
                    row['MHCflurryEL Presentation WT Score'] = self.score_or_na(wt_scores, 'MHCflurryEL Presentation', 'presentation')
                    row['MHCflurryEL Presentation WT Percentile'] = self.score_or_na(wt_scores, 'MHCflurryEL Presentation', 'percentile')
                if self.flurry_state in ['both', 'BA_only', None]:
                    row['MHCflurry MT IC50 Score'] = self.score_or_na(mt_scores, 'MHCflurry', 'ic50')
                    row['MHCflurry MT Percentile'] = self.score_or_na(mt_scores, 'MHCflurry', 'percentile')
                    row['MHCflurry WT IC50 Score'] = self.score_or_na(wt_scores, 'MHCflurry', 'ic50')
                    row['MHCflurry WT Percentile'] = self.score_or_na(wt_scores, 'MHCflurry', 'percentile')
            elif pretty_method in ['BigMHC_EL', 'NetMHCIIpanEL', 'NetMHCpanEL', 'MixMHCpred']:
                row[f'{pretty_method} MT Presentation Score'] = self.score_or_na(mt_scores, pretty_method, 'presentation')
                row[f'{pretty_method} WT Presentation Score'] = self.score_or_na(wt_scores, pretty_method, 'presentation')
                # These names are pretty names, so they must be tested against
                # pretty_method. The raw file-name tokens ('netmhcpan_el',
                # 'netmhciipan_el') never match them, which left the percentile
                # columns declared by output_headers unfilled.
                if pretty_method in ['NetMHCIIpanEL', 'NetMHCpanEL', 'MixMHCpred']:
                    row[f'{pretty_method} MT Percentile'] = self.score_or_na(mt_scores, pretty_method, 'percentile')
                    row[f'{pretty_method} WT Percentile'] = self.score_or_na(wt_scores, pretty_method, 'percentile')
            elif pretty_method in ['BigMHC_IM', 'DeepImmuno', 'PRIME']:
                row[f'{pretty_method} MT Immunogenicity Score'] = self.score_or_na(mt_scores, pretty_method, 'immunogenicity')
                row[f'{pretty_method} WT Immunogenicity Score'] = self.score_or_na(wt_scores, pretty_method, 'immunogenicity')
                if pretty_method in ['PRIME']:
                    row[f'{pretty_method} MT Percentile'] = self.score_or_na(mt_scores, pretty_method, 'percentile')
                    row[f'{pretty_method} WT Percentile'] = self.score_or_na(wt_scores, pretty_method, 'percentile')
            else:
                row[f'{pretty_method} MT IC50 Score'] = self.score_or_na(mt_scores, pretty_method, 'ic50')
                row[f'{pretty_method} MT Percentile'] = self.score_or_na(mt_scores, pretty_method, 'percentile')
                row[f'{pretty_method} WT IC50 Score'] = self.score_or_na(wt_scores, pretty_method, 'ic50')
                row[f'{pretty_method} WT Percentile'] = self.score_or_na(wt_scores, pretty_method, 'percentile')
        return row
3✔
693

694
    def score_or_na(self, all_scores, method, score):
        """Return ``all_scores[method][score]``, or 'NA' when ``method`` is absent.

        A ``method`` that is present but lacks the ``score`` key still raises
        ``KeyError``.
        """
        if method not in all_scores:
            return 'NA'
        return all_scores[method][score]
×
699

700
    def rounded_score_or_na(self, score):
        """Round ``score`` to three decimal places, passing 'NA' through unchanged."""
        return score if score == 'NA' else round(score, 3)
3✔
705

706
    def execute(self):
        """Parse all inputs and write the combined report to ``self.output_file``.

        Rows are written to ``<output_file>.tmp`` first and the temporary file
        is moved over ``self.output_file`` only after every row was written.
        Results whose MT and WT epitope sequences are identical are skipped.
        """
        tsv_entries = self.parse_input_tsv_file()
        iedb_results = self.process_input_iedb_file(tsv_entries)

        tmp_output_file = self.output_file + '.tmp'
        # The context manager closes the handle even if building a row raises.
        with open(tmp_output_file, 'w') as tmp_output_filehandle:
            tsv_writer = csv.DictWriter(tmp_output_filehandle, delimiter='\t', fieldnames=self.output_headers())
            tsv_writer.writeheader()

            for result in iedb_results.values():
                if result['mt_epitope_seq'] == result['wt_epitope_seq']:
                    continue

                tsv_entry = tsv_entries[result['tsv_index']]

                # Fold change is WT / MT; 'NA' when there is no WT value and
                # infinite when the MT value is exactly zero.
                if result['corresponding_wt_ic50'] == 'NA':
                    corresponding_fold_change = 'NA'
                elif result['best_mt_ic50'] == 0:
                    corresponding_fold_change = inf
                else:
                    corresponding_fold_change = round((result['corresponding_wt_ic50']/result['best_mt_ic50']), 3)

                if result['median_wt_ic50'] == 'NA':
                    median_fold_change = 'NA'
                elif result['median_mt_ic50'] == 0:
                    median_fold_change = inf
                else:
                    median_fold_change = round((result['median_wt_ic50']/result['median_mt_ic50']), 3)

                row = {
                    'Chromosome'          : tsv_entry['chromosome_name'],
                    'Start'               : tsv_entry['start'],
                    'Stop'                : tsv_entry['stop'],
                    'Reference'           : tsv_entry['reference'],
                    'Variant'             : tsv_entry['variant'],
                    'Transcript'          : tsv_entry['transcript_name'],
                    'Transcript Support Level': tsv_entry['transcript_support_level'],
                    'Transcript Length'   : tsv_entry['transcript_length'],
                    'Canonical'           : tsv_entry['canonical'],
                    'MANE Select'         : tsv_entry['mane_select'],
                    'Biotype'             : tsv_entry['biotype'],
                    'Transcript CDS Flags': tsv_entry['transcript_cds_flags'],
                    'Ensembl Gene ID'     : tsv_entry['ensembl_gene_id'],
                    'HGVSc'               : tsv_entry['hgvsc'],
                    'HGVSp'               : tsv_entry['hgvsp'],
                    'Variant Type'        : tsv_entry['variant_type'],
                    'Mutation'            : result['amino_acid_change'],
                    'Protein Position'    : tsv_entry['protein_position'],
                    'Gene Name'           : result['gene_name'],
                    'HLA Allele'          : result['allele'],
                    'Peptide Length'      : result['peptide_length'],
                    'Sub-peptide Position': result['position'],
                    'Mutation Position'   : result['mutation_position'] if 'mutation_position' in result else 'NA',
                    'MT Epitope Seq'      : result['mt_epitope_seq'],
                    'WT Epitope Seq'      : result['wt_epitope_seq'],
                    'Index'               : result['tsv_index'],
                    #Median IC50 Score
                    'Median MT IC50 Score': self.rounded_score_or_na(result['median_mt_ic50']),
                    'Median WT IC50 Score': self.rounded_score_or_na(result['median_wt_ic50']),
                    'Median Fold Change': median_fold_change,
                    #Median Percentile
                    'Median MT Percentile': self.rounded_score_or_na(result['median_mt_percentile']),
                    'Median WT Percentile': self.rounded_score_or_na(result['median_wt_percentile']),
                    #Median IC50 Percentile
                    'Median MT IC50 Percentile': self.rounded_score_or_na(result['median_mt_ic50_percentile']),
                    'Median WT IC50 Percentile': self.rounded_score_or_na(result['median_wt_ic50_percentile']),
                    #Median Immunogenicity Percentile
                    'Median MT Immunogenicity Percentile': self.rounded_score_or_na(result['median_mt_immunogenicity_percentile']),
                    'Median WT Immunogenicity Percentile': self.rounded_score_or_na(result['median_wt_immunogenicity_percentile']),
                    #Median Presentation Percentile
                    'Median MT Presentation Percentile': self.rounded_score_or_na(result['median_mt_presentation_percentile']),
                    'Median WT Presentation Percentile': self.rounded_score_or_na(result['median_wt_presentation_percentile']),
                    #Best IC50 Score
                    'Best MT IC50 Score': self.rounded_score_or_na(result['best_mt_ic50']),
                    'Best MT IC50 Score Method': result['best_mt_ic50_method'],
                    'Corresponding WT IC50 Score': self.rounded_score_or_na(result['corresponding_wt_ic50']),
                    'Corresponding Fold Change': corresponding_fold_change,
                    #Best Percentile
                    'Best MT Percentile': self.rounded_score_or_na(result['best_mt_percentile']),
                    'Best MT Percentile Method': result['best_mt_percentile_method'],
                    'Corresponding WT Percentile': self.rounded_score_or_na(result['corresponding_wt_percentile']),
                    #Best IC50 Percentile
                    'Best MT IC50 Percentile': self.rounded_score_or_na(result['best_mt_ic50_percentile']),
                    'Best MT IC50 Percentile Method': result['best_mt_ic50_percentile_method'],
                    'Corresponding WT IC50 Percentile': self.rounded_score_or_na(result['corresponding_wt_ic50_percentile']),
                    #Best Immunogenicity Percentile
                    'Best MT Immunogenicity Percentile': self.rounded_score_or_na(result['best_mt_immunogenicity_percentile']),
                    'Best MT Immunogenicity Percentile Method': result['best_mt_immunogenicity_percentile_method'],
                    'Corresponding WT Immunogenicity Percentile': self.rounded_score_or_na(result['corresponding_wt_immunogenicity_percentile']),
                    #Best Presentation Percentile
                    'Best MT Presentation Percentile': self.rounded_score_or_na(result['best_mt_presentation_percentile']),
                    'Best MT Presentation Percentile Method': result['best_mt_presentation_percentile_method'],
                    'Corresponding WT Presentation Percentile': self.rounded_score_or_na(result['corresponding_wt_presentation_percentile']),
                }
                row = self.add_prediction_scores(row, result['mt_scores'], result['wt_scores'])

                # Expression and VAF values are optional input columns; they
                # are rounded to three decimals unless the input says 'NA'.
                for (tsv_key, row_key) in zip(['gene_expression', 'transcript_expression', 'normal_vaf', 'tdna_vaf', 'trna_vaf'], ['Gene Expression', 'Transcript Expression', 'Normal VAF', 'Tumor DNA VAF', 'Tumor RNA VAF']):
                    if tsv_key in tsv_entry:
                        if tsv_entry[tsv_key] == 'NA':
                            row[row_key] = 'NA'
                        else:
                            row[row_key] = round(float(tsv_entry[tsv_key]), 3)
                # Depth values are copied through verbatim when present.
                for (tsv_key, row_key) in zip(['normal_depth', 'tdna_depth', 'trna_depth'], ['Normal Depth', 'Tumor DNA Depth', 'Tumor RNA Depth']):
                    if tsv_key in tsv_entry:
                        row[row_key] = tsv_entry[tsv_key]
                if self.add_sample_name:
                    row['Sample Name'] = self.sample_name
                tsv_writer.writerow(row)

        # Only reached when every row was written successfully.
        os.replace(tmp_output_file, self.output_file)
3✔
813

814

815
class DefaultOutputParser(OutputParser):
    """Output parser for inputs whose key file labels sequences as ``MT.<index>`` or ``WT.<index>``."""

    def parse_iedb_file(self, tsv_entries):
        """Collect mutant and wildtype prediction scores from all input files.

        Args:
            tsv_entries: mapping of TSV index to the parsed input TSV line.

        Returns:
            Whatever ``match_wildtype_and_mutant_entries`` returns for the
            collected mutant results (keyed by ``"<tsv_index>|<position>"``)
            and wildtype results (keyed by TSV index, then position).
        """
        with open(self.key_file, 'r') as key_file_reader:
            # NOTE(review): FullLoader accepts more than plain data. If the key
            # file could ever come from an untrusted source, yaml.safe_load
            # would be the safer choice - confirm before switching.
            protein_identifiers_from_label = yaml.load(key_file_reader, Loader=yaml.FullLoader)
        # The pattern depends only on the sample name, so compile it once.
        pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
        iedb_results = {}
        wt_iedb_results = {}
        for input_iedb_file in self.input_iedb_files:
            with open(input_iedb_file, 'r') as reader:
                iedb_tsv_reader = csv.DictReader(reader, delimiter='\t')
                filename = os.path.basename(input_iedb_file)

                match = pattern.match(filename)
                method = match.group(1)

                for line in iedb_tsv_reader:
                    if "Warning: Potential DNA sequence(s)" in line['allele']:
                        continue
                    protein_label  = int(line['seq_num'])
                    if 'core_peptide' in line and int(line['end']) - int(line['start']) == 8:
                        #Start and end refer to the position of the core peptide
                        #Infer the (start) position of the peptide from the positions of the core peptide
                        position   = str(int(line['start']) - line['peptide'].find(line['core_peptide']))
                    else:
                        position   = line['start']
                    epitope        = line['peptide']
                    scores         = self.get_scores(line, method)
                    allele         = line['allele']
                    peptide_length = len(epitope)

                    protein_identifiers = protein_identifiers_from_label[protein_label]
                    if protein_identifiers is None:
                        # No identifiers for this label: skip the line instead
                        # of reusing the identifiers of the previous line.
                        continue

                    for protein_identifier in protein_identifiers:
                        (protein_type, tsv_index) = protein_identifier.split('.', 1)
                        if protein_type == 'MT':
                            tsv_entry = tsv_entries[tsv_index]
                            key = "%s|%s" % (tsv_index, position)
                            if key not in iedb_results:
                                iedb_results[key]                      = {}
                                iedb_results[key]['mt_scores']         = {}
                                iedb_results[key]['mt_epitope_seq']    = epitope
                                iedb_results[key]['gene_name']         = tsv_entry['gene_name']
                                iedb_results[key]['amino_acid_change'] = tsv_entry['amino_acid_change']
                                iedb_results[key]['variant_type']      = tsv_entry['variant_type']
                                iedb_results[key]['position']          = position
                                iedb_results[key]['tsv_index']         = tsv_index
                                iedb_results[key]['allele']            = allele
                                iedb_results[key]['peptide_length']    = peptide_length
                            iedb_results[key]['mt_scores'].update(scores)
                        else:
                            if tsv_index not in wt_iedb_results:
                                wt_iedb_results[tsv_index] = {}
                            if position not in wt_iedb_results[tsv_index]:
                                wt_iedb_results[tsv_index][position] = {}
                                wt_iedb_results[tsv_index][position]['wt_scores'] = {}
                            wt_iedb_results[tsv_index][position]['wt_epitope_seq'] = epitope
                            wt_iedb_results[tsv_index][position]['wt_scores'].update(scores)

        return self.match_wildtype_and_mutant_entries(iedb_results, wt_iedb_results)
3✔
876

877

878
class UnmatchedSequencesOutputParser(OutputParser):
    """Output parser for inputs that have no wildtype counterpart.

    Only mutant ("mt") scores are collected, and the report has a single
    column per score instead of a WT/MT pair.
    """

    def parse_iedb_file(self):
        """Collect prediction scores from all input files.

        Returns:
            dict keyed by ``"<index>|<position>"``; each value holds the
            epitope sequence, position, index, allele and an ``mt_scores``
            dict merged across all input files.
        """
        with open(self.key_file, 'r') as key_file_reader:
            # NOTE(review): FullLoader accepts more than plain data. If the key
            # file could ever come from an untrusted source, yaml.safe_load
            # would be the safer choice - confirm before switching.
            tsv_indices_from_label = yaml.load(key_file_reader, Loader=yaml.FullLoader)
        # The pattern depends only on the sample name, so compile it once.
        pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
        iedb_results = {}
        for input_iedb_file in self.input_iedb_files:
            with open(input_iedb_file, 'r') as reader:
                iedb_tsv_reader = csv.DictReader(reader, delimiter='\t')
                filename = os.path.basename(input_iedb_file)

                match = pattern.match(filename)
                method = match.group(1)

                for line in iedb_tsv_reader:
                    if "Warning: Potential DNA sequence(s)" in line['allele']:
                        continue
                    protein_label  = int(line['seq_num'])
                    if 'core_peptide' in line and int(line['end']) - int(line['start']) == 8:
                        #Start and end refer to the position of the core peptide
                        #Infer the (start) position of the peptide from the positions of the core peptide
                        position   = str(int(line['start']) - line['peptide'].find(line['core_peptide']))
                    else:
                        position   = line['start']
                    epitope        = line['peptide']
                    scores         = self.get_scores(line, method)
                    allele         = line['allele']

                    tsv_indices = tsv_indices_from_label[protein_label]
                    if tsv_indices is None:
                        # No indices for this label: skip the line instead of
                        # reusing the indices of the previous line.
                        continue

                    for index in tsv_indices:
                        key = '|'.join([index, position])
                        if key not in iedb_results:
                            iedb_results[key]                      = {}
                            iedb_results[key]['mt_scores']         = {}
                            iedb_results[key]['mt_epitope_seq']    = epitope
                            iedb_results[key]['position']          = position
                            iedb_results[key]['tsv_index']         = index
                            iedb_results[key]['allele']            = allele
                        iedb_results[key]['mt_scores'].update(scores)
        return iedb_results

    def add_summary_metrics(self, iedb_results):
        """Add best/median summary values for every metric to each result.

        For each metric the keys ``best_mt_<metric>``,
        ``best_mt_<metric>_method`` and ``median_mt_<metric>`` are set on the
        result dict (in place); all three are 'NA' when
        ``get_values_for_summary_metrics`` yields nothing for that metric.

        Returns:
            dict with the same keys as ``iedb_results`` mapping to the
            updated result dicts.
        """
        iedb_results_with_metrics = {}
        for key, result in iedb_results.items():
            for metric in ['ic50', 'ic50_percentile', 'immunogenicity_percentile', 'presentation_percentile', 'percentile']:
                mt_values = self.get_values_for_summary_metrics(result, metric, 'mt')
                if not mt_values:
                    result['best_mt_{}'.format(metric)]          = 'NA'
                    result['best_mt_{}_method'.format(metric)]   = 'NA'
                    result['median_mt_{}'.format(metric)]        = 'NA'
                else:
                    # Lowest value wins; methods are visited in sorted order so
                    # ties resolve to the alphabetically first method.
                    best_mt_value = sys.maxsize
                    for method in sorted(mt_values.keys()):
                        for value in mt_values[method].values():
                            if value < best_mt_value:
                                best_mt_value        = value
                                best_mt_value_method = method
                    result['best_mt_{}'.format(metric)]          = best_mt_value
                    result['best_mt_{}_method'.format(metric)]   = best_mt_value_method

                    # Every non-IC50 metric is stored under the 'percentile' key.
                    if metric == 'ic50':
                        result['median_mt_{}'.format(metric)] = median([score['ic50'] for score in mt_values.values()])
                    else:
                        result['median_mt_{}'.format(metric)] = median([score['percentile'] for score in mt_values.values()])
                iedb_results_with_metrics[key]  = result
        return iedb_results_with_metrics

    def process_input_iedb_file(self):
        """Parse the prediction files and return the results with summary metrics added."""
        iedb_results              = self.parse_iedb_file()
        iedb_results_with_metrics = self.add_summary_metrics(iedb_results)
        return iedb_results_with_metrics

    def base_headers(self):
        """Return the fixed leading columns of the report as a new list."""
        return[
            'Mutation',
            'HLA Allele',
            'Sub-peptide Position',
            'Epitope Seq',
            'Median IC50 Score',
            'Best IC50 Score',
            'Best IC50 Score Method',
            'Median Percentile',
            'Best Percentile',
            'Best Percentile Method',
            'Median IC50 Percentile',
            'Best IC50 Percentile',
            'Best IC50 Percentile Method',
            'Median Immunogenicity Percentile',
            'Best Immunogenicity Percentile',
            'Best Immunogenicity Percentile Method',
            'Median Presentation Percentile',
            'Best Presentation Percentile',
            'Best Presentation Percentile Method',
        ]

    def output_headers(self):
        """Return the full, ordered list of column names for the output TSV.

        Starts from ``base_headers()``, appends the columns of every method
        reported by ``prediction_methods()`` and finally the optional
        "Sample Name" column.
        """
        headers = self.base_headers()
        for method in self.prediction_methods():
            if method.lower() == 'mhcflurry':
                if self.flurry_state == 'EL_only':
                    self.flurry_headers(headers)
                    # Only the EL columns are wanted; skip the IC50 columns below.
                    continue
                elif self.flurry_state == 'both':
                    self.flurry_headers(headers)

            pretty_method = PredictionClass.prediction_class_name_for_iedb_prediction_method(method)
            if method in ['BigMHC_EL', 'netmhciipan_el', 'netmhcpan_el', 'MixMHCpred']:
                headers.append("%s Presentation Score" % pretty_method)
                if method in ['netmhcpan_el', 'netmhciipan_el', 'MixMHCpred']:
                    headers.append("%s Percentile" % pretty_method)
            elif method in ['BigMHC_IM', 'DeepImmuno', 'PRIME']:
                headers.append("%s Immunogenicity Score" % pretty_method)
                if method in ['PRIME']:
                    headers.append("%s Percentile" % pretty_method)
            else:
                headers.append("%s IC50 Score" % pretty_method)
                headers.append("%s Percentile" % pretty_method)
        if self.add_sample_name:
            headers.append("Sample Name")
        return headers

    def flurry_headers(self, headers):
        """Append the three MHCflurryEL columns to ``headers`` in place."""
        headers.append("MHCflurryEL Processing Score")
        headers.append("MHCflurryEL Presentation Score")
        headers.append("MHCflurryEL Presentation Percentile")

    def add_prediction_scores(self, row, mt_scores):
        """Fill the per-algorithm score columns of ``row`` and return it.

        Args:
            row: dict for one output line; it is modified in place.
            mt_scores: mapping of pretty method name to that method's score dict.

        Returns:
            The same ``row`` object. A method absent from ``mt_scores`` yields
            'NA' (see ``score_or_na``).
        """
        for method in self.prediction_methods():
            pretty_method = PredictionClass.prediction_class_name_for_iedb_prediction_method(method)
            if pretty_method == 'MHCflurry':
                if self.flurry_state == 'EL_only' or self.flurry_state == 'both':
                    row['MHCflurryEL Processing Score'] = self.score_or_na(mt_scores, 'MHCflurryEL Processing', 'presentation')
                    row['MHCflurryEL Presentation Score'] = self.score_or_na(mt_scores, 'MHCflurryEL Presentation', 'presentation')
                    row['MHCflurryEL Presentation Percentile'] = self.score_or_na(mt_scores, 'MHCflurryEL Presentation', 'percentile')
                if self.flurry_state in ['both', 'BA_only', None]:
                    row['MHCflurry IC50 Score'] = self.score_or_na(mt_scores, 'MHCflurry', 'ic50')
                    row['MHCflurry Percentile'] = self.score_or_na(mt_scores, 'MHCflurry', 'percentile')
            elif pretty_method in ['BigMHC_EL', 'NetMHCIIpanEL', 'NetMHCpanEL', 'MixMHCpred']:
                row[f'{pretty_method} Presentation Score'] = self.score_or_na(mt_scores, pretty_method, 'presentation')
                # These names are pretty names, so they must be tested against
                # pretty_method. The raw file-name tokens ('netmhcpan_el',
                # 'netmhciipan_el') never match them, which left the percentile
                # column declared by output_headers unfilled.
                if pretty_method in ['NetMHCIIpanEL', 'NetMHCpanEL', 'MixMHCpred']:
                    row[f'{pretty_method} Percentile'] = self.score_or_na(mt_scores, pretty_method, 'percentile')
            elif pretty_method in ['BigMHC_IM', 'DeepImmuno', 'PRIME']:
                row[f'{pretty_method} Immunogenicity Score'] = self.score_or_na(mt_scores, pretty_method, 'immunogenicity')
                if pretty_method in ['PRIME']:
                    row[f'{pretty_method} Percentile'] = self.score_or_na(mt_scores, pretty_method, 'percentile')
            else:
                row[f'{pretty_method} IC50 Score'] = self.score_or_na(mt_scores, pretty_method, 'ic50')
                row[f'{pretty_method} Percentile'] = self.score_or_na(mt_scores, pretty_method, 'percentile')
        return row

    def execute(self):
        """Parse all inputs and write the report to ``self.output_file``.

        Rows are written to ``<output_file>.tmp`` first and the temporary file
        is moved over ``self.output_file`` only after every row was written.
        """
        tmp_output_file = self.output_file + '.tmp'
        # The context manager closes the handle even if parsing or building a
        # row raises.
        with open(tmp_output_file, 'w') as tmp_output_filehandle:
            tsv_writer = csv.DictWriter(tmp_output_filehandle, delimiter='\t', fieldnames=self.output_headers())
            tsv_writer.writeheader()

            iedb_results = self.process_input_iedb_file()
            for result in iedb_results.values():
                row = {
                    'HLA Allele'          : result['allele'],
                    'Sub-peptide Position': result['position'],
                    'Epitope Seq'         : result['mt_epitope_seq'],
                    'Mutation'            : result['tsv_index'],
                    #Median IC50 Score
                    'Median IC50 Score': self.rounded_score_or_na(result['median_mt_ic50']),
                    #Median Percentile
                    'Median Percentile': self.rounded_score_or_na(result['median_mt_percentile']),
                    #Median IC50 Percentile
                    'Median IC50 Percentile': self.rounded_score_or_na(result['median_mt_ic50_percentile']),
                    #Median Immunogenicity Percentile
                    'Median Immunogenicity Percentile': self.rounded_score_or_na(result['median_mt_immunogenicity_percentile']),
                    #Median Presentation Percentile
                    'Median Presentation Percentile': self.rounded_score_or_na(result['median_mt_presentation_percentile']),
                    #Best IC50 Score
                    'Best IC50 Score': self.rounded_score_or_na(result['best_mt_ic50']),
                    'Best IC50 Score Method': result['best_mt_ic50_method'],
                    #Best Percentile
                    'Best Percentile': self.rounded_score_or_na(result['best_mt_percentile']),
                    'Best Percentile Method': result['best_mt_percentile_method'],
                    #Best IC50 Percentile
                    'Best IC50 Percentile': self.rounded_score_or_na(result['best_mt_ic50_percentile']),
                    'Best IC50 Percentile Method': result['best_mt_ic50_percentile_method'],
                    #Best Immunogenicity Percentile
                    'Best Immunogenicity Percentile': self.rounded_score_or_na(result['best_mt_immunogenicity_percentile']),
                    'Best Immunogenicity Percentile Method': result['best_mt_immunogenicity_percentile_method'],
                    #Best Presentation Percentile
                    'Best Presentation Percentile': self.rounded_score_or_na(result['best_mt_presentation_percentile']),
                    'Best Presentation Percentile Method': result['best_mt_presentation_percentile_method'],
                }
                row = self.add_prediction_scores(row, result['mt_scores'])
                if self.add_sample_name:
                    row['Sample Name'] = self.sample_name
                tsv_writer.writerow(row)

        # Only reached when every row was written successfully.
        os.replace(tmp_output_file, self.output_file)
3✔
1077

1078

1079
class PvacspliceOutputParser(UnmatchedSequencesOutputParser):
    """Output parser that joins per-epitope prediction rows with junction/variant annotations.

    ``parse_iedb_file`` collects the prediction rows of every input IEDB file
    into one entry per index listed in the fasta key file, and ``execute``
    writes one report row per entry, annotated with the matching line of the
    input TSV file.
    """

    # (input TSV column, report column) pairs whose values are rounded to 3 decimals.
    _ROUNDED_TSV_COLUMNS = (
        ('gene_expression', 'Gene Expression'),
        ('transcript_expression', 'Transcript Expression'),
        ('normal_vaf', 'Normal VAF'),
        ('tdna_vaf', 'Tumor DNA VAF'),
        ('trna_vaf', 'Tumor RNA VAF'),
    )

    # (input TSV column, report column) pairs that are copied through unchanged.
    _DEPTH_TSV_COLUMNS = (
        ('normal_depth', 'Normal Depth'),
        ('tdna_depth', 'Tumor DNA Depth'),
        ('trna_depth', 'Tumor RNA Depth'),
    )

    def parse_iedb_file(self):
        """Collect the prediction rows of all input IEDB files, keyed by index.

        The key file maps each ``seq_num`` of the prediction files to a
        one-element list holding a comma-separated string of indexes; every
        prediction row is attributed to each of those indexes.

        Returns:
            dict: index -> entry with the keys ``mt_scores``,
            ``mt_epitope_seq``, ``fasta_id``, ``tsv_index``, ``allele`` and
            ``peptide_length``. The first row seen for an index fixes every
            field except ``mt_scores``, which is updated with the scores of
            each further row for that index.

        Exits the program (``sys.exit``) when the prediction method cannot be
        read from an input file name.
        """
        # NOTE(review): yaml.FullLoader can build more than plain scalars/containers;
        # presumably the key file is always produced by this pipeline. If it can
        # ever come from an untrusted source, switch to yaml.safe_load.
        with open(self.key_file, 'r') as key_file_reader:
            protein_identifiers_from_label = yaml.load(key_file_reader, Loader=yaml.FullLoader)

        # The prediction method is the token that follows "<sample_name>." in the
        # file name. The pattern only depends on the sample name, so build it once.
        method_pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")

        iedb_results = {}
        for input_iedb_file in self.input_iedb_files:
            with open(input_iedb_file, 'r') as reader:
                iedb_tsv_reader = csv.DictReader(reader, delimiter='\t')
                filename = os.path.basename(input_iedb_file)
                match = method_pattern.match(filename)
                if match is None:
                    # Previously this surfaced as an AttributeError on None.
                    sys.exit(
                        "Unable to determine the prediction method from IEDB file name "
                        "'{}' (expected it to start with '{}.<method>')".format(filename, self.sample_name)
                    )
                method = match.group(1)

                # header: allele, seq_num, start, end, length, peptide, ic50, percentile_rank
                for line in iedb_tsv_reader:
                    # Warning text emitted into the allele column is not a prediction row.
                    if "Warning: Potential DNA sequence(s)" in line['allele']:
                        continue

                    allele = line['allele']
                    fasta_label = int(line['seq_num'])
                    epitope = line['peptide']
                    scores = self.get_scores(line, method)

                    # get fasta_id/combined_name from fasta key file
                    protein_identifiers = protein_identifiers_from_label[fasta_label]
                    if protein_identifiers is None:
                        continue

                    # comma-separated string (1 or more ids) as 1 entry in list
                    for key in protein_identifiers[0].split(','):
                        if key not in iedb_results:
                            iedb_results[key] = {
                                'mt_scores': {},
                                'mt_epitope_seq': epitope,
                                'fasta_id': fasta_label,
                                'tsv_index': key,
                                'allele': allele,
                                'peptide_length': len(epitope),
                            }
                        iedb_results[key]['mt_scores'].update(scores)

        return iedb_results

    def base_headers(self):
        """Return the fixed report column names, in output order."""
        return [
            'Chromosome',
            'Start',
            'Stop',
            'Reference',
            'Variant',
            'Junction',
            'Junction Start',
            'Junction Stop',
            'Junction Score',
            'Junction Anchor',
            'Transcript',
            'Transcript Support Level',
            'Canonical',
            'MANE Select',
            'Biotype',
            'Transcript CDS Flags',
            'Ensembl Gene ID',
            'Variant Type',
            'Amino Acid Change',
            'Gene Name',
            'HGVSc',
            'HGVSp',
            'WT Protein Length',
            'ALT Protein Length',
            'Frameshift Event',
            'Protein Position',  # start position of peptide in alt protein
            'HLA Allele',
            'Peptide Length',
            'Epitope Seq',
            'Median IC50 Score',
            'Best IC50 Score',
            'Best IC50 Score Method',
            'Median Percentile',
            'Best Percentile',
            'Best Percentile Method',
            'Median IC50 Percentile',
            'Best IC50 Percentile',
            'Best IC50 Percentile Method',
            'Median Immunogenicity Percentile',
            'Best Immunogenicity Percentile',
            'Best Immunogenicity Percentile Method',
            'Median Presentation Percentile',
            'Best Presentation Percentile',
            'Best Presentation Percentile Method',
            'Tumor DNA Depth',
            'Tumor DNA VAF',
            'Tumor RNA Depth',
            'Tumor RNA VAF',
            'Normal Depth',
            'Normal VAF',
            'Gene Expression',
            'Transcript Expression',
            'Index',  # this is junction index
            'Fasta Key',  # unique num for traceback to correct sequence - key to combined fasta header
        ]

    def execute(self):
        """Write the report to ``self.output_file``.

        Rows are written to ``<output_file>.tmp`` first and the temp file is
        moved over ``self.output_file`` only after every row has been written.
        Each result's ``tsv_index`` is split at its last ``.`` into the index
        of the input TSV line and the protein position of the epitope.
        """
        tmp_output_file = self.output_file + '.tmp'

        # The context manager closes the temp file even when a row fails to build.
        with open(tmp_output_file, 'w') as tmp_output_filehandle:
            tsv_writer = csv.DictWriter(tmp_output_filehandle, delimiter='\t', fieldnames=self.output_headers())
            tsv_writer.writeheader()

            # added for pvacsplice - variant info
            tsv_entries = self.parse_input_tsv_file()

            # get binding info from iedb files; the median_*/best_* keys read
            # below are presumably added by this step (it is defined elsewhere).
            iedb_results = self.process_input_iedb_file()

            for result in iedb_results.values():
                # get unique index
                (final_index, protein_position) = result['tsv_index'].rsplit('.', 1)
                tsv_entry = tsv_entries[final_index]
                row = {
                    'Chromosome'          : tsv_entry['chromosome_name'],
                    'Start'               : tsv_entry['start'],
                    'Stop'                : tsv_entry['stop'],
                    'Reference'           : tsv_entry['reference'],
                    'Variant'             : tsv_entry['variant'],
                    'Transcript'          : tsv_entry['transcript_name'],
                    'Transcript Support Level': tsv_entry['transcript_support_level'],
                    'Canonical'           : tsv_entry['canonical'],
                    'MANE Select'         : tsv_entry['mane_select'],
                    'Biotype'             : tsv_entry['biotype'],
                    'Transcript CDS Flags': tsv_entry['transcript_cds_flags'],
                    ### junction info from RegTools
                    'Junction'            : tsv_entry['name'],
                    'Junction Start'      : tsv_entry['junction_start'],
                    'Junction Stop'       : tsv_entry['junction_stop'],
                    'Junction Score'      : tsv_entry['score'],
                    'Junction Anchor'     : tsv_entry['anchor'],
                    ###
                    # NOTE(review): filled from 'gene_name', exactly like 'Gene Name'
                    # below - confirm whether an Ensembl ID column was intended.
                    'Ensembl Gene ID'     : tsv_entry['gene_name'],
                    'Variant Type'        : tsv_entry['variant_type'],
                    'Amino Acid Change'   : tsv_entry['amino_acid_change'],
                    'Protein Position'    : protein_position,
                    'Gene Name'           : tsv_entry['gene_name'],
                    'HGVSc'               : tsv_entry['hgvsc'],
                    'HGVSp'               : tsv_entry['hgvsp'],
                    'Index'               : final_index,
                    'Fasta Key'           : result['fasta_id'],
                    'WT Protein Length'   : tsv_entry['wt_protein_length'],
                    'ALT Protein Length'  : tsv_entry['alt_protein_length'],
                    'Frameshift Event'    : tsv_entry['frameshift_event'],
                    ### pvacbind info
                    'HLA Allele'          : result['allele'],
                    'Peptide Length'      : len(result['mt_epitope_seq']),
                    'Epitope Seq'         : result['mt_epitope_seq'],
                    #Median IC50 Score
                    'Median IC50 Score': self.rounded_score_or_na(result['median_mt_ic50']),
                    #Median Percentile
                    'Median Percentile': self.rounded_score_or_na(result['median_mt_percentile']),
                    #Median IC50 Percentile
                    'Median IC50 Percentile': self.rounded_score_or_na(result['median_mt_ic50_percentile']),
                    #Median Immunogenicity Percentile
                    'Median Immunogenicity Percentile': self.rounded_score_or_na(result['median_mt_immunogenicity_percentile']),
                    #Median Presentation Percentile
                    'Median Presentation Percentile': self.rounded_score_or_na(result['median_mt_presentation_percentile']),
                    #Best IC50 Score
                    'Best IC50 Score': self.rounded_score_or_na(result['best_mt_ic50']),
                    'Best IC50 Score Method': result['best_mt_ic50_method'],
                    #Best Percentile
                    'Best Percentile': self.rounded_score_or_na(result['best_mt_percentile']),
                    'Best Percentile Method': result['best_mt_percentile_method'],
                    #Best IC50 Percentile
                    'Best IC50 Percentile': self.rounded_score_or_na(result['best_mt_ic50_percentile']),
                    'Best IC50 Percentile Method': result['best_mt_ic50_percentile_method'],
                    #Best Immunogenicity Percentile
                    'Best Immunogenicity Percentile': self.rounded_score_or_na(result['best_mt_immunogenicity_percentile']),
                    'Best Immunogenicity Percentile Method': result['best_mt_immunogenicity_percentile_method'],
                    #Best Presentation Percentile
                    'Best Presentation Percentile': self.rounded_score_or_na(result['best_mt_presentation_percentile']),
                    'Best Presentation Percentile Method': result['best_mt_presentation_percentile_method'],
                }
                row = self.add_prediction_scores(row, result['mt_scores'])

                for (tsv_key, row_key) in self._ROUNDED_TSV_COLUMNS:
                    if tsv_key not in tsv_entry:
                        continue
                    value = tsv_entry[tsv_key]
                    # no --normal-sample-name parameter leaves the normal columns
                    # empty, and converting an empty string to float raises ValueError
                    if value == 'NA' or ('normal' in tsv_key and value == ''):
                        row[row_key] = 'NA'
                    else:
                        row[row_key] = round(float(value), 3)

                for (tsv_key, row_key) in self._DEPTH_TSV_COLUMNS:
                    value = tsv_entry.get(tsv_key)
                    # A missing or blank normal depth is reported as 'NA'. The old
                    # check sat in an elif that only ran when the key was absent,
                    # so it raised KeyError instead of ever producing 'NA'.
                    if 'normal' in tsv_key and value in (None, ''):
                        row[row_key] = 'NA'
                    elif tsv_key in tsv_entry:
                        row[row_key] = value

                if self.add_sample_name:
                    row['Sample Name'] = self.sample_name
                tsv_writer.writerow(row)

        os.replace(tmp_output_file, self.output_file)
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc