• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

griffithlab / pVACtools / 15877160304

25 Jun 2025 01:05PM UTC coverage: 86.038% (-0.2%) from 86.224%
15877160304

Pull #1255

github

web-flow
Merge 652286d65 into 5e8083eb6
Pull Request #1255: Fix bug in processing inframe deletions during aggregation

0 of 1 new or added line in 1 file covered. (0.0%)

19 existing lines in 2 files now uncovered.

7863 of 9139 relevant lines covered (86.04%)

4.29 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.89
/pvactools/lib/aggregate_all_epitopes.py
1
import pandas as pd
5✔
2
import numpy as np
5✔
3
from collections import defaultdict, Counter
5✔
4
import json
5✔
5
import os
5✔
6
import shutil
5✔
7
from abc import ABCMeta, abstractmethod
5✔
8
import itertools
5✔
9
import csv
5✔
10
import glob
5✔
11
import ast
5✔
12
from pvactools.lib.run_utils import get_anchor_positions
5✔
13

14
from pvactools.lib.prediction_class import PredictionClass
5✔
15

16
class AggregateAllEpitopes:
    """Shared driver that reduces an all-epitopes TSV to one summary row per mutation.

    Subclasses must set ``self.input_file`` and ``self.output_file`` before
    calling ``super().__init__()`` (the constructor reads ``self.input_file``)
    and must override every method marked ``@abstractmethod``.

    NOTE(review): this class does not declare ``ABCMeta`` itself, so the
    ``@abstractmethod`` markers are presumably not enforced at instantiation;
    the stubs raise when called instead - confirm before relying on either.
    """

    def __init__(self):
        """Collect the HLA alleles in the input file and their allele-specific cutoffs."""
        self.hla_types = pd.read_csv(self.input_file, delimiter="\t", usecols=["HLA Allele"])['HLA Allele'].unique()
        allele_specific_binding_thresholds = {}
        for hla_type in self.hla_types:
            threshold = PredictionClass.cutoff_for_allele(hla_type)
            # alleles for which no cutoff is returned are simply left out of the mapping
            if threshold is not None:
                allele_specific_binding_thresholds[hla_type] = float(threshold)
        self.allele_specific_binding_thresholds = allele_specific_binding_thresholds

    @abstractmethod
    def get_list_unique_mutation_keys(self, df=None):
        """Return the mutation keys found in ``df``; ``execute`` iterates over them.

        ``df`` is optional only so that pre-existing zero-argument calls keep
        reaching the exception below rather than a ``TypeError``.
        """
        raise Exception("Must implement method in child class")

    @abstractmethod
    def calculate_clonal_vaf(self):
        """Return the clonal VAF estimate that ``execute`` forwards to ``get_best_mut_line``."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def read_input_file(self, used_columns, dtypes):
        """Load the input file restricted to ``used_columns`` using ``dtypes``."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def get_sub_df(self, df, key):
        """Return a ``(sub_df, key_str)`` pair for one mutation key."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def get_best_binder(self, df):
        """Return the single best entry among the rows of ``df``."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def get_tier(self, mutation, vaf_clonal):
        """Classify one mutation entry (not called by this base class)."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def get_included_df(self, df):
        """Return the rows of ``df`` that meet the aggregate inclusion criteria."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def get_unique_peptide_hla_counts(self, included_df):
        """Return a table with an ``HLA Allele`` column; its values are tallied per allele."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorithms, percentile_algorithms):
        """Return a ``(peptides, anno_count)`` pair describing ``included_df``."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def calculate_unique_peptide_count(self, included_df):
        """Return the included peptide count passed on to ``assemble_result_line``."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def calculate_good_binder_count(self, included_df):
        """Return the good binder count passed on to ``assemble_result_line``."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def get_default_annotation_count(self):
        """Return the default annotation count (not called by this base class)."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def assemble_result_line(self, best, key, vaf_clonal, hla, anno_count, included_peptide_count, good_binder_count):
        """Return the output row for one mutation; ``execute`` collects these into a table."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def get_metrics(self, peptides, best):
        """Return the metrics entry stored under the mutation's key."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def write_metrics_file(self, metrics):
        """Persist the metrics dictionary assembled by ``execute``."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def sort_table(self, df):
        """Return the aggregated table in its final row order."""
        raise Exception("Must implement method in child class")

    @abstractmethod
    def copy_pvacview_r_files(self):
        """Final step of ``execute``; takes no arguments and its result is ignored."""
        raise Exception("Must implement method in child class")

    def _input_file_headers(self):
        """Return the column names of the input file without loading any data rows."""
        return pd.read_csv(self.input_file, delimiter="\t", nrows=0).columns.tolist()

    def get_best_mut_line(self, df, key, prediction_algorithms, el_algorithms, percentile_algorithms, vaf_clonal):
        """Build the output row and the metrics entry for one mutation.

        Returns a ``(out_dict, metric)`` tuple.
        """
        #order by best median score and get best ic50 peptide
        best = self.get_best_binder(df)

        #these are all lines meeting the aggregate inclusion binding threshold
        included_df = self.get_included_df(df)
        best_df = pd.DataFrame.from_dict([best])
        # make sure the best entry is part of the included set even if it was filtered out
        if not best_df.index.isin(included_df.index).all():
            included_df = pd.concat([included_df, best_df])
        peptide_hla_counts = self.get_unique_peptide_hla_counts(included_df)
        hla_counts = Counter(peptide_hla_counts["HLA Allele"])
        # every known allele gets an entry; alleles without included peptides map to ""
        hla = {
            allele: (hla_counts[allele] if allele in hla_counts else "")
            for allele in self.hla_types
        }
        #get a list of all unique gene/transcript/aa_change combinations
        #store a count of all unique peptides that passed
        (peptides, anno_count) = self.get_included_df_metrics(included_df, prediction_algorithms, el_algorithms, percentile_algorithms)
        included_peptide_count = self.calculate_unique_peptide_count(included_df)
        good_binder_count = self.calculate_good_binder_count(included_df)

        #assemble the line
        out_dict = self.assemble_result_line(best, key, vaf_clonal, hla, anno_count, included_peptide_count, good_binder_count)

        metric = self.get_metrics(peptides, best)
        return (out_dict, metric)

    def determine_used_prediction_algorithms(self):
        """Return the IC50 prediction algorithms that have a score column in the input file."""
        headers = self._input_file_headers()
        potential_algorithms = PredictionClass.prediction_methods()
        prediction_algorithms = []
        for algorithm in potential_algorithms:
            # these are reported by determine_used_el_algorithms instead
            if algorithm in ['NetMHCpanEL', 'NetMHCIIpanEL', 'BigMHC_EL', 'BigMHC_IM', 'DeepImmuno']:
                continue
            if "{} MT IC50 Score".format(algorithm) in headers or "{} IC50 Score".format(algorithm) in headers:
                prediction_algorithms.append(algorithm)
        return prediction_algorithms

    def determine_used_epitope_lengths(self):
        """Return the distinct epitope lengths present in the input file (unordered list)."""
        col_name = self.determine_epitope_seq_column_name()
        sequences = pd.read_csv(self.input_file, delimiter="\t", usecols=[col_name])[col_name]
        return list({len(s) for s in sequences})

    def determine_epitope_seq_column_name(self):
        """Return the epitope sequence column name, preferring ``MT Epitope Seq``.

        Raises ``Exception`` when neither candidate header is present.
        """
        headers = self._input_file_headers()
        for header in ["MT Epitope Seq", "Epitope Seq"]:
            if header in headers:
                return header
        raise Exception("No mutant epitope sequence header found.")

    def problematic_positions_exist(self):
        """Return True when the input file has a ``Problematic Positions`` column."""
        return 'Problematic Positions' in self._input_file_headers()

    def calculate_allele_expr(self, line):
        """Return gene expression times tumor RNA VAF rounded to 3 places, or ``'NA'``."""
        if line['Gene Expression'] == 'NA' or line['Tumor RNA VAF'] == 'NA':
            return 'NA'
        else:
            return round(float(line['Gene Expression']) * float(line['Tumor RNA VAF']), 3)

    def determine_used_el_algorithms(self):
        """Return the elution/immunogenicity algorithms that have a score column in the input file."""
        headers = self._input_file_headers()
        potential_algorithms = ["MHCflurryEL Processing", "MHCflurryEL Presentation", "NetMHCpanEL", "NetMHCIIpanEL", "BigMHC_EL", 'BigMHC_IM', 'DeepImmuno']
        prediction_algorithms = []
        for algorithm in potential_algorithms:
            if "{} MT Score".format(algorithm) in headers or "{} Score".format(algorithm) in headers:
                prediction_algorithms.append(algorithm)
        return prediction_algorithms

    def determine_used_percentile_algorithms(self, prediction_algorithms, el_algorithms):
        """Return the subset of the given algorithms that have a percentile column in the input file."""
        headers = self._input_file_headers()
        percentile_algorithms = []
        for algorithm in prediction_algorithms + el_algorithms:
            if "{} MT Percentile".format(algorithm) in headers or "{} Percentile".format(algorithm) in headers:
                percentile_algorithms.append(algorithm)
        return percentile_algorithms

    def determine_columns_used_for_aggregation(self, prediction_algorithms, el_algorithms):
        """Return the list of input columns to load for the given algorithms."""
        used_columns = [
            "Index", "Chromosome", "Start", "Stop", "Reference", "Variant",
            "Transcript", "Transcript Support Level", "Biotype", "Transcript Length", "Variant Type", "Mutation",
            "Protein Position", "Gene Name", "HLA Allele",
            "Mutation Position", "MT Epitope Seq", "WT Epitope Seq",
            "Tumor DNA VAF", "Tumor RNA Depth",
            "Tumor RNA VAF", "Gene Expression", "Transcript Expression",
            "Median MT IC50 Score", "Median WT IC50 Score", "Median MT Percentile", "Median WT Percentile",
            "Best MT IC50 Score", "Corresponding WT IC50 Score", "Best MT Percentile", "Corresponding WT Percentile",
        ]
        for algorithm in prediction_algorithms:
            used_columns.extend(["{} WT IC50 Score".format(algorithm), "{} MT IC50 Score".format(algorithm)])
            used_columns.extend(["{} WT Percentile".format(algorithm), "{} MT Percentile".format(algorithm)])
        for algorithm in el_algorithms:
            used_columns.extend(["{} WT Score".format(algorithm), "{} MT Score".format(algorithm)])
            # no percentile columns are requested for these four algorithms
            if algorithm not in ["MHCflurryEL Processing", "BigMHC_EL", "BigMHC_IM", 'DeepImmuno']:
                used_columns.extend(["{} WT Percentile".format(algorithm), "{} MT Percentile".format(algorithm)])
        if self.problematic_positions_exist():
            used_columns.append("Problematic Positions")
        return used_columns

    def set_column_types(self, prediction_algorithms):
        """Return the column-name to dtype mapping used when loading the input file."""
        dtypes = {
            'Chromosome': "string",
            "Start": "int32",
            "Stop": "int32",
            'Reference': "string",
            'Variant': "string",
            "Variant Type": "category",
            "Mutation Position": "category",
            "Median MT IC50 Score": "float32",
            "Median MT Percentile": "float32",
            "Best MT IC50 Score": "float32",
            "Best MT Percentile": "float32",
            "Protein Position": "string",
            "Transcript Length": "int32",
        }
        for algorithm in prediction_algorithms:
            # SMM and SMMPMBEC get no explicit dtype entries
            if algorithm == 'SMM' or algorithm == 'SMMPMBEC':
                continue
            dtypes["{} MT Score".format(algorithm)] = "float32"
            dtypes["{} MT Percentile".format(algorithm)] = "float32"
        return dtypes

    def execute(self):
        """Run the aggregation: write the aggregated TSV, then the metrics file."""
        prediction_algorithms = self.determine_used_prediction_algorithms()
        epitope_lengths = self.determine_used_epitope_lengths()
        el_algorithms = self.determine_used_el_algorithms()
        percentile_algorithms = self.determine_used_percentile_algorithms(prediction_algorithms, el_algorithms)
        used_columns = self.determine_columns_used_for_aggregation(prediction_algorithms, el_algorithms)
        dtypes = self.set_column_types(prediction_algorithms)

        ##do a crude estimate of clonal vaf/purity
        vaf_clonal = self.calculate_clonal_vaf()

        # only the pVACseq subclass records its run parameters in the metrics
        if self.__class__.__name__ == 'PvacseqAggregateAllEpitopes':
            metrics = {
                'tumor_purity': self.tumor_purity,
                'vaf_clonal': round(vaf_clonal, 3),
                'vaf_subclonal': round(vaf_clonal/2, 3),
                'binding_threshold': self.binding_threshold,
                'aggregate_inclusion_binding_threshold': self.aggregate_inclusion_binding_threshold,
                'aggregate_inclusion_count_limit': self.aggregate_inclusion_count_limit,
                'trna_vaf': self.trna_vaf,
                'trna_cov': self.trna_cov,
                'allele_expr_threshold': self.allele_expr_threshold,
                'maximum_transcript_support_level': self.maximum_transcript_support_level,
                'percentile_threshold': self.percentile_threshold,
                'percentile_threshold_strategy': self.percentile_threshold_strategy,
                'use_allele_specific_binding_thresholds': self.use_allele_specific_binding_thresholds,
                'mt_top_score_metric': self.mt_top_score_metric,
                'wt_top_score_metric': self.wt_top_score_metric,
                'allele_specific_binding_thresholds': self.allele_specific_binding_thresholds,
                'allele_specific_anchors': self.allele_specific_anchors,
                'alleles': self.hla_types.tolist(),
                'anchor_contribution_threshold': self.anchor_contribution_threshold,
                'epitope_lengths': epitope_lengths,
            }
        else:
            metrics = {}

        data = []
        all_epitopes_df = self.read_input_file(used_columns, dtypes)

        ## get a list of unique mutations
        keys = self.get_list_unique_mutation_keys(all_epitopes_df)

        for key in keys:
            (df, key_str) = self.get_sub_df(all_epitopes_df, key)
            (best_mut_line, metrics_for_key) = self.get_best_mut_line(df, key_str, prediction_algorithms, el_algorithms, percentile_algorithms, vaf_clonal)
            data.append(best_mut_line)
            metrics[key_str] = metrics_for_key
        peptide_table = pd.DataFrame(data=data)
        peptide_table = self.sort_table(peptide_table)

        peptide_table.to_csv(self.output_file, sep='\t', na_rep='NA', index=False, float_format='%.3f')

        self.write_metrics_file(metrics)
        self.copy_pvacview_r_files()
268

269

270
class PvacseqAggregateAllEpitopes(AggregateAllEpitopes, metaclass=ABCMeta):
5✔
271
    def __init__(
            self,
            input_file,
            output_file,
            tumor_purity=None,
            binding_threshold=500,
            trna_vaf=0.25,
            trna_cov=10,
            expn_val=1,
            maximum_transcript_support_level=1,
            percentile_threshold=None,
            percentile_threshold_strategy='conservative',
            allele_specific_binding_thresholds=False,
            top_score_metric="median",
            allele_specific_anchors=False,
            anchor_contribution_threshold=0.8,
            aggregate_inclusion_binding_threshold=5000,
            aggregate_inclusion_count_limit=15,
        ):
        """Store the run parameters and load the bundled anchor reference tables.

        Most arguments are stored unchanged on attributes of the same name.
        Derived values:

        * ``allele_specific_binding_thresholds`` is stored as
          ``use_allele_specific_binding_thresholds``.
        * ``allele_expr_threshold`` is ``trna_vaf * expn_val * 10``.
        * ``top_score_metric == 'median'`` selects the ``Median`` MT/WT
          columns; any other value selects ``Best`` MT and ``Corresponding`` WT.
        * ``metrics_file`` is ``output_file`` with ``.tsv`` replaced by
          ``.metrics.json``.

        The parent constructor is called last because it reads ``self.input_file``.
        """
        self.input_file = input_file
        self.output_file = output_file
        self.tumor_purity = tumor_purity
        self.binding_threshold = binding_threshold
        self.use_allele_specific_binding_thresholds = allele_specific_binding_thresholds
        self.percentile_threshold = percentile_threshold
        self.percentile_threshold_strategy = percentile_threshold_strategy
        self.aggregate_inclusion_binding_threshold = aggregate_inclusion_binding_threshold
        self.aggregate_inclusion_count_limit = aggregate_inclusion_count_limit
        self.allele_expr_threshold = trna_vaf * expn_val * 10
        self.trna_cov = trna_cov
        self.trna_vaf = trna_vaf
        self.maximum_transcript_support_level = maximum_transcript_support_level
        if top_score_metric == 'median':
            self.mt_top_score_metric = "Median"
            self.wt_top_score_metric = "Median"
        else:
            self.mt_top_score_metric = "Best"
            self.wt_top_score_metric = "Corresponding"
        self.metrics_file = output_file.replace('.tsv', '.metrics.json')

        # The reference tables sit in tools/pvacview/data, one directory above this
        # module; resolve that location once instead of on every loop iteration.
        base_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        data_dir = os.path.join(base_dir, 'tools', 'pvacview', 'data')
        peptide_lengths = [8, 9, 10, 11]

        # length -> HLA -> remaining columns of that row (values stay strings)
        anchor_probabilities = {}
        for length in peptide_lengths:
            file_name = os.path.join(data_dir, "Normalized_anchor_predictions_{}_mer.tsv".format(length))
            probs = {}
            with open(file_name, 'r') as fh:
                reader = csv.DictReader(fh, delimiter="\t")
                for line in reader:
                    hla = line.pop('HLA')
                    probs[hla] = line
            anchor_probabilities[length] = probs
        self.anchor_probabilities = anchor_probabilities

        # length -> allele -> {int(column name): parsed Python literal}
        mouse_anchor_positions = {}
        for length in peptide_lengths:
            file_name = os.path.join(data_dir, "mouse_anchor_predictions_{}_mer.tsv".format(length))
            values = {}
            with open(file_name, 'r') as fh:
                reader = csv.DictReader(fh, delimiter="\t")
                for line in reader:
                    allele = line.pop('Allele')
                    values[allele] = {int(k): ast.literal_eval(v) for k, v in line.items()}
            mouse_anchor_positions[length] = values
        self.mouse_anchor_positions = mouse_anchor_positions

        self.allele_specific_anchors = allele_specific_anchors
        self.anchor_contribution_threshold = anchor_contribution_threshold
        super().__init__()
339

340
    def get_list_unique_mutation_keys(self, df):
        """Return the sorted, de-duplicated variant keys present in ``df``.

        Each key is a list ``[Chromosome, Start, Stop, Reference, Variant]``.
        """
        key_columns = ['Chromosome', 'Start', 'Stop', 'Reference', 'Variant']
        # tuples are hashable, so a set removes the repeated rows
        distinct_rows = {tuple(row) for row in df[key_columns].values.tolist()}
        unique_keys = [list(row) for row in distinct_rows]
        unique_keys.sort()
        return unique_keys
344

345
    def calculate_clonal_vaf(self):
5✔
346
        if self.tumor_purity:
5✔
347
            vaf_clonal =  self.tumor_purity * 0.5
×
348
            print("Tumor clonal VAF estimated as {} (calculated from user-provided tumor purity of {}). Assuming variants with VAF < {} are subclonal".format(round(vaf_clonal, 3), round(self.tumor_purity, 3), round(vaf_clonal/2, 3)))
×
349
            return vaf_clonal
×
350
        else:
351
        #if no tumor purity is provided, make a rough estimate by taking the list of VAFs < 0.6 (assumption is that these are CN-neutral) and return the largest as the marker of the founding clone
352
            vafs = np.sort(pd.read_csv(self.input_file, delimiter="\t", usecols=["Tumor DNA VAF"])['Tumor DNA VAF'].unique())[::-1]
5✔
353
            vafs_clonal = list(filter(lambda vaf: vaf < 0.6, vafs))
5✔
354
            if len(vafs_clonal) == 0:
5✔
355
                vaf_clonal = 0.6
×
356
            else:
357
                vaf_clonal = vafs_clonal[0]
5✔
358
                if vaf_clonal > 0.5:
5✔
359
                    vaf_clonal = 0.5
5✔
360
            print("Tumor clonal VAF estimated as {} (estimated from Tumor DNA VAF data). Assuming variants with VAF < {} are subclonal".format(round(vaf_clonal, 3), round(vaf_clonal/2, 3)))
5✔
361
            return vaf_clonal
5✔
362

363
    def read_input_file(self, used_columns, dtypes):
5✔
364
        df = pd.read_csv(self.input_file, delimiter='\t', float_precision='high', low_memory=False, na_values="NA", keep_default_na=False, usecols=used_columns, dtype=dtypes)
5✔
365
        df = df.astype({"{} MT IC50 Score".format(self.mt_top_score_metric):'float'})
5✔
366
        return df
5✔
367

368
    def get_sub_df(self, all_epitopes_df, key):
        """Return a copy of the rows for one variant key, plus the key as a string.

        ``key`` holds chromosome, start, stop, reference and variant in that
        order. The copy gains an ``annotation`` column (transcript, gene name,
        mutation and protein position joined by ``-``) and a ``key`` column.
        """
        chromosome, start, stop, reference, variant = key[0], key[1], key[2], key[3], key[4]
        key_str = "{}-{}-{}-{}-{}".format(chromosome, start, stop, reference, variant)

        row_matches = (
            (all_epitopes_df['Chromosome'] == chromosome)
            & (all_epitopes_df['Start'] == start)
            & (all_epitopes_df['Stop'] == stop)
            & (all_epitopes_df['Reference'] == reference)
            & (all_epitopes_df['Variant'] == variant)
        )
        # copy so the columns added below never touch the full table
        df = all_epitopes_df[row_matches].copy()

        annotation_parts = ['Transcript', 'Gene Name', 'Mutation', 'Protein Position']
        df['annotation'] = df[annotation_parts].agg('-'.join, axis=1)
        df['key'] = key_str
        return (df, key_str)
374

375
    def get_best_binder(self, df):
        """Return the preferred row of ``df`` as a Series.

        Filters are applied in order: ``protein_coding`` biotype, transcript
        support level, no problematic positions, anchor residue check. Any
        filter that would remove every row is skipped. The survivors are
        ordered by lowest MT IC50 score, then lowest transcript support level,
        then longest transcript, and the first row is returned. The returned
        row carries an extra ``anchor_residue_pass`` field.

        ``df`` itself is never modified: the helper column and the sort are
        applied to a private copy.
        """
        #get all entries with Biotype 'protein_coding'
        biotype_df = df[df['Biotype'] == 'protein_coding']
        #if there are none, reset to previous dataframe
        if biotype_df.shape[0] == 0:
            biotype_df = df

        #subset protein_coding dataframe to only include entries with a TSL <= maximum_transcript_support_level
        tsl_df = biotype_df[biotype_df['Transcript Support Level'].notnull()]
        tsl_df = tsl_df[tsl_df['Transcript Support Level'] != 'Not Supported']
        tsl_df = tsl_df[tsl_df['Transcript Support Level'] <= self.maximum_transcript_support_level]
        #if this results in an empty dataframe, reset to previous dataframe
        if tsl_df.shape[0] == 0:
            tsl_df = biotype_df

        #subset tsl dataframe to only include entries with no problematic positions
        if self.problematic_positions_exist():
            prob_pos_df = tsl_df[tsl_df['Problematic Positions'] == "None"]
            #if this results in an empty dataframe, reset to previous dataframe
            if prob_pos_df.shape[0] == 0:
                prob_pos_df = tsl_df
        else:
            prob_pos_df = tsl_df

        # prob_pos_df is either a filtered slice or the caller's frame itself; take an
        # explicit copy so adding the helper column neither warns nor leaks into ``df``
        prob_pos_df = prob_pos_df.copy()

        #subset prob_pos dataframe to only include entries that pass the anchor position check
        prob_pos_df['anchor_residue_pass'] = prob_pos_df.apply(lambda x: self.is_anchor_residue_pass(x), axis=1)
        anchor_residue_pass_df = prob_pos_df[prob_pos_df['anchor_residue_pass']]
        if anchor_residue_pass_df.shape[0] == 0:
            anchor_residue_pass_df = prob_pos_df

        #determine the entry with the lowest IC50 Score, lowest TSL, and longest Transcript
        ranked_df = anchor_residue_pass_df.sort_values(by=[
            "{} MT IC50 Score".format(self.mt_top_score_metric),
            "Transcript Support Level",
            "Transcript Length",
        ], ascending=[True, True, False])
        return ranked_df.iloc[0]
412

413
    def is_anchor_residue_pass(self, mutation):
        """Return False only when every mutated position is an anchor and the WT peptide binds.

        The entry passes (True) when the mutation position is missing, when
        more than two positions are listed, or when at least one position is
        not an anchor. Otherwise it fails if the WT IC50 score is missing or
        below the binding threshold (allele-specific when enabled and
        available, else the global one).
        """
        allele = mutation['HLA Allele']
        if self.use_allele_specific_binding_thresholds and allele in self.allele_specific_binding_thresholds:
            cutoff = self.allele_specific_binding_thresholds[allele]
        else:
            cutoff = self.binding_threshold

        anchors = get_anchor_positions(allele, len(mutation['MT Epitope Seq']), self.allele_specific_anchors, self.anchor_probabilities, self.anchor_contribution_threshold, self.mouse_anchor_positions)

        # parse out mutation positions from str
        raw_position = mutation["Mutation Position"]
        if pd.isna(raw_position):
            return True

        mutated_positions = raw_position.split(", ")
        if len(mutated_positions) > 2:
            return True

        if not all(int(pos) in anchors for pos in mutated_positions):
            return True

        # every mutated residue is an anchor: the WT peptide must not be a binder itself
        wt_score = mutation["{} WT IC50 Score".format(self.wt_top_score_metric)]
        if pd.isna(wt_score):
            return False
        if wt_score < cutoff:
            return False
        return True
435

436
    #assign mutations to a "Classification" based on their favorability
437
    def get_tier(self, mutation, vaf_clonal):
        """Return the classification label for one candidate entry.

        Possible results, checked in this order: ``"Pass"``, ``"Anchor"``,
        ``"Subclonal"``, ``"LowExpr"``, ``"NoExpr"`` and ``"Poor"``.

        ``mutation`` is a row-like mapping with the columns read below.
        ``vaf_clonal`` is the clonal VAF estimate; entries with a tumor DNA VAF
        under half of it fail the clonality check.
        """
        if self.use_allele_specific_binding_thresholds and mutation['HLA Allele'] in self.allele_specific_binding_thresholds:
            binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']]
        else:
            binding_threshold = self.binding_threshold

        ic50_pass = mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold
        # without a percentile threshold the percentile criterion is always satisfied
        percentile_pass = (
            self.percentile_threshold is None or
            mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold
        )
        # 'conservative' requires both criteria; any other strategy accepts either one
        binding_pass = (
            (ic50_pass and percentile_pass)
            if self.percentile_threshold_strategy == 'conservative'
            else (ic50_pass or percentile_pass)
        )

        anchor_residue_pass = self.is_anchor_residue_pass(mutation)

        # "Not Supported" is let through; a missing TSL fails
        tsl_pass = True
        if mutation["Transcript Support Level"] == "Not Supported":
            pass
        elif pd.isna(mutation["Transcript Support Level"]):
            tsl_pass = False
        else:
            if mutation["Transcript Support Level"] > self.maximum_transcript_support_level:
                tsl_pass = False

        allele_expr_pass = True
        if (mutation['Tumor RNA VAF'] != 'NA' and mutation['Gene Expression'] != 'NA' and
            mutation['Tumor RNA VAF'] * mutation['Gene Expression'] <= self.allele_expr_threshold):
            allele_expr_pass = False

        vaf_clonal_pass = True
        if (mutation['Tumor DNA VAF'] != 'NA' and mutation['Tumor DNA VAF'] < (vaf_clonal/2)):
            vaf_clonal_pass = False

        #writing these out as explicitly as possible for ease of understanding
        if (binding_pass and
           allele_expr_pass and
           vaf_clonal_pass and
           tsl_pass and
           anchor_residue_pass):
            return "Pass"

        #anchor residues
        if (binding_pass and
           allele_expr_pass and
           vaf_clonal_pass and
           tsl_pass and
           not anchor_residue_pass):
            return "Anchor"

        #not in founding clone
        if (binding_pass and
           allele_expr_pass and
           not vaf_clonal_pass and
           tsl_pass and
           anchor_residue_pass):
            return "Subclonal"

        #relax expression.  Include sites that have reasonable vaf but zero overall gene expression
        lowexpr=False
        # the depth check must read the column from the row; a bare ['Tumor RNA Depth']
        # list literal never equals 'NA', so the guard used to be always true
        if mutation['Tumor RNA VAF'] != 'NA' and mutation['Gene Expression'] != 'NA' and mutation['Tumor RNA Depth'] != 'NA':
            if ((mutation["Tumor RNA VAF"] * mutation["Gene Expression"] > 0) or
               (mutation["Gene Expression"] == 0 and
               mutation["Tumor RNA Depth"] > self.trna_cov and
               mutation["Tumor RNA VAF"] > self.trna_vaf)):
                lowexpr=True

        #if low expression is the only strike against it, it gets lowexpr label (multiple strikes will pass through to poor)
        if (binding_pass and
           lowexpr and
           vaf_clonal_pass and
           tsl_pass and
           anchor_residue_pass):
            return "LowExpr"

        #zero expression
        if (mutation["Gene Expression"] == 0 or mutation["Tumor RNA VAF"] == 0) and not lowexpr:
            return "NoExpr"

        #everything else
        return "Poor"
521

522
    def get_included_df(self, df):
5✔
523
        binding_df = df[df["{} MT IC50 Score".format(self.mt_top_score_metric)] < self.aggregate_inclusion_binding_threshold]
5✔
524
        if binding_df.shape[0] == 0:
5✔
525
            return binding_df
5✔
526
        else:
527
            peptides = list(set(binding_df["MT Epitope Seq"]))
5✔
528
            if len(peptides) <= self.aggregate_inclusion_count_limit:
5✔
529
                return binding_df
5✔
530

531
            best_peptide_entries = []
5✔
532
            for peptide in peptides:
5✔
533
                peptide_df = binding_df[binding_df["MT Epitope Seq"] == peptide]
5✔
534
                best_peptide_entries.append(self.get_best_binder(peptide_df))
5✔
535
            best_peptide_entries_df = pd.DataFrame(best_peptide_entries)
5✔
536
            top_n_best_peptide_entries_df = self.sort_included_df(best_peptide_entries_df).iloc[:self.aggregate_inclusion_count_limit]
5✔
537
            top_n_best_peptides = list(set(top_n_best_peptide_entries_df["MT Epitope Seq"]))
5✔
538
            return binding_df[binding_df["MT Epitope Seq"].isin(top_n_best_peptides)]
5✔
539

540
    def sort_included_df(self, df):
        """Sort ``df`` in place by aggregation priority and return it.

        Priority, highest first: protein-coding biotype, transcript support
        level (missing or ``Not Supported`` ranks as 6), no problematic
        positions (only when that column exists in the input file), anchor
        residue pass, lowest MT IC50 score, longest transcript, lowest MT
        percentile. The temporary ``*_sort`` columns are dropped before
        returning.
        """
        # this check re-reads the input file header, so evaluate it only once
        has_problematic_positions = self.problematic_positions_exist()
        score_column = "{} MT IC50 Score".format(self.mt_top_score_metric)
        percentile_column = "{} MT Percentile".format(self.mt_top_score_metric)

        df['biotype_sort'] = df['Biotype'].apply(lambda x: 1 if x == 'protein_coding' else 2)
        df['tsl_sort'] = df['Transcript Support Level'].apply(lambda x: 6 if pd.isnull(x) or x == 'Not Supported' else int(x))
        if has_problematic_positions:
            df['problematic_positions_sort'] = df['Problematic Positions'].apply(lambda x: 1 if x == "None" else 2)
        df['anchor_residue_pass_sort'] = df.apply(lambda x: 1 if self.is_anchor_residue_pass(x) else 2, axis=1)

        # (column, ascending) pairs in priority order
        sort_plan = [("biotype_sort", True), ("tsl_sort", True)]
        if has_problematic_positions:
            sort_plan.append(("problematic_positions_sort", True))
        sort_plan.extend([
            ("anchor_residue_pass_sort", True),
            (score_column, True),
            ("Transcript Length", False),
            (percentile_column, True),
        ])
        sort_columns = [column for column, _ in sort_plan]
        sort_order = [ascending for _, ascending in sort_plan]

        df.sort_values(by=sort_columns, inplace=True, ascending=sort_order)
        # errors='ignore' because problematic_positions_sort only exists conditionally
        df.drop(columns = ['biotype_sort', 'tsl_sort', 'problematic_positions_sort', 'anchor_residue_pass_sort'], inplace=True, errors='ignore')
        return df
570

571
    def get_unique_peptide_hla_counts(self, included_df):
        """Return one row per (HLA allele, MT epitope) pair with its row count.

        The count ends up in a column named ``0``.
        """
        pair_sizes = included_df.groupby(['HLA Allele', 'MT Epitope Seq']).size()
        flattened = pair_sizes.reset_index()
        return pd.DataFrame(flattened)
573

574
    def replace_nas(self, items):
        """Return a copy of ``items`` with every missing value replaced by ``"NA"``."""
        cleaned = []
        for item in items:
            cleaned.append("NA" if pd.isna(item) else item)
        return cleaned
576

577
    def round_to_ints(self, items):
        """Return a copy of ``items`` with plain, non-missing floats rounded to integers.

        Only exact ``float`` instances are rounded; every other value,
        including NaN, is passed through unchanged.
        """
        rounded = []
        for item in items:
            if type(item) is float and not pd.isna(item):
                rounded.append(round(item))
            else:
                rounded.append(item)
        return rounded
579

580
    def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorithms, percentile_algorithms):
        """Summarise the included epitopes per transcript set.

        Transcripts (``annotation`` values) that yield exactly the same tuple
        of unique mutant peptides are grouped into one "Transcript Set N".
        The per-peptide scores of a set are taken from the rows of the first
        transcript in that set.

        Args:
            included_df: DataFrame of included predictions. Columns read
                here: ``annotation``, ``MT Epitope Seq``, ``WT Epitope Seq``,
                ``HLA Allele``, ``Mutation Position``, ``Variant Type``,
                optionally ``Problematic Positions``, and the per-algorithm
                score / percentile columns named below.
            prediction_algorithms: names with ``"<name> MT|WT IC50 Score"``
                and ``"<name> MT|WT Percentile"`` columns.
            el_algorithms: names with ``"<name> MT|WT Score"`` columns.
            percentile_algorithms: names whose ``"<name> MT|WT Percentile"``
                columns are reported under ``individual_percentile_calls``.

        Returns:
            tuple: ``(peptides, anno_count)``; ``peptides`` maps each set name
            to its metrics and ``anno_count`` is the number of distinct
            annotations in ``included_df``.
        """
        peptides = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
        included_transcripts = included_df['annotation'].unique()

        # Group transcripts by the exact tuple of peptides they produce.
        peptide_sets = {}
        for annotation in included_transcripts:
            annotation_df = included_df[included_df['annotation'] == annotation]
            peptide_set = tuple(annotation_df["MT Epitope Seq"].unique())
            peptide_sets.setdefault(peptide_set, []).append(annotation)

        # Loop invariants: computed once instead of per peptide / peptide type.
        sorted_hla_types = sorted(self.hla_types)
        top_score_metrics = [('MT', self.mt_top_score_metric), ('WT', self.wt_top_score_metric)]
        # EL algorithms whose percentile is always reported as 'NA'.
        no_el_percentile = ['MHCflurryEL Processing', 'BigMHC_EL', 'BigMHC_IM', 'DeepImmuno']
        # Placeholder used when the wildtype peptide is missing, by variant type.
        missing_wt_labels = {'FS': 'FS-NA', 'inframe_ins': 'INS-NA', 'inframe_del': 'DEL-NA'}

        for set_number, (peptide_set, annotations) in enumerate(peptide_sets.items(), start=1):
            set_name = "Transcript Set {}".format(set_number)
            # All transcripts of a set share the same peptides; use the first one's rows.
            set_df = included_df[included_df['annotation'] == annotations[0]]
            results = defaultdict(lambda: defaultdict(list))
            for peptide in peptide_set:
                peptide_df = set_df[set_df['MT Epitope Seq'] == peptide]
                if len(peptide_df) == 0:
                    continue

                individual_ic50_calls = {'algorithms': prediction_algorithms}
                individual_ic50_percentile_calls = {'algorithms': prediction_algorithms}
                individual_el_calls = {'algorithms': el_algorithms}
                individual_el_percentile_calls = {'algorithms': el_algorithms}
                individual_percentile_calls = {'algorithms': percentile_algorithms}
                anchor_fails = []
                entry = results[peptide]

                for peptide_type, top_score_metric in top_score_metrics:
                    ic50s = {}
                    percentiles = {}
                    ic50_calls = {}
                    percentile_calls = {}
                    el_calls = {}
                    el_percentile_calls = {}
                    all_percentile_calls = {}
                    for line in peptide_df.to_dict(orient='index').values():
                        hla = line['HLA Allele']
                        ic50s[hla] = line['{} {} IC50 Score'.format(top_score_metric, peptide_type)]
                        percentiles[hla] = line['{} {} Percentile'.format(top_score_metric, peptide_type)]
                        ic50_calls[hla] = self.replace_nas([
                            line["{} {} IC50 Score".format(algorithm, peptide_type)]
                            for algorithm in prediction_algorithms
                        ])
                        percentile_calls[hla] = self.replace_nas([
                            line["{} {} Percentile".format(algorithm, peptide_type)]
                            for algorithm in prediction_algorithms
                        ])
                        el_calls[hla] = self.replace_nas([
                            line["{} {} Score".format(algorithm, peptide_type)]
                            for algorithm in el_algorithms
                        ])
                        el_percentile_calls[hla] = self.replace_nas([
                            'NA' if algorithm in no_el_percentile
                            else line["{} {} Percentile".format(algorithm, peptide_type)]
                            for algorithm in el_algorithms
                        ])
                        all_percentile_calls[hla] = self.replace_nas([
                            line["{} {} Percentile".format(algorithm, peptide_type)]
                            for algorithm in percentile_algorithms
                        ])
                        # Anchor failures are only evaluated once, on the mutant pass.
                        if peptide_type == 'MT' and not self.is_anchor_residue_pass(line):
                            anchor_fails.append(hla)

                    # One value per known HLA type, 'X' where this peptide has no row for it.
                    sorted_ic50s = [ic50s.get(hla_type, 'X') for hla_type in sorted_hla_types]
                    sorted_percentiles = [percentiles.get(hla_type, 'X') for hla_type in sorted_hla_types]
                    entry['ic50s_{}'.format(peptide_type)] = self.replace_nas(sorted_ic50s)
                    entry['percentiles_{}'.format(peptide_type)] = self.replace_nas(sorted_percentiles)
                    individual_ic50_calls[peptide_type] = ic50_calls
                    individual_ic50_percentile_calls[peptide_type] = percentile_calls
                    individual_el_calls[peptide_type] = el_calls
                    individual_el_percentile_calls[peptide_type] = el_percentile_calls
                    individual_percentile_calls[peptide_type] = all_percentile_calls

                first_row = peptide_df.iloc[0]
                # A fresh list per peptide so entries never share a mutable object.
                entry['hla_types'] = list(sorted_hla_types)
                mutation_position = first_row['Mutation Position']
                entry['mutation_position'] = "NA" if pd.isna(mutation_position) else str(mutation_position)
                if 'Problematic Positions' in first_row:
                    entry['problematic_positions'] = str(first_row['Problematic Positions'])
                else:
                    entry['problematic_positions'] = 'None'
                entry['anchor_fails'] = ', '.join(anchor_fails) if len(anchor_fails) > 0 else 'None'
                entry['individual_ic50_calls'] = individual_ic50_calls
                entry['individual_ic50_percentile_calls'] = individual_ic50_percentile_calls
                entry['individual_el_calls'] = individual_el_calls
                entry['individual_el_percentile_calls'] = individual_el_percentile_calls
                entry['individual_percentile_calls'] = individual_percentile_calls

                wt_peptide = first_row['WT Epitope Seq']
                if pd.isna(wt_peptide):
                    # Other variant types keep the missing value unchanged.
                    wt_peptide = missing_wt_labels.get(first_row['Variant Type'], wt_peptide)
                entry['wt_peptide'] = wt_peptide

            set_metrics = peptides[set_name]
            set_metrics['peptides'] = self.sort_peptides(results)
            sorted_transcripts = self.sort_transcripts(annotations, included_df)
            set_metrics['transcripts'] = list(sorted_transcripts.Annotation)
            set_metrics['transcript_expr'] = self.replace_nas(list(sorted_transcripts.Expr))
            set_metrics['tsl'] = self.replace_nas(self.round_to_ints(list(sorted_transcripts.TSL)))
            set_metrics['biotype'] = list(sorted_transcripts.Biotype)
            set_metrics['transcript_length'] = [int(length) for length in list(sorted_transcripts.Length)]
            set_metrics['transcript_count'] = len(annotations)
            set_metrics['peptide_count'] = len(peptide_set)
            # Missing expression values were turned into 'NA' above and count as 0.
            set_metrics['total_expr'] = sum(
                [0 if expr == 'NA' else float(expr) for expr in set_metrics['transcript_expr']]
            )

        anno_count = len(included_transcripts)
        return (peptides, anno_count)
5✔
680

681
    def sort_peptides(self, results):
        """Order the per-peptide results and return them as a plain ``dict``.

        Peptides sort by: no problematic positions first, then no anchor
        failures first, then the lowest mutant IC50 among the alleles that
        have a value (entries equal to 'X' are ignored).
        """
        def ranking(item):
            metrics = item[1]
            position_rank = 1 if metrics['problematic_positions'] == 'None' else 2
            anchor_rank = 1 if metrics['anchor_fails'] == 'None' else 2
            best_ic50 = min([ic50 for ic50 in metrics['ic50s_MT'] if ic50 != 'X'])
            return (position_rank, anchor_rank, best_ic50)

        return dict(sorted(results.items(), key=ranking))
5✔
692

693
    def sort_transcripts(self, annotations, included_df):
        """Build a one-row-per-transcript table for ``annotations`` and sort it.

        For every annotation the first matching row of ``included_df``
        supplies the biotype, transcript support level, length and
        expression. Rows are ordered with ``protein_coding`` biotypes first,
        then by ascending TSL (``'NA'`` / ``'Not Supported'`` rank after 5),
        then by descending transcript length.

        Returns:
            DataFrame with columns ``Annotation``, ``Biotype``, ``TSL``,
            ``Length``, ``Expr`` plus the helper columns ``Biotype Sort`` and
            ``TSL Sort``.
        """
        # Collect the single-row frames and concatenate once: calling
        # pd.concat inside the loop re-copies the growing table every time.
        rows = []
        for annotation in annotations:
            line = included_df[included_df['annotation'] == annotation].iloc[0]
            data = {
                'Annotation': line['annotation'],
                'Biotype': line['Biotype'],
                'TSL': line['Transcript Support Level'],
                'Length': line['Transcript Length'],
                'Expr': line['Transcript Expression'],
            }
            rows.append(pd.DataFrame.from_records(data, index=[0]))
        transcript_table = pd.concat(rows, ignore_index=True)

        transcript_table['Biotype Sort'] = transcript_table.Biotype.map(lambda x: 1 if x == 'protein_coding' else 2)
        tsl_sort_criteria = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 'NA': 6, 'Not Supported': 6}
        transcript_table['TSL Sort'] = transcript_table.TSL.map(tsl_sort_criteria)
        transcript_table.sort_values(by=["Biotype Sort", "TSL Sort", "Length"], inplace=True, ascending=[True, True, False])
        return transcript_table
5✔
710

711
    def calculate_unique_peptide_count(self, included_df):
        """Return how many distinct ``MT Epitope Seq`` values ``included_df`` contains."""
        distinct_peptides = included_df["MT Epitope Seq"].unique()
        return len(distinct_peptides)
5✔
713

714
    def calculate_good_binder_count(self, included_df):
5✔
715
        if self.use_allele_specific_binding_thresholds:
5✔
716
            selection = []
5✔
717
            for index, row in included_df.iterrows():
5✔
718
                if row['HLA Allele'] in self.allele_specific_binding_thresholds:
5✔
719
                    binding_threshold = self.allele_specific_binding_thresholds[row['HLA Allele']]
5✔
720
                else:
721
                    binding_threshold = self.binding_threshold
×
722
                if row["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold:
5✔
723
                    selection.append(index)
5✔
724
            good_binders = included_df[included_df.index.isin(selection)]
5✔
725
        else:
726
            good_binders = included_df[included_df["{} MT IC50 Score".format(self.mt_top_score_metric)] < self.binding_threshold]
5✔
727
        return len(good_binders["MT Epitope Seq"].unique())
5✔
728

729
    def get_default_annotation_count(self):
        """Return the default annotation count, which is ``0`` for this class."""
        default_count = 0
        return default_count
×
731

732
    def get_best_aa_change(self, best):
        """Format the amino-acid change of the ``best`` entry.

        Frameshifts (``Variant Type == 'FS'``) become ``"FS<position>"``.
        Everything else splits ``Mutation`` on "/" into two parts and places
        ``Protein Position`` between them.
        """
        if best['Variant Type'] == 'FS':
            return 'FS{}'.format(best['Protein Position'])
        wildtype_aa, mutant_aa = best["Mutation"].split("/")
        return wildtype_aa + best["Protein Position"] + mutant_aa
5✔
738

739
    def assemble_result_line(self, best, key, vaf_clonal, hla, anno_count, included_peptide_count, good_binder_count):
        """Assemble the aggregated report row for one variant as an ordered dict.

        The row starts with ``ID`` and ``Index``, followed by one column per
        entry of ``hla`` (sorted, with the ``HLA-`` prefix removed), and then
        the summary fields taken from the ``best`` entry and the counts
        passed in.
        """
        allele_expr = self.calculate_allele_expr(best)
        tier = self.get_tier(mutation=best, vaf_clonal=vaf_clonal)

        if 'Problematic Positions' in best:
            problematic_positions = best['Problematic Positions']
        else:
            problematic_positions = 'None'

        # "Not Supported" and missing values pass through as-is; numeric levels become "1".."5".
        raw_tsl = best['Transcript Support Level']
        if raw_tsl == "Not Supported" or pd.isna(raw_tsl):
            tsl = raw_tsl
        else:
            tsl = str(int(raw_tsl))

        row = {'ID': key, 'Index': best['Index']}
        for allele, value in sorted(hla.items()):
            row[allele.replace('HLA-', '')] = value

        summary_fields = [
            ('Gene', best["Gene Name"]),
            ('AA Change', self.get_best_aa_change(best)),
            ('Num Passing Transcripts', anno_count),
            ('Best Peptide', best["MT Epitope Seq"]),
            ('Best Transcript', best["Transcript"]),
            ('TSL', tsl),
            ('Allele', best["HLA Allele"]),
            ('Pos', best["Mutation Position"]),
            ('Prob Pos', problematic_positions),
            ('Num Included Peptides', included_peptide_count),
            ('Num Passing Peptides', good_binder_count),
            ('IC50 MT', best["{} MT IC50 Score".format(self.mt_top_score_metric)]),
            ('IC50 WT', best["{} WT IC50 Score".format(self.wt_top_score_metric)]),
            ('%ile MT', best["{} MT Percentile".format(self.mt_top_score_metric)]),
            ('%ile WT', best["{} WT Percentile".format(self.wt_top_score_metric)]),
            ('RNA Expr', best["Gene Expression"]),
            ('RNA VAF', best["Tumor RNA VAF"]),
            ('Allele Expr', allele_expr),
            ('RNA Depth', best["Tumor RNA Depth"]),
            ('DNA VAF', best["Tumor DNA VAF"]),
            ('Tier', tier),
            ('Evaluation', 'Pending'),
        ]
        row.update(summary_fields)
        return row
5✔
773

774
    def get_metrics(self, peptides, best):
        """Build the metrics entry for one variant.

        ``peptides`` is the per-transcript-set mapping; its values supply the
        ``transcript_count``, ``peptide_count`` and ``total_expr`` lists.
        Numeric fields of ``best`` are converted to ``float``, or to the
        string 'NA' when missing.
        """
        def float_or_na(value):
            return 'NA' if pd.isna(value) else float(value)

        set_summaries = list(peptides.values())
        wt_peptide = best['WT Epitope Seq']

        metrics = {}
        metrics['good_binders'] = peptides
        metrics['sets'] = list(peptides.keys())
        metrics['transcript_counts'] = [summary['transcript_count'] for summary in set_summaries]
        metrics['peptide_counts'] = [summary['peptide_count'] for summary in set_summaries]
        metrics['set_expr'] = [summary['total_expr'] for summary in set_summaries]
        metrics['DNA VAF'] = float_or_na(best['Tumor DNA VAF'])
        metrics['RNA VAF'] = float_or_na(best['Tumor RNA VAF'])
        metrics['gene_expr'] = float_or_na(best['Gene Expression'])
        metrics['best_peptide_mt'] = best['MT Epitope Seq']
        metrics['best_peptide_wt'] = 'NA' if pd.isna(wt_peptide) else wt_peptide
        metrics['best_hla_allele'] = best['HLA Allele']
        return metrics
788

789
    def write_metrics_file(self, metrics):
5✔
790
        with open(self.metrics_file, 'w') as fh:
5✔
791
            json.dump(metrics, fh, indent=2, separators=(',', ': '))
5✔
792

793
    #sort the table in our preferred manner
794
    def sort_table(self, df):
        """Sort the aggregated report in place and return it.

        Rows are ordered by tier (in the fixed order below), then by the sum
        of the dense IC50 rank (lower is better) and the dense allele
        expression rank (higher is better, non-numeric values last), then by
        gene and amino-acid change. The helper rank columns are removed again
        before returning.
        """
        # Position in this list defines the tier priority.
        tier_order = ["Pass", "LowExpr", "Anchor", "Subclonal", "Poor", "NoExpr"]
        tier_rank = {tier: position for position, tier in enumerate(tier_order)}
        df["rank_tier"] = df['Tier'].map(tier_rank)

        allele_expr = pd.to_numeric(df["Allele Expr"], errors='coerce')
        df["rank_ic50"] = df["IC50 MT"].rank(ascending=True, method='dense')
        df["rank_expr"] = allele_expr.rank(ascending=False, method='dense', na_option="bottom")
        df["rank"] = df["rank_ic50"] + df["rank_expr"]

        df.sort_values(by=["rank_tier", "rank", "Gene", "AA Change"], inplace=True, ascending=True)

        for helper_column in ('rank_tier', 'rank_ic50', 'rank_expr', 'rank'):
            df.drop(labels=helper_column, axis=1, inplace=True)

        return df
5✔
812

813
    def copy_pvacview_r_files(self):
        """Copy the pVACview R scripts and image assets next to the output file.

        All ``*.R`` files from ``../tools/pvacview`` (relative to this module)
        go into the directory of ``self.output_file``; the three image files
        go into its ``www`` sub-directory, which is created if needed.
        """
        source_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "tools", "pvacview"))
        r_scripts = glob.iglob(os.path.join(source_dir, "*.R"))
        target_dir = os.path.abspath(os.path.dirname(self.output_file))
        target_www = os.path.join(target_dir, "www")
        os.makedirs(target_www, exist_ok=True)
        for script_path in r_scripts:
            shutil.copy(script_path, target_dir)
        for asset_name in ["anchor.jpg", "pVACview_logo.png", "pVACview_logo_mini.png"]:
            shutil.copy(os.path.join(source_dir, "www", asset_name), os.path.join(target_dir, "www", asset_name))
5✔
823

824

825
class UnmatchedSequenceAggregateAllEpitopes(AggregateAllEpitopes, metaclass=ABCMeta):
    """Aggregation variant that reads un-prefixed prediction columns.

    Works on the ``Epitope Seq`` / ``"<metric> IC50 Score"`` columns (no
    MT/WT prefix), groups rows by ``Mutation`` and produces neither a metrics
    file nor pVACview assets.
    """

    def __init__(
        self,
        input_file,
        output_file,
        binding_threshold=500,
        percentile_threshold=None,
        percentile_threshold_strategy='conservative',
        allele_specific_binding_thresholds=False,
        top_score_metric="median",
        aggregate_inclusion_binding_threshold=5000,
        aggregate_inclusion_count_limit=15,
    ):
        """Store the run options and let the base class load the HLA types.

        ``top_score_metric`` equal to ``'median'`` selects the ``Median``
        columns; any other value selects the ``Best`` columns.
        """
        self.input_file = input_file
        self.output_file = output_file
        self.binding_threshold = binding_threshold
        self.percentile_threshold = percentile_threshold
        self.percentile_threshold_strategy = percentile_threshold_strategy
        self.use_allele_specific_binding_thresholds = allele_specific_binding_thresholds
        self.aggregate_inclusion_binding_threshold = aggregate_inclusion_binding_threshold
        self.aggregate_inclusion_count_limit = aggregate_inclusion_count_limit
        if top_score_metric == 'median':
            self.top_score_metric = "Median"
        else:
            self.top_score_metric = "Best"
        self.metrics_file = output_file.replace('.tsv', '.metrics.json')
        super().__init__()

    def get_list_unique_mutation_keys(self, df):
        """Return the sorted distinct values of the ``Mutation`` column."""
        keys = df["Mutation"].values.tolist()
        return sorted(set(keys))

    def calculate_clonal_vaf(self):
        """Return the clonal VAF for pVACsplice runs and ``None`` otherwise."""
        if self.__class__.__name__ == 'PvacspliceAggregateAllEpitopes':
            return PvacseqAggregateAllEpitopes.calculate_clonal_vaf(self)
        else:
            return None

    def read_input_file(self, used_columns, dtypes):
        """Load the input TSV and cast the top-score IC50 column to float.

        ``used_columns`` and ``dtypes`` are accepted for interface
        compatibility and are not used here.
        """
        score_column = "{} IC50 Score".format(self.top_score_metric)
        df = pd.read_csv(self.input_file, delimiter='\t', float_precision='high', low_memory=False, na_values="NA", keep_default_na=False, dtype={"Mutation": str})
        df = df[df[score_column] != 'NA']
        df = df.astype({score_column: 'float'})
        return df

    def get_sub_df(self, all_epitopes_df, key):
        """Return a copy of the rows whose ``Mutation`` equals ``key``, together with ``key``."""
        df = all_epitopes_df[all_epitopes_df['Mutation'] == key].copy()
        return (df, key)

    def get_best_binder(self, df):
        """Return the row with the lowest top-score IC50.

        When problematic positions are annotated, only rows without any are
        considered, unless that leaves nothing. The frame that is searched is
        sorted in place (this is ``df`` itself whenever no filtering applied).
        """
        candidates = df
        if self.problematic_positions_exist():
            clean_df = df[df['Problematic Positions'] == "None"]
            # fall back to every row when all of them have problematic positions
            if clean_df.shape[0] != 0:
                candidates = clean_df
        candidates.sort_values(by=["{} IC50 Score".format(self.top_score_metric)], inplace=True, ascending=True)
        return candidates.iloc[0]

    def get_included_df(self, df):
        """Select the rows to include in the aggregation.

        Rows must score below ``aggregate_inclusion_binding_threshold``. If
        they contain more distinct peptides than
        ``aggregate_inclusion_count_limit``, only the rows of the best-scoring
        peptides (up to that limit) are kept.
        """
        score_column = "{} IC50 Score".format(self.top_score_metric)
        binding_df = df[df[score_column] < self.aggregate_inclusion_binding_threshold]
        if binding_df.shape[0] == 0:
            return binding_df

        # unique() keeps first-appearance order, so the candidate order (and
        # any tie-breaking that depends on it) is the same on every run.
        peptides = binding_df["Epitope Seq"].unique()
        if len(peptides) <= self.aggregate_inclusion_count_limit:
            return binding_df

        best_peptide_entries = []
        for peptide in peptides:
            # copy: get_best_binder sorts the frame it receives in place
            peptide_df = binding_df[binding_df["Epitope Seq"] == peptide].copy()
            best_peptide_entries.append(self.get_best_binder(peptide_df))
        best_peptide_entries_df = pd.DataFrame(best_peptide_entries)
        top_entries_df = self.sort_included_df(best_peptide_entries_df).iloc[:self.aggregate_inclusion_count_limit]
        top_peptides = top_entries_df["Epitope Seq"].unique().tolist()
        return binding_df[binding_df["Epitope Seq"].isin(top_peptides)]

    def sort_included_df(self, df):
        """Sort ``df`` in place by ascending top-score IC50 and return it."""
        df.sort_values(by=["{} IC50 Score".format(self.top_score_metric)], inplace=True, ascending=True)
        return df

    def get_unique_peptide_hla_counts(self, included_df):
        """Count the rows of ``included_df`` for each (HLA allele, peptide) pair."""
        return pd.DataFrame(included_df.groupby(['HLA Allele', 'Epitope Seq']).size().reset_index())

    def get_included_df_metrics(self, included_df, prediction_algorithms, el_algorithms, percentile_algorithms):
        """No per-set metrics are produced here; returns ``(None, "NA")``."""
        return (None, "NA")

    def calculate_unique_peptide_count(self, included_df):
        """Return how many distinct ``Epitope Seq`` values ``included_df`` contains."""
        return len(included_df["Epitope Seq"].unique())

    def calculate_good_binder_count(self, included_df):
        """Count the distinct peptides whose top-score IC50 is below the binding threshold.

        With allele-specific thresholds enabled, each row uses the threshold
        of its ``HLA Allele`` when one is registered and
        ``self.binding_threshold`` otherwise. The mask is evaluated per row,
        so rows sharing an index label cannot select one another.
        """
        scores = included_df["{} IC50 Score".format(self.top_score_metric)].to_numpy()
        if self.use_allele_specific_binding_thresholds:
            thresholds = (
                included_df['HLA Allele']
                .map(self.allele_specific_binding_thresholds)
                .fillna(self.binding_threshold)
                .astype(float)
                .to_numpy()
            )
            passing = scores < thresholds
        else:
            passing = scores < self.binding_threshold
        good_binders = included_df[passing]
        return len(good_binders["Epitope Seq"].unique())

    def get_default_annotation_count(self):
        """Return the placeholder annotation count ``"NA"``."""
        return "NA"

    def get_metrics(self, peptides, best):
        """No metrics are produced here; always returns ``None``."""
        return None

    def write_metrics_file(self, metrics):
        """Intentionally a no-op: no metrics file is written."""
        pass

    #sort the table in our preferred manner
    def sort_table(self, df):
        """Sort the report in place by tier ("Pass" before "Poor"), then IC50, then ID."""
        tier_sorter = ["Pass", "Poor"]
        sorter_index = dict(zip(tier_sorter, range(len(tier_sorter))))
        df["rank_tier"] = df['Tier'].map(sorter_index)

        # A single sort is enough: IC50 and ID are already the tie-breakers
        # here, so a preliminary sort on them cannot change the result.
        df.sort_values(by=["rank_tier", "IC50 MT", "ID"], inplace=True, ascending=[True, True, True])

        df.drop(labels='rank_tier', axis=1, inplace=True)
        return df

    def copy_pvacview_r_files(self):
        """Intentionally a no-op: no pVACview files are copied."""
        pass
5✔
954

955
class PvacfuseAggregateAllEpitopes(UnmatchedSequenceAggregateAllEpitopes, metaclass=ABCMeta):
    """Aggregation variant that additionally tiers on ``Read Support`` and ``Expression``."""

    def __init__(
        self,
        input_file,
        output_file,
        binding_threshold=500,
        percentile_threshold=None,
        percentile_threshold_strategy='conservative',
        allele_specific_binding_thresholds=False,
        top_score_metric="median",
        read_support=5,
        expn_val=0.1,
        aggregate_inclusion_binding_threshold=5000,
        aggregate_inclusion_count_limit=15,
    ):
        """Forward the shared options to the parent and store the read-support / expression cutoffs."""
        UnmatchedSequenceAggregateAllEpitopes.__init__(
            self,
            input_file,
            output_file,
            binding_threshold=binding_threshold,
            percentile_threshold=percentile_threshold,
            percentile_threshold_strategy=percentile_threshold_strategy,
            allele_specific_binding_thresholds=allele_specific_binding_thresholds,
            top_score_metric=top_score_metric,
            aggregate_inclusion_binding_threshold=aggregate_inclusion_binding_threshold,
            aggregate_inclusion_count_limit=aggregate_inclusion_count_limit,
        )
        self.read_support = read_support
        self.expn_val = expn_val

    def assemble_result_line(self, best, key, vaf_clonal, hla, anno_count, included_peptide_count, good_binder_count):
        """Assemble the report row for one entry as an ordered dict.

        ``Gene Name``, ``Transcript`` and ``Problematic Positions`` are
        optional in ``best``; missing ones are reported as 'NA', 'NA' and
        'None' respectively.
        """
        tier = self.get_tier(mutation=best, vaf_clonal=vaf_clonal)

        out_dict = { 'ID': key }
        out_dict.update({ k.replace('HLA-', ''):v for k,v in sorted(hla.items()) })
        gene = best['Gene Name'] if 'Gene Name' in best else 'NA'
        transcript = best['Transcript'] if 'Transcript' in best else 'NA'
        problematic_positions = best['Problematic Positions'] if 'Problematic Positions' in best else 'None'
        out_dict.update({
            'Gene': gene,
            'Best Peptide': best["Epitope Seq"],
            'Best Transcript': transcript,
            'Allele': best['HLA Allele'],
            'Prob Pos': problematic_positions,
            'Num Included Peptides': included_peptide_count,
            'Num Passing Peptides': good_binder_count,
            'IC50 MT': best["{} IC50 Score".format(self.top_score_metric)],
            '%ile MT': best["{} Percentile".format(self.top_score_metric)],
            'Expr': best['Expression'],
            'Read Support': best['Read Support'],
            'Tier': tier,
            'Evaluation': 'Pending',
        })
        return out_dict

    def get_tier(self, mutation, vaf_clonal):
        """Classify ``mutation`` as "Pass", "LowReadSupport", "LowExpr" or "Poor".

        Binding passes on the IC50 threshold alone when no percentile
        threshold is configured. Otherwise the 'conservative' strategy
        requires both the IC50 and the percentile criterion, and any other
        strategy requires at least one of them. ``vaf_clonal`` is unused here.
        """
        if self.use_allele_specific_binding_thresholds and mutation['HLA Allele'] in self.allele_specific_binding_thresholds:
            binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']]
        else:
            binding_threshold = self.binding_threshold

        ic50_pass = mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold
        if self.percentile_threshold is None:
            # Without a percentile threshold only the IC50 criterion exists.
            # Treating the absent criterion as "passed" would make the
            # non-conservative (either-criterion) strategy accept everything.
            binding_pass = ic50_pass
        else:
            percentile_pass = mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold
            if self.percentile_threshold_strategy == 'conservative':
                binding_pass = ic50_pass and percentile_pass
            else:
                binding_pass = ic50_pass or percentile_pass

        low_read_support = bool(mutation['Read Support'] != 'NA' and mutation['Read Support'] < self.read_support)
        low_expr = bool(mutation['Expression'] != 'NA' and mutation['Expression'] < self.expn_val)

        if not binding_pass:
            return "Poor"
        # failing both evidence checks is reported as Poor, not as either single failure
        if low_read_support and low_expr:
            return "Poor"
        if low_read_support:
            return "LowReadSupport"
        if low_expr:
            return "LowExpr"
        return "Pass"

    def sort_table(self, df):
        """Sort the report in place and return it.

        Order: tier, then the sum of the dense IC50 rank and the dense
        expression rank (non-numeric expression last), then IC50, then ID.
        """
        tier_sorter = ["Pass", "LowReadSupport", "LowExpr", "Poor"]
        sorter_index = dict(zip(tier_sorter, range(len(tier_sorter))))
        df["rank_tier"] = df['Tier'].map(sorter_index)

        df["rank_ic50"] = df["IC50 MT"].rank(ascending=True, method='dense')
        df["rank_expr"] = pd.to_numeric(df["Expr"], errors='coerce').rank(ascending=False, method='dense', na_option="bottom")
        df["rank"] = df["rank_ic50"] + df["rank_expr"]

        # IC50 and ID are the final tie-breakers of this sort, so no
        # preliminary sort on them is needed.
        df.sort_values(by=["rank_tier", "rank", "IC50 MT", "ID"], inplace=True, ascending=True)

        df.drop(labels=['rank_tier', 'rank_ic50', 'rank_expr', 'rank'], axis=1, inplace=True)
        return df
5✔
1072

1073

1074
class PvacbindAggregateAllEpitopes(UnmatchedSequenceAggregateAllEpitopes, metaclass=ABCMeta):
    """Aggregation variant whose tier depends on binding only ("Pass" or "Poor")."""

    def assemble_result_line(self, best, key, vaf_clonal, hla, anno_count, included_peptide_count, good_binder_count):
        """Assemble the report row for one entry as an ordered dict.

        ``Problematic Positions`` is optional in ``best`` and reported as
        'None' when absent.
        """
        tier = self.get_tier(mutation=best, vaf_clonal=vaf_clonal)

        out_dict = { 'ID': key }
        out_dict.update({ k.replace('HLA-', ''):v for k,v in sorted(hla.items()) })
        problematic_positions = best['Problematic Positions'] if 'Problematic Positions' in best else 'None'
        out_dict.update({
            'Best Peptide': best["Epitope Seq"],
            'Prob Pos': problematic_positions,
            'Num Included Peptides': included_peptide_count,
            'Num Passing Peptides': good_binder_count,
            'IC50 MT': best["{} IC50 Score".format(self.top_score_metric)],
            '%ile MT': best["{} Percentile".format(self.top_score_metric)],
            'Tier': tier,
            'Evaluation': 'Pending',
        })
        return out_dict

    def get_tier(self, mutation, vaf_clonal):
        """Return "Pass" when ``mutation`` meets the binding criteria, else "Poor".

        Binding passes on the IC50 threshold alone when no percentile
        threshold is configured. Otherwise the 'conservative' strategy
        requires both the IC50 and the percentile criterion, and any other
        strategy requires at least one of them. ``vaf_clonal`` is unused here.
        """
        if self.use_allele_specific_binding_thresholds and mutation['HLA Allele'] in self.allele_specific_binding_thresholds:
            binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']]
        else:
            binding_threshold = self.binding_threshold

        ic50_pass = mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold
        if self.percentile_threshold is None:
            # Without a percentile threshold only the IC50 criterion exists.
            # Treating the absent criterion as "passed" would make the
            # non-conservative (either-criterion) strategy accept everything.
            binding_pass = ic50_pass
        else:
            percentile_pass = mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold
            if self.percentile_threshold_strategy == 'conservative':
                binding_pass = ic50_pass and percentile_pass
            else:
                binding_pass = ic50_pass or percentile_pass

        return "Pass" if binding_pass else "Poor"
5✔
1114

1115

1116
class PvacspliceAggregateAllEpitopes(PvacbindAggregateAllEpitopes, metaclass=ABCMeta):
    """pVACbind-style aggregator keyed on ``Index`` with VAF/expression tiering.

    Rows are grouped by the ``Index`` column, and the tier additionally takes
    transcript support level, RNA expression and DNA VAF into account.
    """

    def __init__(
        self,
        input_file,
        output_file,
        tumor_purity=None,
        binding_threshold=500,
        percentile_threshold=None,
        percentile_threshold_strategy='conservative',
        allele_specific_binding_thresholds=False,
        aggregate_inclusion_binding_threshold=5000,
        aggregate_inclusion_count_limit=15,
        top_score_metric="median",
        trna_vaf=0.25,
        trna_cov=10,
        expn_val=1,
        maximum_transcript_support_level=1,
    ):
        """Initialise the parent aggregator and store the splice-specific cutoffs.

        Args:
            input_file: Forwarded to the parent initializer.
            output_file: Forwarded to the parent initializer.
            tumor_purity: Stored on the instance as ``tumor_purity``.
            binding_threshold: Forwarded to the parent initializer.
            percentile_threshold: Forwarded to the parent initializer.
            percentile_threshold_strategy: Forwarded to the parent initializer.
            allele_specific_binding_thresholds: Forwarded to the parent
                initializer.
            aggregate_inclusion_binding_threshold: Forwarded to the parent
                initializer.
            aggregate_inclusion_count_limit: Forwarded to the parent
                initializer.
            top_score_metric: Forwarded to the parent initializer.
            trna_vaf: Tumor RNA VAF cutoff used by ``get_tier``.
            trna_cov: Tumor RNA depth cutoff used by ``get_tier``.
            expn_val: Expression cutoff; only used here to derive
                ``allele_expr_threshold``.
            maximum_transcript_support_level: Largest transcript support
                level that still passes in ``get_tier``.
        """
        PvacbindAggregateAllEpitopes.__init__(
            self,
            input_file,
            output_file,
            binding_threshold=binding_threshold,
            percentile_threshold=percentile_threshold,
            percentile_threshold_strategy=percentile_threshold_strategy,
            allele_specific_binding_thresholds=allele_specific_binding_thresholds,
            aggregate_inclusion_binding_threshold=aggregate_inclusion_binding_threshold,
            aggregate_inclusion_count_limit=aggregate_inclusion_count_limit,
            top_score_metric=top_score_metric,
        )
        self.tumor_purity = tumor_purity
        self.trna_vaf = trna_vaf
        self.trna_cov = trna_cov
        self.expn_val = expn_val
        # NOTE(review): the rationale for the fixed factor of 10 is not
        # visible in this file section - confirm before changing it.
        self.allele_expr_threshold = trna_vaf * expn_val * 10
        self.maximum_transcript_support_level = maximum_transcript_support_level

    # pvacbind w/ Index instead of Mutation
    def get_list_unique_mutation_keys(self, df):
        """Return the sorted, de-duplicated values of the ``Index`` column of ``df``."""
        return sorted(set(df["Index"].values.tolist()))

    # pvacbind w/ Index instead of Mutation
    def read_input_file(self, used_columns, dtypes):
        """Read ``self.input_file`` as a tab-separated table.

        ``used_columns`` and ``dtypes`` are accepted for interface
        compatibility but are not used: all columns are read. Only the literal
        ``NA`` is treated as missing, and ``Index`` is read as a string.
        """
        return pd.read_csv(self.input_file, delimiter='\t', float_precision='high', low_memory=False,
                           na_values="NA", keep_default_na=False, dtype={"Index": str})

    # pvacbind w/ Index instead of Mutation
    def get_sub_df(self, all_epitopes_df, df_key):
        """Return a copy of the rows whose ``Index`` equals ``df_key``, plus the key itself."""
        df = all_epitopes_df[all_epitopes_df['Index'] == df_key].copy()
        return df, df_key

    def get_tier(self, mutation, vaf_clonal):
        """Assign a tier to a candidate.

        Args:
            mutation: Record providing ``HLA Allele``, the
                ``<top_score_metric> IC50 Score`` / ``Percentile`` columns,
                ``Transcript Support Level``, ``Tumor RNA VAF``,
                ``Gene Expression``, ``Tumor RNA Depth`` and ``Tumor DNA VAF``.
                The string ``'NA'`` in a VAF, expression or depth field means
                the value is unavailable.
            vaf_clonal: Clonal DNA VAF; a Tumor DNA VAF below half of this
                value fails the clonality check.

        Returns:
            str: One of ``"Pass"``, ``"Subclonal"``, ``"LowExpr"``,
            ``"NoExpr"`` or ``"Poor"``.
        """
        if self.use_allele_specific_binding_thresholds and mutation['HLA Allele'] in self.allele_specific_binding_thresholds:
            binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']]
        else:
            binding_threshold = self.binding_threshold

        ic50_pass = mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold
        percentile_pass = (
            self.percentile_threshold is None or
            mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold
        )
        # 'conservative' requires both criteria; any other strategy accepts either.
        binding_pass = (
            (ic50_pass and percentile_pass)
            if self.percentile_threshold_strategy == 'conservative'
            else (ic50_pass or percentile_pass)
        )

        # "Not Supported" passes, a missing TSL fails, and a numeric TSL must
        # not exceed the configured maximum.
        tsl = mutation["Transcript Support Level"]
        if tsl == "Not Supported":
            tsl_pass = True
        elif pd.isna(tsl):
            tsl_pass = False
        else:
            tsl_pass = not (tsl > self.maximum_transcript_support_level)

        allele_expr_pass = True
        if (mutation['Tumor RNA VAF'] != 'NA' and mutation['Gene Expression'] != 'NA' and
                mutation['Tumor RNA VAF'] * mutation['Gene Expression'] <= self.allele_expr_threshold):
            allele_expr_pass = False

        vaf_clonal_pass = True
        if mutation['Tumor DNA VAF'] != 'NA' and mutation['Tumor DNA VAF'] < (vaf_clonal / 2):
            vaf_clonal_pass = False

        # writing these out as explicitly as possible for ease of understanding
        if (binding_pass and
                allele_expr_pass and
                vaf_clonal_pass and
                tsl_pass):
            return "Pass"

        # not in founding clone
        if (binding_pass and
                allele_expr_pass and
                not vaf_clonal_pass and
                tsl_pass):
            return "Subclonal"

        # relax expression.  Include sites that have reasonable vaf but zero overall gene expression
        lowexpr = False
        # All three values must be available. The depth check previously
        # compared a list literal with 'NA' (always true), so an 'NA' depth
        # could reach the numeric comparison below and raise TypeError.
        if (mutation['Tumor RNA VAF'] != 'NA' and
                mutation['Gene Expression'] != 'NA' and
                mutation['Tumor RNA Depth'] != 'NA'):
            if ((mutation["Tumor RNA VAF"] * mutation["Gene Expression"] > 0) or
                    (mutation["Gene Expression"] == 0 and
                     mutation["Tumor RNA Depth"] > self.trna_cov and
                     mutation["Tumor RNA VAF"] > self.trna_vaf)):
                lowexpr = True

        # if low expression is the only strike against it, it gets lowexpr label (multiple strikes will pass through to poor)
        if (binding_pass and
                lowexpr and
                vaf_clonal_pass and
                tsl_pass):
            return "LowExpr"

        # zero expression
        if (mutation["Gene Expression"] == 0 or mutation["Tumor RNA VAF"] == 0) and not lowexpr:
            return "NoExpr"

        # everything else
        return "Poor"

    def sort_table(self, df):
        """Sort the aggregated table in place and return it.

        Rows are ordered by tier (Pass, LowExpr, Subclonal, Poor, NoExpr),
        then by the sum of the dense rank of ``IC50 MT`` (ascending) and the
        dense rank of numeric ``Allele Expr`` (descending, non-numeric values
        last), then by ``Gene``, ``Transcript`` and ``AA Change``. The
        temporary ranking columns are removed before returning.
        """
        # make sure the tiers sort in the expected order
        tier_sorter = ["Pass", "LowExpr", "Subclonal", "Poor", "NoExpr"]
        sorter_index = dict(zip(tier_sorter, range(len(tier_sorter))))
        df["rank_tier"] = df['Tier'].map(sorter_index)

        df["rank_ic50"] = df["IC50 MT"].rank(ascending=True, method='dense')
        df["rank_expr"] = pd.to_numeric(df["Allele Expr"], errors='coerce').rank(ascending=False, method='dense', na_option="bottom")
        df["rank"] = df["rank_ic50"] + df["rank_expr"]

        df.sort_values(by=["rank_tier", "rank", "Gene", "Transcript", "AA Change"], inplace=True, ascending=True)

        df.drop(labels=["rank_tier", "rank_ic50", "rank_expr", "rank"], axis=1, inplace=True)

        return df

    # pvacbind w/ vaf and expression info included
    def assemble_result_line(self, best, key, vaf_clonal, hla, anno_count, included_peptide_count, good_binder_count):
        """Build one aggregated report row from the best peptide record.

        Args:
            best: Record of the best peptide; must support ``in`` and key
                lookup for the columns read below.
            key: Value written to the ``ID`` column.
            vaf_clonal: Passed through to ``get_tier``.
            hla: Mapping of allele name to value. Entries are emitted in
                sorted order with any ``HLA-`` substring removed from the name.
            anno_count: Not used by this implementation.
            included_peptide_count: Value for ``Num Included Peptides``.
            good_binder_count: Value for ``Num Passing Peptides``.

        Returns:
            dict: The row, keyed by output column name, in output column order.
        """
        tier = self.get_tier(mutation=best, vaf_clonal=vaf_clonal)

        out_dict = {'ID': key}
        out_dict.update({k.replace('HLA-', ''): v for k, v in sorted(hla.items())})

        # Optional columns fall back to a placeholder when absent.
        gene = best['Gene Name'] if 'Gene Name' in best else 'NA'
        transcript = best['Transcript'] if 'Transcript' in best else 'NA'
        problematic_positions = best['Problematic Positions'] if 'Problematic Positions' in best else 'None'

        # "Not Supported" and missing values pass through unchanged; numeric
        # levels are written as integer strings.
        raw_tsl = best['Transcript Support Level']
        if raw_tsl == "Not Supported" or pd.isna(raw_tsl):
            tsl = raw_tsl
        else:
            tsl = str(int(raw_tsl))
        allele_expr = self.calculate_allele_expr(best)

        out_dict.update({
            'Gene': gene,
            'Transcript': transcript,
            'Junction Name': best['Junction'],
            'AA Change': best['Amino Acid Change'],
            'Best Peptide': best["Epitope Seq"],
            'TSL': tsl,
            'Allele': best["HLA Allele"],
            'Pos': best['Protein Position'],
            'Prob Pos': problematic_positions,
            'Num Included Peptides': included_peptide_count,
            'Num Passing Peptides': good_binder_count,
            'IC50 MT': best["{} IC50 Score".format(self.top_score_metric)],
            '%ile MT': best["{} Percentile".format(self.top_score_metric)],
            'RNA Expr': best["Gene Expression"],
            'RNA VAF': best["Tumor RNA VAF"],
            'Allele Expr': allele_expr,
            'RNA Depth': best["Tumor RNA Depth"],
            'DNA VAF': best["Tumor DNA VAF"],
            'Tier': tier,
            'Evaluation': 'Pending',
        })
        return out_dict
5✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc