• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

griffithlab / pVACtools / 15877160304

25 Jun 2025 01:05PM UTC coverage: 86.038% (-0.2%) from 86.224%
15877160304

Pull #1255

github

web-flow
Merge 652286d65 into 5e8083eb6
Pull Request #1255: Fix bug in processing inframe deletions during aggregation

0 of 1 new or added line in 1 file covered. (0.0%)

19 existing lines in 2 files now uncovered.

7863 of 9139 relevant lines covered (86.04%)

4.29 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.27
/pvactools/lib/prediction_class.py
1
from abc import ABCMeta, abstractmethod
5✔
2
import os
5✔
3
import csv
5✔
4
import sys
5✔
5
import inspect
5✔
6
import requests
5✔
7
import re
5✔
8
import pandas as pd
5✔
9
import time
5✔
10
from subprocess import run, DEVNULL, STDOUT
5✔
11
import tempfile
5✔
12
from collections import defaultdict
5✔
13
from Bio import SeqIO
5✔
14
import random
5✔
15
import uuid
5✔
16
import io
5✔
17
from datetime import datetime
5✔
18

19
import pvactools.lib.run_utils
5✔
20

21
class IEDB(metaclass=ABCMeta):
    """Mixin for predictors that run through IEDB.

    Predictions are obtained either from a local IEDB executable (when
    ``iedb_executable_path`` is given to :meth:`predict`) or by POSTing to
    the web API at ``self.url``.
    """

    @classmethod
    def iedb_prediction_methods(cls):
        """Return ``iedb_prediction_method`` for every class in ``cls.prediction_classes()``.

        ``prediction_classes`` is not defined on this mixin; it must be
        supplied by the class this mixin is combined with.
        """
        return [prediction_class().iedb_prediction_method for prediction_class in cls.prediction_classes()]

    @abstractmethod
    def parse_iedb_allele_file(self):
        """Load the allele information for this prediction method."""
        pass

    # NOTE(review): predict() calls this hook with five positional arguments
    # (executable path, method, allele, input file, epitope length), which does
    # not match the ``args`` signature declared here. Overrides must accept
    # the five-argument form.
    @abstractmethod
    def iedb_executable_params(self, args):
        """Return the command-line arguments for the local IEDB executable."""
        pass

    @property
    @abstractmethod
    def iedb_prediction_method(self):
        """Name of the prediction method as sent to IEDB."""
        pass

    @property
    @abstractmethod
    def url(self):
        """URL of the IEDB web API endpoint to POST to."""
        pass

    @classmethod
    def filter_response(cls, response_text):
        """Drop everything before the first line starting with ``b"allele"``.

        Args:
            response_text: raw ``bytes`` output of the IEDB executable.

        Returns:
            The header line and all following lines joined with ``b"\\n"``,
            or ``None`` when no line starts with ``b"allele"``.
        """
        lines = response_text.splitlines()
        for index, line in enumerate(lines):
            if line.startswith(b"allele"):
                return b"\n".join(lines[index:])
        return None

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def check_iedb_api_response_matches(self, input_file, response_text, epitope_length):
        """Check that the API returned exactly the peptides that were submitted.

        Args:
            input_file: path of the FASTA file that was submitted.
            response_text: tab-separated response body (``str``).
            epitope_length: window length used to enumerate input peptides.

        Returns:
            ``(match, input_peptides, output_peptides)`` where ``match`` is
            ``True`` when both peptide sets are equal.
        """
        input_peptides = set()
        with open(input_file) as input_fh:
            for record in SeqIO.parse(input_fh, "fasta"):
                # Work on a plain str so the set comparison below does not
                # depend on how sequence objects hash and compare to str.
                seq = str(record.seq)
                input_peptides.update(seq[i:i+epitope_length] for i in range(0, len(seq)-epitope_length+1))

        output_peptides = set()
        for record in csv.DictReader(io.StringIO(response_text), delimiter="\t"):
            if 'peptide' in record:
                output_peptides.add(record['peptide'])

        return (input_peptides == output_peptides, input_peptides, output_peptides)

    @staticmethod
    def _log_iedb_mismatch(header, timestamp, data, response_text, log_file):
        """Append a mismatch report to ``log_file``, or print it when no log file is set."""
        log_text = header
        log_text += "{}\n".format(timestamp)
        log_text += "Inputs:\n"
        log_text += "{}\n".format(data)
        log_text += "Output:\n"
        log_text += "{}\n".format(response_text)
        if log_file:
            with open(log_file, "a") as log_fh:
                log_fh.write(log_text)
        else:
            print(log_text)

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Run an IEDB prediction for one allele and epitope length.

        Args:
            input_file: path of the FASTA file to predict on.
            allele: allele name.
            epitope_length: epitope length to request.
            iedb_executable_path: path of a local IEDB executable, or ``None``
                to use the web API.
            iedb_retries: maximum number of API retries.
            tmp_dir: directory for the temporary response file (executable mode).
            log_dir: directory in which ``iedb.log`` is written; mismatch
                reports are printed when ``None``.

        Returns:
            ``(response, mode)``: ``mode`` is ``'wb'`` with the filtered bytes
            of the executable output, or ``'w'`` with the API response text.

        Exits the process via ``sys.exit`` when the API does not answer with
        status 200 or its peptides still do not match after all retries.
        """
        log_file = os.path.join(log_dir, "iedb.log") if log_dir is not None else None

        if iedb_executable_path is not None:
            arguments = [sys.executable]
            arguments.extend(self.iedb_executable_params(iedb_executable_path, self.iedb_prediction_method, allele, input_file, epitope_length))
            # The context manager closes the temp file even when run() raises.
            with tempfile.TemporaryFile(dir=tmp_dir) as response_fh:
                run(arguments, stdout=response_fh, check=True)
                response_fh.seek(0)
                response_text = self.filter_response(response_fh.read())
            return (response_text, 'wb')

        with open(input_file, 'r') as input_fh:
            sequence_text = input_fh.read()
        data = {
            'sequence_text': sequence_text,
            'method':        self.iedb_prediction_method,
            'allele':        allele.replace('-DPB', '/DPB').replace('-DQB', '/DQB'),
            'length':        epitope_length,
            'user_tool':     'pVac-seq',
        }

        response_timestamp = datetime.now()
        response = requests.post(self.url, data=data)
        peptides_match = self.check_iedb_api_response_matches(input_file, response.text, epitope_length)[0]
        retries = 0
        while (response.status_code == 500 or response.status_code == 403 or not peptides_match) and retries < iedb_retries:
            if response.status_code == 200 and not peptides_match:
                self._log_iedb_mismatch("IEDB API Output doesn't match input. Retrying.\n", response_timestamp, data, response.text, log_file)

            # Randomised back-off that grows with the retry count; the first
            # retry is issued without waiting because retries is still 0.
            random.seed(uuid.uuid4().int)
            time.sleep(random.randint(30,90) * retries)
            retries += 1
            print("IEDB: Retry %s of %s" % (retries, iedb_retries))
            response_timestamp = datetime.now()
            response = requests.post(self.url, data=data)
            peptides_match = self.check_iedb_api_response_matches(input_file, response.text, epitope_length)[0]

        if response.status_code != 200:
            sys.exit("Error posting request to IEDB.\n%s" % response.text)
        if not peptides_match:
            self._log_iedb_mismatch("Error. IEDB API Output doesn't match input and number of retries exceeded.\n", response_timestamp, data, response.text, log_file)
            sys.exit("Error. IEDB API Output doesn't match input and number of retries exceeded.")

        return (response.text, 'w')
5✔
141

142
class MHCnuggets(metaclass=ABCMeta):
    """Mixin that runs predictions through the ``call_mhcnuggets.py`` helper script."""

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def valid_allele_names_for_class(self, class_type):
        """Return the non-empty lines of ``MHCnuggets.txt`` for ``class_type``.

        Args:
            class_type: name of the sub-directory of ``iedb_alleles`` to read
                from (the callers in this file pass ``'class_i'`` / ``'class_ii'``).
        """
        base_dir          = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', class_type)
        alleles_file_name = os.path.join(alleles_dir, "MHCnuggets.txt")
        with open(alleles_file_name, 'r') as fh:
            return [name for name in fh.read().split('\n') if name]

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, class_type, tmp_dir=None, log_dir=None):
        """Run ``call_mhcnuggets.py`` in a subprocess and load its CSV output.

        ``iedb_executable_path``, ``iedb_retries`` and ``log_dir`` are accepted
        for signature compatibility and are not used here.

        Returns:
            ``(dataframe, 'pandas')`` with the CSV written by the script.

        Raises:
            Exception: when the subprocess fails; the message carries the
                captured stderr. Temporary files are removed in every case.
        """
        tmp_output_file = tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False)
        stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        try:
            script = os.path.join(os.path.dirname(os.path.realpath(__file__)), "call_mhcnuggets.py")
            arguments = ["python", script, input_file, allele, str(epitope_length), class_type, tmp_output_file.name]
            if tmp_dir:
                arguments.extend(['--tmp-dir', tmp_dir])
            try:
                run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
            except Exception as error:
                stderr_fh.close()
                with open(stderr_fh.name, 'r') as fh:
                    err = fh.read()
                raise Exception("An error occurred while calling MHCnuggets:\n{}".format(err)) from error
            tmp_output_file.close()
            df = pd.read_csv(tmp_output_file.name)
        finally:
            # Both files were created with delete=False, so remove them here
            # regardless of whether the call succeeded.
            stderr_fh.close()
            tmp_output_file.close()
            os.unlink(stderr_fh.name)
            os.unlink(tmp_output_file.name)
        return (df, 'pandas')
5✔
174

175
class PredictionClass(metaclass=ABCMeta):
    """Abstract base of all prediction algorithms, plus class-level registry helpers."""

    # Per-predictor allele data; concrete classes in this file fill it lazily on the instance.
    valid_allele_names_dict = {}
    # Cache of allele -> cutoff, filled on first use from cutoffs.csv.
    allele_cutoff_dict = {}

    @classmethod
    def prediction_classes(cls):
        """Return ``cls`` (if concrete) and all concrete classes below it, depth-first."""
        prediction_classes = [] if inspect.isabstract(cls) else [cls]
        for subclass in cls.__subclasses__():
            prediction_classes.extend(subclass.prediction_classes())
        return prediction_classes

    @classmethod
    def prediction_methods(cls):
        """Return the sorted class names of all concrete prediction classes."""
        return sorted(prediction_class.__name__ for prediction_class in cls.prediction_classes())

    @classmethod
    def prediction_methods_with_all(cls):
        """Return :meth:`prediction_methods` followed by the ``all*`` pseudo-methods."""
        methods = cls.prediction_methods()
        methods.extend(['all', 'all_class_i', 'all_class_ii'])
        return methods

    @classmethod
    def prediction_class_for_iedb_prediction_method(cls, method):
        """Return a predictor instance for an IEDB method name.

        IEDB-backed classes are matched on their ``iedb_prediction_method``;
        otherwise ``method`` is looked up as a class name in this module
        (``AttributeError`` if there is no such name).
        """
        for prediction_class in cls.prediction_classes():
            # Only IEDB-backed predictors expose iedb_prediction_method.
            if not issubclass(prediction_class, (IEDBMHCI, IEDBMHCII)):
                continue
            prediction_class_object = prediction_class()
            if prediction_class_object.iedb_prediction_method == method:
                return prediction_class_object
        module = getattr(sys.modules[__name__], method)
        return module()

    @classmethod
    def prediction_class_name_for_iedb_prediction_method(cls, method):
        """Return the class name of the predictor resolved for ``method``."""
        return cls.prediction_class_for_iedb_prediction_method(method).__class__.__name__

    @classmethod
    def allele_info(cls, prediction_algorithms, name_filter):
        """List alleles together with the algorithms that support them.

        Args:
            prediction_algorithms: comma-separated class names, or ``None``
                for all concrete prediction classes.
            name_filter: case-insensitive substring an allele must contain,
                or ``None`` for no filtering.

        Returns:
            A list of ``{'name': allele, 'prediction_algorithms': [class names]}``.
        """
        if prediction_algorithms is None:
            prediction_classes = cls.prediction_classes()
        else:
            prediction_classes = [globals()[name] for name in prediction_algorithms.split(',')]
        needle = None if name_filter is None else name_filter.lower()

        alleles = defaultdict(list)
        for prediction_class in prediction_classes:
            for allele in prediction_class().valid_allele_names():
                if needle is None or needle in allele.lower():
                    alleles[allele].append(prediction_class.__name__)
        return [
            {
                'name': allele,
                'prediction_algorithms': algorithms,
            }
            for allele, algorithms in alleles.items()
        ]

    @classmethod
    def all_valid_allele_names(cls):
        """Return the de-duplicated allele names of all concrete prediction classes."""
        valid_alleles = set()
        for prediction_class in cls.prediction_classes():
            valid_alleles.update(prediction_class().valid_allele_names())
        return list(valid_alleles)

    @classmethod
    def check_alleles_valid(cls, alleles):
        """Exit via ``sys.exit`` on the first allele no prediction class supports."""
        # A set gives constant-time membership tests for each allele.
        valid_alleles = set(cls.all_valid_allele_names())
        for allele in alleles:
            if allele not in valid_alleles:
                sys.exit("Allele %s not valid. Run `pvacseq valid_alleles` for a list of valid allele names." % allele)

    @classmethod
    def allele_to_species_map(cls):
        """Return the mapping of allele-name prefix to species name."""
        return {
            'HLA' : 'human',
            'DP'  : 'human',
            'DQ'  : 'human',
            'DR'  : 'human',
            'Atbe': 'white-fronted spider monkey',
            'Atfu': 'black-headed spider monkey',
            'BoLA': 'cow',
            'Caja': 'common marmoset',
            'Cemi': 'blue monkey',
            'Chae': 'grivet',
            'DLA' : 'dog',
            'Eqca': 'horse',
            'Gogo': 'gorilla',
            'H-2' : 'mouse',
            'H2'  : 'mouse',
            'Hyla': 'lar gibbon',
            'Lero': 'golden lion tamarin',
            'Maar': 'stump-tailed macaque',
            'Mafa': 'crab-eating macaque',
            'Mamu': 'rhesus macaque',
            'Mane': 'southern pig-tailed macaque',
            'Onmy': 'rainbow trout',
            'Ovar': 'sheep',
            'Paan': 'olive baboon',
            'Pacy': 'yellow baboon',
            'Paha': 'hamadryas baboon',
            'Papa': 'bonobo',
            'Patr': 'chimpanzee',
            'Pipi': 'white-faced saki',
            'Popy': 'bornean orangutan',
            'Safu': 'brown-mantled tamarin',
            'Sage': "Geoffroy's tamarin",
            'Samy': 'moustached tamarin',
            'Saoe': 'cottontop tamarin',
            'Sasa': 'atlantic salmon',
            'Sasc': 'common squirrel monkey',
            'SLA' : 'pig',
        }

    @classmethod
    def species_for_allele(cls, allele):
        """Return the species whose prefix ``allele`` starts with.

        Raises:
            Exception: when no prefix or more than one prefix matches.
        """
        species = [v for k, v in PredictionClass.allele_to_species_map().items() if allele.startswith(k)]
        if len(species) == 1:
            return species[0]
        if len(species) == 0:
            raise Exception("Unable to determine species for allele {}".format(allele))
        raise Exception("Multiple matching species found for allele {}".format(allele))

    @classmethod
    def parse_allele_cutoff_file(cls):
        """Read ``cutoffs.csv`` and return ``{allele: allele_specific_cutoff}`` (values as read, i.e. strings)."""
        base_dir                = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        iedb_alleles_dir        = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles')
        allele_cutoff_file_name = os.path.join(iedb_alleles_dir, "cutoffs.csv")
        with open(allele_cutoff_file_name) as allele_cutoff_file:
            return {row['allele']: row['allele_specific_cutoff'] for row in csv.DictReader(allele_cutoff_file)}

    @classmethod
    def print_all_allele_cutoffs(cls):
        """Print every allele and its cutoff, tab-separated and sorted."""
        if not cls.allele_cutoff_dict:
            cls.allele_cutoff_dict = cls.parse_allele_cutoff_file()
        for allele, cutoff in sorted(cls.allele_cutoff_dict.items()):
            print("%s\t%s" % (allele, cutoff))

    @classmethod
    def cutoff_for_allele(cls, allele):
        """Return the cutoff recorded for ``allele``, or ``None`` when there is none."""
        if not cls.allele_cutoff_dict:
            cls.allele_cutoff_dict = cls.parse_allele_cutoff_file()
        return cls.allele_cutoff_dict.get(allele, None)

    @abstractmethod
    def valid_allele_names(self):
        """Return the allele names this predictor supports."""
        pass

    @property
    @abstractmethod
    def needs_epitope_length(self):
        """Whether this predictor requires an epitope length."""
        pass

    def check_allele_valid(self, allele):
        """Exit via ``sys.exit`` when ``allele`` is not supported by this predictor."""
        valid_alleles = self.valid_allele_names()
        if allele not in valid_alleles:
            sys.exit("Allele %s not valid for method %s. Run `pvacseq valid_alleles %s` for a list of valid allele names." % (allele, self.__class__.__name__, self.__class__.__name__))
×
338

339

340
class MHCI(PredictionClass, metaclass=ABCMeta):
    """Abstract base of the MHC class I predictors."""

    @property
    def needs_epitope_length(self):
        """Class I predictors report that an epitope length is required."""
        return True
×
344

345
class DeepImmuno(MHCI):
    """Predictor that shells out to the ``deepimmuno-cnn`` command."""

    def valid_allele_names(self):
        """Return the ``HLA`` column of ``DeepImmuno.tsv``."""
        base_dir          = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        alleles_file_name = os.path.join(alleles_dir, "DeepImmuno.tsv")
        with open(alleles_file_name) as alleles_file:
            return [row['HLA'] for row in csv.DictReader(alleles_file, delimiter='\t')]

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def valid_lengths_for_allele(self, allele):
        """Return the supported epitope lengths (independent of ``allele``)."""
        return [9,10]

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Score every epitope of ``input_file`` with ``deepimmuno-cnn``.

        ``iedb_executable_path``, ``iedb_retries`` and ``log_dir`` are accepted
        for signature compatibility and are not used here.

        Returns:
            ``(dataframe, 'pandas')``: the tool output restricted to each
            record's epitopes, with ``seq_num`` and ``start`` columns added.
            The frame is empty when the input yields no epitopes.

        Raises:
            Exception: when the subprocess fails; the message carries the
                captured stderr. Temporary files are removed in every case.
        """
        # Parse the FASTA once; the per-record epitopes are needed both to
        # build the tool input and to annotate its output.
        epitopes_by_record = [
            (record.id, pvactools.lib.run_utils.determine_neoepitopes(str(record.seq), epitope_length))
            for record in SeqIO.parse(input_file, "fasta")
        ]
        all_epitopes = list({epitope for _, epitopes in epitopes_by_record for epitope in epitopes.values()})
        if len(all_epitopes) == 0:
            return (pd.DataFrame(), 'pandas')

        tmp_input_file = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        output_dir = tempfile.TemporaryDirectory(dir=tmp_dir)
        try:
            for epitope in all_epitopes:
                tmp_input_file.write("{},{}\n".format(epitope, allele.replace(':', '')))
            tmp_input_file.close()
            arguments = ['deepimmuno-cnn', '--mode', 'multiple', '--intdir', tmp_input_file.name, '--outdir', output_dir.name]
            try:
                run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
            except Exception as error:
                stderr_fh.close()
                with open(stderr_fh.name, 'r') as fh:
                    err = fh.read()
                raise Exception("An error occurred while calling DeepImmuno:\n{}".format(err)) from error
            tmp_output_file_name = os.path.join(output_dir.name, "deepimmuno-cnn-result.txt")
            df = pd.read_csv(tmp_output_file_name, sep="\t")
        finally:
            # Remove the scratch files whether or not the tool succeeded.
            tmp_input_file.close()
            stderr_fh.close()
            os.unlink(stderr_fh.name)
            os.unlink(tmp_input_file.name)
            output_dir.cleanup()

        df.rename(columns={
            'HLA': 'allele',
        }, inplace=True)

        # assign() works on a copy of the filtered rows, and the frames are
        # concatenated once instead of growing the result inside the loop.
        frames = [
            df[df['peptide'] == epitope].assign(seq_num=seq_num, start=start)
            for seq_num, epitopes in epitopes_by_record
            for start, epitope in epitopes.items()
        ]
        results = pd.concat(frames, axis=0) if frames else pd.DataFrame()
        return (results, 'pandas')
5✔
408

409

410
class BigMHC(metaclass=ABCMeta):
    """Mixin that shells out to the ``bigmhc_predict`` command."""

    def valid_allele_names(self):
        """Return the non-empty lines of ``BigMHC.txt``."""
        base_dir          = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        alleles_file_name = os.path.join(alleles_dir, "BigMHC.txt")
        with open(alleles_file_name, 'r') as fh:
            return [name for name in fh.read().split('\n') if name]

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def valid_lengths_for_allele(self, allele):
        """Return the supported epitope lengths (independent of ``allele``)."""
        return [8,9,10,11,12,13,14,15]

    def predict_bigmhc(self, bigmhc_type, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Score every epitope of ``input_file`` with ``bigmhc_predict``.

        Args:
            bigmhc_type: value passed to the tool's ``-m`` option.

        ``iedb_executable_path``, ``iedb_retries`` and ``log_dir`` are accepted
        for signature compatibility and are not used here.

        Returns:
            ``(dataframe, 'pandas')``: the tool output (``pep``/``mhc`` renamed
            to ``peptide``/``allele``) restricted to each record's epitopes,
            with ``seq_num`` and ``start`` columns added. The frame is empty
            when the input yields no epitopes.

        Raises:
            Exception: when the subprocess fails; the message carries the
                captured stderr. Temporary files are removed in every case.
        """
        # Parse the FASTA once; the per-record epitopes are needed both to
        # build the tool input and to annotate its output.
        epitopes_by_record = [
            (record.id, pvactools.lib.run_utils.determine_neoepitopes(str(record.seq), epitope_length))
            for record in SeqIO.parse(input_file, "fasta")
        ]
        all_epitopes = list({epitope for _, epitopes in epitopes_by_record for epitope in epitopes.values()})
        if len(all_epitopes) == 0:
            return (pd.DataFrame(), 'pandas')

        tmp_input_file = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        tmp_output_file = tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False)
        stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        try:
            for epitope in all_epitopes:
                tmp_input_file.write("{}\n".format(epitope))
            tmp_input_file.close()
            arguments = ['bigmhc_predict', '-a', allele, '-i', tmp_input_file.name, '-p', '0', '-c', '0', '-o', tmp_output_file.name, '-m', bigmhc_type, '-d', 'cpu']
            try:
                run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
            except Exception as error:
                stderr_fh.close()
                with open(stderr_fh.name, 'r') as fh:
                    err = fh.read()
                raise Exception("An error occurred while calling BigMHC:\n{}".format(err)) from error
            tmp_output_file.close()
            df = pd.read_csv(tmp_output_file.name)
        finally:
            # Remove the scratch files whether or not the tool succeeded.
            tmp_input_file.close()
            tmp_output_file.close()
            stderr_fh.close()
            os.unlink(stderr_fh.name)
            os.unlink(tmp_input_file.name)
            os.unlink(tmp_output_file.name)

        df.rename(columns={
            'pep': 'peptide',
            'mhc': 'allele',
        }, inplace=True)

        # assign() works on a copy of the filtered rows, and the frames are
        # concatenated once instead of growing the result inside the loop.
        frames = [
            df[df['peptide'] == epitope].assign(seq_num=seq_num, start=start)
            for seq_num, epitopes in epitopes_by_record
            for start, epitope in epitopes.items()
        ]
        results = pd.concat(frames, axis=0) if frames else pd.DataFrame()
        return (results, 'pandas')
5✔
470

471
class BigMHC_EL(BigMHC, MHCI):
    """BigMHC predictor run with model type ``'el'``."""

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Delegate to ``predict_bigmhc`` with type ``'el'``, forwarding ``tmp_dir`` and ``log_dir``."""
        # Forward the caller's directories; hard-coding None here would make
        # the temporary files ignore the requested tmp_dir.
        return self.predict_bigmhc('el', input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=tmp_dir, log_dir=log_dir)
5✔
474

475
class BigMHC_IM(BigMHC, MHCI):
    """BigMHC predictor run with model type ``'im'``."""

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Delegate to ``predict_bigmhc`` with type ``'im'``, forwarding ``tmp_dir`` and ``log_dir``."""
        # Forward the caller's directories; hard-coding None here would make
        # the temporary files ignore the requested tmp_dir.
        return self.predict_bigmhc('im', input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=tmp_dir, log_dir=log_dir)
5✔
478

479
class MHCflurry(MHCI):
    """Predictor that shells out to the ``mhcflurry-predict`` command."""

    def valid_allele_names(self):
        """Return the non-empty lines of ``MHCflurry.txt``."""
        base_dir          = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        alleles_file_name = os.path.join(alleles_dir, "MHCflurry.txt")
        with open(alleles_file_name, 'r') as fh:
            return [name for name in fh.read().split('\n') if name]

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def valid_lengths_for_allele(self, allele):
        """Return the supported epitope lengths (independent of ``allele``)."""
        return [8,9,10,11,12,13,14,15]

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Score every epitope of ``input_file`` with ``mhcflurry-predict``.

        ``iedb_executable_path``, ``iedb_retries`` and ``log_dir`` are accepted
        for signature compatibility and are not used here.

        Returns:
            ``(dataframe, 'pandas')``: the tool output (affinity/prediction
            columns renamed to ``ic50``/``percentile``) restricted to each
            record's epitopes, with ``seq_num`` and ``start`` columns added.
            The frame is empty when the input yields no epitopes.

        Raises:
            Exception: when the subprocess fails; the message carries the
                captured stderr. Temporary files are removed in every case.
        """
        # Parse the FASTA once; the per-record epitopes are needed both to
        # build the command line and to annotate the tool output.
        epitopes_by_record = [
            (record.id, pvactools.lib.run_utils.determine_neoepitopes(str(record.seq), epitope_length))
            for record in SeqIO.parse(input_file, "fasta")
        ]
        all_epitopes = list({epitope for _, epitopes in epitopes_by_record for epitope in epitopes.values()})
        if len(all_epitopes) == 0:
            return (pd.DataFrame(), 'pandas')

        tmp_output_file = tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False)
        stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        try:
            arguments = ["mhcflurry-predict", "--alleles", allele, "--out", tmp_output_file.name, "--peptides"]
            arguments.extend(all_epitopes)
            try:
                run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
            except Exception as error:
                stderr_fh.close()
                with open(stderr_fh.name, 'r') as fh:
                    err = fh.read()
                raise Exception("An error occurred while calling MHCflurry:\n{}".format(err)) from error
            tmp_output_file.close()
            df = pd.read_csv(tmp_output_file.name)
        finally:
            # Remove the scratch files whether or not the tool succeeded.
            tmp_output_file.close()
            stderr_fh.close()
            os.unlink(stderr_fh.name)
            os.unlink(tmp_output_file.name)

        # Two spellings are mapped to each target name; which one the tool
        # emits is not visible here (presumably version dependent).
        df.rename(columns={
            'mhcflurry_prediction': 'ic50',
            'mhcflurry_affinity': 'ic50',
            'mhcflurry_prediction_percentile': 'percentile',
            'mhcflurry_affinity_percentile': 'percentile'
        }, inplace=True)

        # assign() works on a copy of the filtered rows, and the frames are
        # concatenated once instead of growing the result inside the loop.
        frames = [
            df[df['peptide'] == epitope].assign(seq_num=seq_num, start=start)
            for seq_num, epitopes in epitopes_by_record
            for start, epitope in epitopes.items()
        ]
        results = pd.concat(frames, axis=0) if frames else pd.DataFrame()
        return (results, 'pandas')
5✔
537

538
class MHCflurryEL(MHCflurry):
    """Separately named predictor that inherits all behaviour from ``MHCflurry``."""
    pass
5✔
540

541
class MHCnuggetsI(MHCI, MHCnuggets):
    """MHCnuggets predictor for class I alleles."""

    def valid_allele_names(self):
        """Return the allele names listed for ``'class_i'``."""
        return self.valid_allele_names_for_class('class_i')

    def valid_lengths_for_allele(self, allele):
        """Return the supported epitope lengths (independent of ``allele``)."""
        return [8,9,10,11,12,13,14,15]

    def mhcnuggets_allele(self, allele):
        """Return ``allele`` with every ``*`` removed."""
        return allele.replace('*', '')

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Delegate to ``MHCnuggets.predict`` with class type ``'I'``."""
        return MHCnuggets.predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, 'I', tmp_dir=tmp_dir, log_dir=log_dir)
5✔
553

554
class IEDBMHCI(MHCI, IEDB, metaclass=ABCMeta):
    """Abstract base of the IEDB-backed class I predictors."""

    @property
    def url(self):
        """URL of the IEDB class I web API."""
        return 'http://tools-cluster-interface.iedb.org/tools_api/mhci/'

    def parse_iedb_allele_file(self):
        """Read ``<iedb_prediction_method>.tsv`` and return ``{allele: [peptide lengths]}``."""
        #Ultimately we probably want this method to call out to IEDB but their command is currently broken
        #curl --data "method=ann&species=human" http://tools-api.iedb.org/tools_api/mhci/
        base_dir               = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        iedb_alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        iedb_alleles_file_name = os.path.join(iedb_alleles_dir, "%s.tsv" % self.iedb_prediction_method)
        alleles = {}
        with open(iedb_alleles_file_name) as iedb_alleles_file:
            for row in csv.DictReader(iedb_alleles_file, delimiter='\t'):
                alleles.setdefault(row['MHC'], []).append(int(row['PeptideLength']))
        return alleles

    def valid_allele_names(self):
        """Return the allele names from the allele file (parsed once per instance)."""
        if not self.valid_allele_names_dict:
            self.valid_allele_names_dict = self.parse_iedb_allele_file()
        return self.valid_allele_names_dict.keys()

    def valid_lengths_for_allele(self, allele):
        """Return the peptide lengths listed for ``allele`` (``KeyError`` if unknown)."""
        if not self.valid_allele_names_dict:
            self.valid_allele_names_dict = self.parse_iedb_allele_file()
        return self.valid_allele_names_dict[allele]

    def check_length_valid_for_allele(self, length, allele):
        """Exit via ``sys.exit`` when ``length`` is not listed for ``allele``.

        Returns ``True`` otherwise, matching the other
        ``check_length_valid_for_allele`` implementations in this file.
        """
        valid_lengths = self.valid_lengths_for_allele(allele)
        if length not in valid_lengths:
            sys.exit("Length %s not valid for allele %s and method %s." % (length, allele, self.iedb_prediction_method))
        return True

    def iedb_executable_params(self, iedb_executable_path, method, allele, input_file, epitope_length):
        """Return the argument list for the local IEDB class I executable."""
        return [iedb_executable_path, method, allele, str(epitope_length), input_file]
×
594

595
class NetMHC(IEDBMHCI):
    """IEDB class I predictor using method ``'ann'``."""

    @property
    def iedb_prediction_method(self):
        """IEDB method name for this predictor."""
        return 'ann'
5✔
599

600
class NetMHCpan(IEDBMHCI):
    """IEDB class I predictor using method ``'netmhcpan'``."""

    @property
    def iedb_prediction_method(self):
        """IEDB method name for this predictor."""
        return 'netmhcpan'
5✔
604

605
class NetMHCpanEL(IEDBMHCI):
    """IEDB class I predictor using method ``'netmhcpan_el'``."""

    @property
    def iedb_prediction_method(self):
        """IEDB method name for this predictor."""
        return 'netmhcpan_el'
5✔
609

610
class SMMPMBEC(IEDBMHCI):
    """IEDB class I predictor using method ``'smmpmbec'``."""

    @property
    def iedb_prediction_method(self):
        """IEDB method name for this predictor."""
        return 'smmpmbec'
5✔
614

615
class SMM(IEDBMHCI):
    """IEDB class I predictor using method ``'smm'``."""

    @property
    def iedb_prediction_method(self):
        """IEDB method name for this predictor."""
        return 'smm'
5✔
619

620
class NetMHCcons(IEDBMHCI):
    """IEDB class I predictor using method ``'netmhccons'``."""

    @property
    def iedb_prediction_method(self):
        """IEDB method name for this predictor."""
        return 'netmhccons'
5✔
624

625
class PickPocket(IEDBMHCI):
    """IEDB class I predictor using method ``'pickpocket'``."""

    @property
    def iedb_prediction_method(self):
        """IEDB method name for this predictor."""
        return 'pickpocket'
5✔
629

630
class MHCII(PredictionClass, metaclass=ABCMeta):
    """Abstract base of the MHC class II predictors."""

    @property
    def needs_epitope_length(self):
        """Class II predictors report that no epitope length is required."""
        return False
×
634

635
class MHCnuggetsII(MHCII, MHCnuggets):
    """MHCnuggets predictor for class II alleles."""

    def valid_allele_names(self):
        """Return the allele names listed for ``'class_ii'``."""
        return self.valid_allele_names_for_class('class_ii')

    def valid_lengths_for_allele(self, allele):
        """Return the supported epitope lengths, 11 through 30 (independent of ``allele``)."""
        return [11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]

    def mhcnuggets_allele(self,allele):
        """Return ``allele`` prefixed with ``HLA-`` and with every ``*`` removed."""
        return "HLA-{}".format(allele).replace('*', '')

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Delegate to ``MHCnuggets.predict`` with class type ``'II'``."""
        return MHCnuggets.predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, 'II', tmp_dir=tmp_dir, log_dir=log_dir)
5✔
647

648
class IEDBMHCII(MHCII, IEDB, metaclass=ABCMeta):
    """Abstract base of the IEDB-backed class II predictors."""

    @property
    def url(self):
        """URL of the IEDB class II web API."""
        return 'http://tools-cluster-interface.iedb.org/tools_api/mhcii/'

    def parse_iedb_allele_file(self):
        """Return the right-stripped lines of the allele file for this method."""
        #Ultimately we probably want this method to call out to IEDB but their command is currently broken
        #curl --data "method=ann&species=human" http://tools-api.iedb.org/tools_api/mhci/
        method = self.iedb_prediction_method
        # NOTE(review): "netmhciipan" is a substring of every method name that
        # contains "netmhciipan_el", so the second candidate can never be
        # selected. If a separate netmhciipan_el.tsv is intended, the longer
        # name has to be tested first - confirm before changing.
        file_name = next(
            (name for name in ["netmhciipan", "netmhciipan_el"] if name in method),
            method
        )
        base_dir               = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        iedb_alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_ii')
        iedb_alleles_file_name = os.path.join(iedb_alleles_dir, "%s.tsv" % file_name)
        with open(iedb_alleles_file_name) as iedb_alleles_file:
            return [row.rstrip() for row in iedb_alleles_file]

    def valid_lengths_for_allele(self, allele):
        """Return the supported epitope lengths, 11 through 30 (independent of ``allele``)."""
        return [11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]

    def valid_allele_names(self):
        """Return the allele names from the allele file (parsed once per instance)."""
        if not self.valid_allele_names_dict:
            self.valid_allele_names_dict = self.parse_iedb_allele_file()
        return self.valid_allele_names_dict

    def iedb_executable_params(self, iedb_executable_path, method, allele, input_file, epitope_length):
        """Return the argument list for the local IEDB class II executable.

        ``-DPB`` / ``-DQB`` in the allele name are rewritten to ``/DPB`` / ``/DQB``.
        """
        allele = allele.replace('-DPB', '/DPB').replace('-DQB', '/DQB')
        return [iedb_executable_path, method, allele, input_file, str(epitope_length)]
×
681

682
class NetMHCIIVersion:
    """Holder for the NetMHCIIpan version selected at runtime."""

    # Read by NetMHCIIpan / NetMHCIIpanEL; None selects the unversioned method name.
    netmhciipan_version = None
5✔
684

685
class NetMHCIIpan(IEDBMHCII):
    """IEDB class II predictor using the ``netmhciipan_ba`` method family."""

    @property
    def iedb_prediction_method(self):
        """Return ``netmhciipan_ba``, suffixed with ``-<version>`` for 4.0, 4.2 and 4.3."""
        version = NetMHCIIVersion.netmhciipan_version
        if version in ['4.0', '4.2', '4.3']:
            return 'netmhciipan_ba-' + version
        return 'netmhciipan_ba'
5✔
691

692
class NetMHCIIpanEL(IEDBMHCII):
    """IEDB class II predictor using the ``netmhciipan_el`` method family."""

    @property
    def iedb_prediction_method(self):
        """Return ``netmhciipan_el``, suffixed with ``-<version>`` for 4.0, 4.2 and 4.3."""
        version = NetMHCIIVersion.netmhciipan_version
        if version in ['4.0', '4.2', '4.3']:
            return 'netmhciipan_el-' + version
        return 'netmhciipan_el'
5✔
698

699
class NNalign(IEDBMHCII):
    """IEDB class II predictor using method ``'nn_align'``."""

    @property
    def iedb_prediction_method(self):
        """IEDB method name for this predictor."""
        return 'nn_align'
5✔
703

704
class SMMalign(IEDBMHCII):
    """IEDB class II predictor using method ``'smm_align'``."""

    @property
    def iedb_prediction_method(self):
        """IEDB method name for this predictor."""
        return 'smm_align'
5✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc