• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

griffithlab / pVACtools / 18038500289

26 Sep 2025 01:01PM UTC coverage: 82.716% (-0.8%) from 83.548%
18038500289

Pull #1307

github

web-flow
Merge bab58f8f7 into 9aa720e9d
Pull Request #1307: Add support for MixMHCpred and PRIME prediction algorithms

199 of 307 new or added lines in 2 files covered. (64.82%)

39 existing lines in 2 files now uncovered.

8997 of 10877 relevant lines covered (82.72%)

2.48 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

49.92
/pvactools/lib/prediction_class.py
1
from abc import ABCMeta, abstractmethod
3✔
2
import os
3✔
3
import csv
3✔
4
import sys
3✔
5
import inspect
3✔
6
import requests
3✔
7
import re
3✔
8
import pandas as pd
3✔
9
import time
3✔
10
from subprocess import run, DEVNULL, STDOUT
3✔
11
import tempfile
3✔
12
from collections import defaultdict
3✔
13
from Bio import SeqIO
3✔
14
import random
3✔
15
import uuid
3✔
16
import io
3✔
17
from datetime import datetime
3✔
18

19
import pvactools.lib.run_utils
3✔
20

21
class IEDB(metaclass=ABCMeta):
    """Mixin for prediction classes that are backed by an IEDB prediction method.

    Predictions are obtained either from a local IEDB installation (when
    ``iedb_executable_path`` is given to :meth:`predict`) or by POSTing the
    input sequences to ``self.url``. Concrete classes must provide
    ``parse_iedb_allele_file``, ``iedb_executable_params``,
    ``iedb_prediction_method`` and ``url``.
    """

    @classmethod
    def iedb_prediction_methods(cls):
        """Return the ``iedb_prediction_method`` of every class in ``cls.prediction_classes()``."""
        return [prediction_class().iedb_prediction_method for prediction_class in cls.prediction_classes()]

    @abstractmethod
    def parse_iedb_allele_file(self):
        pass

    # NOTE(review): predict() calls implementations with five positional
    # arguments (executable path, method, allele, input file, epitope length),
    # not the single ``args`` declared here -- confirm and align the signature.
    @abstractmethod
    def iedb_executable_params(self, args):
        pass

    @property
    @abstractmethod
    def iedb_prediction_method(self):
        pass

    @property
    @abstractmethod
    def url(self):
        pass

    @classmethod
    def filter_response(cls, response_text):
        """Strip everything that precedes the header line of an IEDB response.

        Args:
            response_text: raw output of the IEDB executable as ``bytes``.

        Returns:
            The lines from the first one starting with ``b"allele"`` onwards,
            joined with ``b"\\n"``; ``None`` when no such line exists.
        """
        lines = response_text.splitlines()
        for index, line in enumerate(lines):
            if line.startswith(b"allele"):
                return b"\n".join(lines[index:])
        return None

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def check_iedb_api_response_matches(self, input_file, response_text, epitope_length):
        """Check that an IEDB response covers exactly the peptides that were submitted.

        Args:
            input_file: path of the FASTA file that was sent to IEDB.
            response_text: tab-separated response body (``str``).
            epitope_length: length of the epitopes that were requested.

        Returns:
            ``(match, input_peptides, output_peptides)`` where the two sets hold
            every ``epitope_length``-mer of the input sequences and every value
            of the response's ``peptide`` column respectively.
        """
        input_peptides = set()
        with open(input_file) as input_fh:
            for record in SeqIO.parse(input_fh, "fasta"):
                # Work on a plain str so the comparison with the response
                # peptides (str) does not depend on how Seq objects hash.
                seq = str(record.seq)
                input_peptides.update(
                    seq[i:i + epitope_length] for i in range(len(seq) - epitope_length + 1)
                )

        output_peptides = {
            record['peptide']
            for record in csv.DictReader(io.StringIO(response_text), delimiter="\t")
            if 'peptide' in record
        }

        return (input_peptides == output_peptides, input_peptides, output_peptides)

    @staticmethod
    def _format_iedb_log(header, response_timestamp, data, response_text):
        """Build the log entry describing a mismatching IEDB API response."""
        return "".join([
            header,
            "{}\n".format(response_timestamp),
            "Inputs:\n",
            "{}\n".format(data),
            "Output:\n",
            "{}\n".format(response_text),
        ])

    @staticmethod
    def _write_iedb_log(log_text, log_file):
        """Append ``log_text`` to ``log_file``, or print it when no log file is set."""
        if log_file:
            with open(log_file, "a") as log_fh:
                log_fh.write(log_text)
        else:
            print(log_text)

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Run the IEDB prediction for one allele and epitope length.

        Args:
            input_file: path of the FASTA file with the sequences to predict on.
            allele: allele name.
            epitope_length: epitope length to request.
            iedb_executable_path: path handed to ``iedb_executable_params`` to
                run a local install; ``None`` to use the web API at ``self.url``.
            iedb_retries: maximum number of API retries.
            tmp_dir: directory for the temporary response file (local mode).
            log_dir: directory receiving ``iedb.log``; mismatches are printed
                when it is ``None``.

        Returns:
            ``(response, mode)``: filtered ``bytes`` and ``'wb'`` in local mode,
            the response text and ``'w'`` in API mode.

        Raises:
            SystemExit: when the API still answers with a non-200 status, or with
                peptides that do not match the input, after all retries.
            subprocess.CalledProcessError: when the local executable fails.
        """
        log_file = os.path.join(log_dir, "iedb.log") if log_dir is not None else None

        if iedb_executable_path is not None:
            arguments = [sys.executable]
            arguments.extend(self.iedb_executable_params(iedb_executable_path, self.iedb_prediction_method, allele, input_file, epitope_length))
            # The context manager closes the temp file even when run() raises.
            with tempfile.TemporaryFile(dir=tmp_dir) as response_fh:
                run(arguments, stdout=response_fh, check=True)
                response_fh.seek(0)
                response_text = self.filter_response(response_fh.read())
            return (response_text, 'wb')

        with open(input_file, 'r') as input_fh:
            sequence_text = input_fh.read()
        data = {
            'sequence_text': sequence_text,
            'method':        self.iedb_prediction_method,
            # presumably the separator the IEDB API expects for DP/DQ
            # alpha/beta pairs -- confirm against the API documentation
            'allele':        allele.replace('-DPB', '/DPB').replace('-DQB', '/DQB'),
            'length':        epitope_length,
            'user_tool':     'pVac-seq',
        }

        retries = 0
        while True:
            response_timestamp = datetime.now()
            response = requests.post(self.url, data=data)
            (peptides_match, input_peptides, output_peptides) = self.check_iedb_api_response_matches(input_file, response.text, epitope_length)

            needs_retry = response.status_code in (500, 403) or not peptides_match
            if not needs_retry or retries >= iedb_retries:
                break

            if response.status_code == 200 and not peptides_match:
                self._write_iedb_log(
                    self._format_iedb_log("IEDB API Output doesn't match input. Retrying.\n", response_timestamp, data, response.text),
                    log_file,
                )

            # Randomised back-off that grows with the retry count; the first
            # retry is immediate because ``retries`` is still 0 here.
            random.seed(uuid.uuid4().int)
            time.sleep(random.randint(30,90) * retries)
            retries += 1
            print("IEDB: Retry %s of %s" % (retries, iedb_retries))

        if response.status_code != 200:
            sys.exit("Error posting request to IEDB.\n%s" % response.text)
        if not peptides_match:
            self._write_iedb_log(
                self._format_iedb_log("Error. IEDB API Output doesn't match input and number of retries exceeded.\n", response_timestamp, data, response.text),
                log_file,
            )
            sys.exit("Error. IEDB API Output doesn't match input and number of retries exceeded.")

        return (response.text, 'w')
3✔
141

142
class MHCnuggets(metaclass=ABCMeta):
    """Mixin that runs MHCnuggets through the ``call_mhcnuggets.py`` helper script."""

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def valid_allele_names_for_class(self, class_type):
        """Return the non-empty lines of the ``MHCnuggets.txt`` allele list for ``class_type``.

        ``class_type`` is used as the sub-directory name below
        ``tools/pvacseq/iedb_alleles``.
        """
        base_dir          = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', class_type)
        alleles_file_name = os.path.join(alleles_dir, "MHCnuggets.txt")
        with open(alleles_file_name, 'r') as fh:
            return [line for line in fh.read().split('\n') if line]

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, class_type, tmp_dir=None, log_dir=None):
        """Run ``call_mhcnuggets.py`` in a subprocess and load its CSV output.

        ``iedb_executable_path``, ``iedb_retries`` and ``log_dir`` are accepted
        for signature compatibility and are not used here.

        Returns:
            ``(df, 'pandas')`` where ``df`` is the script's output file read
            with ``pandas.read_csv``.

        Raises:
            Exception: when the subprocess cannot be run or exits non-zero; the
                message carries the captured stderr.
        """
        tmp_output_file = tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False)
        stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        script = os.path.join(os.path.dirname(os.path.realpath(__file__)), "call_mhcnuggets.py")
        arguments = ["python", script, input_file, allele, str(epitope_length), class_type, tmp_output_file.name]
        if tmp_dir:
            arguments.extend(['--tmp-dir', tmp_dir])
        try:
            try:
                run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
            except Exception as error:
                # Surface what the tool wrote to stderr instead of the bare
                # subprocess error.
                stderr_fh.close()
                with open(stderr_fh.name, 'r') as fh:
                    err = fh.read()
                raise Exception("An error occurred while calling MHCnuggets:\n{}".format(err)) from error
            tmp_output_file.close()
            df = pd.read_csv(tmp_output_file.name)
        finally:
            # Both temp files were created with delete=False, so remove them
            # explicitly on success and on failure alike.
            for handle in (stderr_fh, tmp_output_file):
                handle.close()
                try:
                    os.unlink(handle.name)
                except FileNotFoundError:
                    pass
        return (df, 'pandas')
×
174

175
class PredictionClass(metaclass=ABCMeta):
    """Abstract root of all prediction algorithm classes.

    The class methods discover the concrete (non-abstract) subclasses and
    offer allele validation, allele-to-species lookup and access to the
    allele-specific cutoffs stored in ``cutoffs.csv``.
    """

    valid_allele_names_dict = {}
    # Filled from cutoffs.csv the first time a cutoff is requested.
    allele_cutoff_dict = {}

    @classmethod
    def prediction_classes(cls):
        """Return ``cls`` (when it is not abstract) followed by all concrete subclasses, recursively."""
        prediction_classes = []
        if not inspect.isabstract(cls):
            prediction_classes.append(cls)
        for subclass in cls.__subclasses__():
            prediction_classes.extend(subclass.prediction_classes())
        return prediction_classes

    @classmethod
    def prediction_methods(cls):
        """Return the sorted class names of all prediction classes."""
        return sorted(prediction_class.__name__ for prediction_class in cls.prediction_classes())

    @classmethod
    def prediction_methods_with_all(cls):
        """Return :meth:`prediction_methods` plus the ``all*`` group names."""
        methods = cls.prediction_methods()
        methods.extend(['all', 'all_class_i', 'all_class_ii'])
        return methods

    @classmethod
    def prediction_class_for_iedb_prediction_method(cls, method):
        """Return an instance of the prediction class that handles ``method``.

        IEDB-backed classes are matched on their ``iedb_prediction_method``;
        otherwise ``method`` is looked up as a class name in this module.

        Raises:
            AttributeError: when nothing matches and this module has no
                attribute named ``method``.
        """
        for prediction_class in cls.prediction_classes():
            prediction_class_object = prediction_class()
            if isinstance(prediction_class_object, (IEDBMHCI, IEDBMHCII)) and prediction_class_object.iedb_prediction_method == method:
                return prediction_class_object
        module = getattr(sys.modules[__name__], method)
        return module()

    @classmethod
    def prediction_class_name_for_iedb_prediction_method(cls, method):
        """Return the class name of :meth:`prediction_class_for_iedb_prediction_method`'s result."""
        return cls.prediction_class_for_iedb_prediction_method(method).__class__.__name__

    @classmethod
    def allele_info(cls, prediction_algorithms, name_filter):
        """List the valid alleles together with the algorithms supporting them.

        Args:
            prediction_algorithms: comma-separated class names, or ``None`` for
                all prediction classes.
            name_filter: case-insensitive substring an allele name must contain,
                or ``None`` for no filtering.

        Returns:
            A list of ``{'name': ..., 'prediction_algorithms': [...]}`` dicts.

        Raises:
            KeyError: when a requested algorithm is not a name in this module.
        """
        if prediction_algorithms is None:
            prediction_classes = cls.prediction_classes()
        else:
            prediction_classes = [globals()[name] for name in prediction_algorithms.split(',')]

        lowered_filter = None if name_filter is None else name_filter.lower()
        alleles = defaultdict(list)
        for prediction_class in prediction_classes:
            for allele in prediction_class().valid_allele_names():
                if lowered_filter is None or lowered_filter in allele.lower():
                    alleles[allele].append(prediction_class.__name__)

        return [
            {
                'name': allele,
                'prediction_algorithms': algorithms,
            }
            for allele, algorithms in alleles.items()
        ]

    @classmethod
    def all_valid_allele_names(cls):
        """Return the de-duplicated valid allele names of all prediction classes (unordered)."""
        valid_alleles = set()
        for prediction_class in cls.prediction_classes():
            valid_alleles.update(prediction_class().valid_allele_names())
        return list(valid_alleles)

    @classmethod
    def check_alleles_valid(cls, alleles):
        """Exit the program when any of ``alleles`` is unknown to every prediction class.

        Raises:
            SystemExit: on the first invalid allele.
        """
        # A set keeps each membership test O(1) regardless of how many
        # alleles the prediction classes know.
        valid_alleles = set(cls.all_valid_allele_names())
        for allele in alleles:
            if allele not in valid_alleles:
                sys.exit("Allele %s not valid. Run `pvacseq valid_alleles` for a list of valid allele names." % allele)

    @classmethod
    def allele_to_species_map(cls):
        """Return the mapping from allele-name prefix to species name."""
        return {
            'HLA' : 'human',
            'DP'  : 'human',
            'DQ'  : 'human',
            'DR'  : 'human',
            'Aotr': 'three-striped night monkey',
            'Atbe': 'white-fronted spider monkey',
            'Atfu': 'black-headed spider monkey',
            'Bogr': 'domestic yak',
            'BoLA': 'cow',
            'Caja': 'common marmoset',
            'Ceat': 'sooty mangabey',
            'Cemi': 'blue monkey',
            'Chae': 'grivet',
            'Chsa': 'green monkey',
            'Chpy': 'vervet monkey',
            'DLA' : 'dog',
            'Eqca': 'horse',
            'Gaga': 'chicken',
            'Gobe': 'eastern gorilla',
            'Gogo': 'gorilla',
            'H-2' : 'mouse',
            'H2'  : 'mouse',
            'Hyla': 'lar gibbon',
            'Lero': 'golden lion tamarin',
            'Maar': 'stump-tailed macaque',
            'Maas': 'assam macaque',
            'Mafa': 'crab-eating macaque',
            'Malo': 'northern pig-tailed macaque',
            'Mamu': 'rhesus macaque',
            'Mane': 'southern pig-tailed macaque',
            'Math': 'tibetan macaque',
            'Onmy': 'rainbow trout',
            'Ovar': 'sheep',
            'Paan': 'olive baboon',
            'Pacy': 'yellow baboon',
            'Paha': 'hamadryas baboon',
            'Papa': 'bonobo',
            'Patr': 'chimpanzee',
            'Pipi': 'white-faced saki',
            'Poab': 'sumatran orangutan',
            'Popy': 'bornean orangutan',
            'Rano': 'norway rat',
            'Safu': 'brown-mantled tamarin',
            'Sage': "Geoffroy's tamarin",
            'Sala': 'white-lipped tamarin',
            'Samy': 'moustached tamarin',
            'Saoe': 'cottontop tamarin',
            'Sasa': 'atlantic salmon',
            'Sasc': 'common squirrel monkey',
            'SLA' : 'pig',
        }

    @classmethod
    def species_for_allele(cls, allele):
        """Return the species whose prefix in :meth:`allele_to_species_map` starts ``allele``.

        Raises:
            Exception: when no prefix, or more than one prefix, matches.
        """
        species = [v for k, v in PredictionClass.allele_to_species_map().items() if allele.startswith(k)]
        if len(species) == 1:
            return species[0]
        if not species:
            raise Exception("Unable to determine species for allele {}".format(allele))
        raise Exception("Multiple matching species found for allele {}".format(allele))

    @classmethod
    def parse_allele_cutoff_file(cls):
        """Read ``cutoffs.csv`` and return ``{allele: allele_specific_cutoff}`` (values as read, i.e. str)."""
        base_dir                = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        iedb_alleles_dir        = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles')
        allele_cutoff_file_name = os.path.join(iedb_alleles_dir, "cutoffs.csv")
        with open(allele_cutoff_file_name) as allele_cutoff_file:
            return {
                row['allele']: row['allele_specific_cutoff']
                for row in csv.DictReader(allele_cutoff_file)
            }

    @classmethod
    def print_all_allele_cutoffs(cls):
        """Print every allele and its cutoff, tab-separated and sorted by allele."""
        if not cls.allele_cutoff_dict:
            cls.allele_cutoff_dict = cls.parse_allele_cutoff_file()
        for allele, cutoff in sorted(cls.allele_cutoff_dict.items()):
            print("%s\t%s" % (allele, cutoff))

    @classmethod
    def cutoff_for_allele(cls, allele):
        """Return the allele-specific cutoff for ``allele``, or ``None`` when there is none."""
        if not cls.allele_cutoff_dict:
            cls.allele_cutoff_dict = cls.parse_allele_cutoff_file()
        return cls.allele_cutoff_dict.get(allele)

    @abstractmethod
    def valid_allele_names(self):
        pass

    @property
    @abstractmethod
    def needs_epitope_length(self):
        pass

    def check_allele_valid(self, allele):
        """Exit the program when ``allele`` is not valid for this prediction class.

        Raises:
            SystemExit: when ``allele`` is not in :meth:`valid_allele_names`.
        """
        valid_alleles = self.valid_allele_names()
        if allele not in valid_alleles:
            sys.exit("Allele %s not valid for method %s. Run `pvacseq valid_alleles %s` for a list of valid allele names." % (allele, self.__class__.__name__, self.__class__.__name__))
×
351

352

353
class MHCI(PredictionClass, metaclass=ABCMeta):
    """Abstract base of the MHC class I prediction classes."""

    @property
    def needs_epitope_length(self):
        """Always ``True`` for classes deriving from this base."""
        return True
×
357

358
class DeepImmuno(MHCI):
    """Prediction class that shells out to the ``deepimmuno-cnn`` executable."""

    def valid_allele_names(self):
        """Return the ``HLA`` column of the bundled ``DeepImmuno.tsv`` allele file."""
        base_dir          = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        alleles_file_name = os.path.join(alleles_dir, "DeepImmuno.tsv")
        with open(alleles_file_name) as alleles_file:
            return [row['HLA'] for row in csv.DictReader(alleles_file, delimiter='\t')]

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def valid_lengths_for_allele(self, allele):
        """Return the epitope lengths offered for any allele."""
        return [9,10]

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Score every epitope of ``input_file`` against ``allele`` with DeepImmuno.

        ``iedb_executable_path``, ``iedb_retries`` and ``log_dir`` are accepted
        for signature compatibility and are not used here.

        Returns:
            ``(results, 'pandas')``. ``results`` holds one block of rows per
            (sequence, epitope start) with the tool's columns (``HLA`` renamed
            to ``allele``) plus ``seq_num`` and ``start``; it is an empty
            DataFrame when the input yields no epitopes.

        Raises:
            Exception: when ``deepimmuno-cnn`` cannot be run or exits non-zero;
                the message carries the captured stderr.
        """
        # Parse the FASTA once; the epitopes are needed both for the tool input
        # and for mapping the scores back to their sequence and start position.
        records = [
            (record.id, pvactools.lib.run_utils.determine_neoepitopes(str(record.seq), epitope_length))
            for record in SeqIO.parse(input_file, "fasta")
        ]
        all_epitopes = sorted({epitope for _, epitopes in records for epitope in epitopes.values()})
        if not all_epitopes:
            return (pd.DataFrame(), 'pandas')

        tmp_input_file = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        output_dir = tempfile.TemporaryDirectory(dir=tmp_dir)
        stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        try:
            for epitope in all_epitopes:
                # The allele is written without ':' in the tool's input file.
                tmp_input_file.write("{},{}\n".format(epitope, allele.replace(':', '')))
            tmp_input_file.close()
            arguments = ['deepimmuno-cnn', '--mode', 'multiple', '--intdir', tmp_input_file.name, '--outdir', output_dir.name]
            try:
                run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
            except Exception as error:
                stderr_fh.close()
                with open(stderr_fh.name, 'r') as fh:
                    err = fh.read()
                raise Exception("An error occurred while calling DeepImmuno:\n{}".format(err)) from error
            tmp_output_file_name = os.path.join(output_dir.name, "deepimmuno-cnn-result.txt")
            df = pd.read_csv(tmp_output_file_name, sep="\t")
        finally:
            # Temp files were created with delete=False; remove them and the
            # output directory on success and on failure alike.
            for handle in (stderr_fh, tmp_input_file):
                handle.close()
                try:
                    os.unlink(handle.name)
                except FileNotFoundError:
                    pass
            output_dir.cleanup()

        df = df.rename(columns={
            'HLA': 'allele',
        })
        # assign() works on a new frame, avoiding chained assignment on a
        # filtered slice; a single concat avoids re-copying the accumulated rows.
        frames = [
            df[df['peptide'] == epitope].assign(seq_num=seq_num, start=start)
            for seq_num, epitopes in records
            for start, epitope in epitopes.items()
        ]
        results = pd.concat(frames, axis=0) if frames else pd.DataFrame()
        return (results, 'pandas')
×
421

422

423
class BigMHC(metaclass=ABCMeta):
    """Mixin with the shared logic for the prediction classes built on ``bigmhc_predict``."""

    def valid_allele_names(self):
        """Return the non-empty lines of the bundled ``BigMHC.txt`` allele list."""
        base_dir          = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        alleles_file_name = os.path.join(alleles_dir, "BigMHC.txt")
        with open(alleles_file_name, 'r') as fh:
            return [line for line in fh.read().split('\n') if line]

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def valid_lengths_for_allele(self, allele):
        """Return the epitope lengths offered for any allele."""
        return [8,9,10,11,12,13,14,15]

    def predict_bigmhc(self, bigmhc_type, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Score every epitope of ``input_file`` against ``allele`` with ``bigmhc_predict``.

        Args:
            bigmhc_type: value passed to the tool's ``-m`` option.
            input_file: FASTA file with the sequences to predict on.
            allele: allele name passed to the tool's ``-a`` option.
            epitope_length: epitope length handed to ``determine_neoepitopes``.
            tmp_dir: directory for the temporary files.

        ``iedb_executable_path``, ``iedb_retries`` and ``log_dir`` are accepted
        for signature compatibility and are not used here.

        Returns:
            ``(results, 'pandas')``. ``results`` holds one block of rows per
            (sequence, epitope start) with the tool's columns (``pep`` and
            ``mhc`` renamed to ``peptide`` and ``allele``) plus ``seq_num`` and
            ``start``; it is an empty DataFrame when the input yields no epitopes.

        Raises:
            Exception: when ``bigmhc_predict`` cannot be run or exits non-zero;
                the message carries the captured stderr.
        """
        # Parse the FASTA once; the epitopes are needed both for the tool input
        # and for mapping the scores back to their sequence and start position.
        records = [
            (record.id, pvactools.lib.run_utils.determine_neoepitopes(str(record.seq), epitope_length))
            for record in SeqIO.parse(input_file, "fasta")
        ]
        all_epitopes = sorted({epitope for _, epitopes in records for epitope in epitopes.values()})
        if not all_epitopes:
            return (pd.DataFrame(), 'pandas')

        tmp_input_file = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        tmp_output_file = tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False)
        stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        try:
            for epitope in all_epitopes:
                tmp_input_file.write("{}\n".format(epitope))
            tmp_input_file.close()
            # NOTE(review): the meaning of the fixed '-p 0 -c 0' options is not
            # visible here -- see the bigmhc_predict documentation.
            arguments = ['bigmhc_predict', '-a', allele, '-i', tmp_input_file.name, '-p', '0', '-c', '0', '-o', tmp_output_file.name, '-m', bigmhc_type, '-d', 'cpu']
            try:
                run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
            except Exception as error:
                stderr_fh.close()
                with open(stderr_fh.name, 'r') as fh:
                    err = fh.read()
                raise Exception("An error occurred while calling BigMHC:\n{}".format(err)) from error
            tmp_output_file.close()
            df = pd.read_csv(tmp_output_file.name)
        finally:
            # Temp files were created with delete=False; remove them on
            # success and on failure alike.
            for handle in (stderr_fh, tmp_input_file, tmp_output_file):
                handle.close()
                try:
                    os.unlink(handle.name)
                except FileNotFoundError:
                    pass

        df = df.rename(columns={
            'pep': 'peptide',
            'mhc': 'allele',
        })
        # assign() works on a new frame, avoiding chained assignment on a
        # filtered slice; a single concat avoids re-copying the accumulated rows.
        frames = [
            df[df['peptide'] == epitope].assign(seq_num=seq_num, start=start)
            for seq_num, epitopes in records
            for start, epitope in epitopes.items()
        ]
        results = pd.concat(frames, axis=0) if frames else pd.DataFrame()
        return (results, 'pandas')
×
483

484
class BigMHC_EL(BigMHC, MHCI):
    """BigMHC prediction class that runs ``predict_bigmhc`` with model type ``'el'``."""

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Delegate to ``predict_bigmhc`` with type ``'el'``.

        ``tmp_dir`` and ``log_dir`` are forwarded so temporary files are
        created in the directory the caller asked for.
        """
        return self.predict_bigmhc('el', input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=tmp_dir, log_dir=log_dir)
×
487

488
class BigMHC_IM(BigMHC, MHCI):
    """BigMHC prediction class that runs ``predict_bigmhc`` with model type ``'im'``."""

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Delegate to ``predict_bigmhc`` with type ``'im'``.

        ``tmp_dir`` and ``log_dir`` are forwarded so temporary files are
        created in the directory the caller asked for.
        """
        return self.predict_bigmhc('im', input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=tmp_dir, log_dir=log_dir)
×
491

492
class MHCflurry(MHCI):
    """Prediction class that shells out to the ``mhcflurry-predict`` executable."""

    def valid_allele_names(self):
        """Return the non-empty lines of the bundled ``MHCflurry.txt`` allele list."""
        base_dir          = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        alleles_file_name = os.path.join(alleles_dir, "MHCflurry.txt")
        with open(alleles_file_name, 'r') as fh:
            return [line for line in fh.read().split('\n') if line]

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def valid_lengths_for_allele(self, allele):
        """Return the epitope lengths offered for any allele."""
        return [8,9,10,11,12,13,14,15]

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Score every epitope of ``input_file`` against ``allele`` with MHCflurry.

        ``iedb_executable_path``, ``iedb_retries`` and ``log_dir`` are accepted
        for signature compatibility and are not used here.

        Returns:
            ``(results, 'pandas')``. ``results`` holds one block of rows per
            (sequence, epitope start) with the tool's columns (affinity and
            percentile columns renamed to ``ic50`` and ``percentile``) plus
            ``seq_num`` and ``start``; it is an empty DataFrame when the input
            yields no epitopes.

        Raises:
            Exception: when ``mhcflurry-predict`` cannot be run or exits
                non-zero; the message carries the captured stderr.
        """
        # Parse the FASTA once; the epitopes are needed both for the tool input
        # and for mapping the scores back to their sequence and start position.
        records = [
            (record.id, pvactools.lib.run_utils.determine_neoepitopes(str(record.seq), epitope_length))
            for record in SeqIO.parse(input_file, "fasta")
        ]
        all_epitopes = sorted({epitope for _, epitopes in records for epitope in epitopes.values()})
        if not all_epitopes:
            return (pd.DataFrame(), 'pandas')

        tmp_output_file = tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False)
        stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        try:
            # NOTE(review): every peptide is passed on the command line; very
            # large inputs could hit the OS argument-length limit.
            arguments = ["mhcflurry-predict", "--alleles", allele, "--out", tmp_output_file.name, "--peptides"]
            arguments.extend(all_epitopes)
            try:
                run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
            except Exception as error:
                stderr_fh.close()
                with open(stderr_fh.name, 'r') as fh:
                    err = fh.read()
                raise Exception("An error occurred while calling MHCflurry:\n{}".format(err)) from error
            tmp_output_file.close()
            df = pd.read_csv(tmp_output_file.name)
        finally:
            # Temp files were created with delete=False; remove them on
            # success and on failure alike.
            for handle in (stderr_fh, tmp_output_file):
                handle.close()
                try:
                    os.unlink(handle.name)
                except FileNotFoundError:
                    pass

        # Two spellings are mapped for each target column; presumably these
        # cover different MHCflurry versions -- confirm.
        df = df.rename(columns={
            'mhcflurry_prediction': 'ic50',
            'mhcflurry_affinity': 'ic50',
            'mhcflurry_prediction_percentile': 'percentile',
            'mhcflurry_affinity_percentile': 'percentile'
        })
        # assign() works on a new frame, avoiding chained assignment on a
        # filtered slice; a single concat avoids re-copying the accumulated rows.
        frames = [
            df[df['peptide'] == epitope].assign(seq_num=seq_num, start=start)
            for seq_num, epitopes in records
            for start, epitope in epitopes.items()
        ]
        results = pd.concat(frames, axis=0) if frames else pd.DataFrame()
        return (results, 'pandas')
×
550

551
class MHCflurryEL(MHCflurry):
    """Subclass of ``MHCflurry`` that adds no behaviour of its own; it only provides a distinct class name."""
3✔
553

554
class MixMHCpred(MHCI):
    """Prediction class that shells out to the ``MixMHCpred`` executable."""

    def valid_allele_names(self):
        """Return the non-empty lines of the bundled ``MixMHCpred.txt`` allele list."""
        base_dir          = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        alleles_file_name = os.path.join(alleles_dir, "MixMHCpred.txt")
        with open(alleles_file_name, 'r') as fh:
            return [line for line in fh.read().split('\n') if line]

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination."""
        return True

    def valid_lengths_for_allele(self, allele):
        """Return the epitope lengths offered for any allele."""
        return [8,9,10,11,12,13,14]

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Score every epitope of ``input_file`` against ``allele`` with MixMHCpred.

        ``iedb_executable_path``, ``iedb_retries`` and ``log_dir`` are accepted
        for signature compatibility and are not used here.

        Returns:
            ``(results, 'pandas')``. ``results`` holds one block of rows per
            (sequence, epitope start) with the tool's columns
            (``Score_bestAllele``, ``%Rank_bestAllele`` and ``Peptide`` renamed
            to ``score``, ``percentile`` and ``peptide``) plus ``seq_num``,
            ``start`` and ``allele``; it is an empty DataFrame when the input
            yields no epitopes.

        Raises:
            Exception: when ``MixMHCpred`` cannot be run or exits non-zero; the
                message carries the captured stderr.
        """
        # Parse the FASTA once; the epitopes are needed both for the tool input
        # and for mapping the scores back to their sequence and start position.
        records = [
            (record.id, pvactools.lib.run_utils.determine_neoepitopes(str(record.seq), epitope_length))
            for record in SeqIO.parse(input_file, "fasta")
        ]
        all_epitopes = sorted({epitope for _, epitopes in records for epitope in epitopes.values()})
        if not all_epitopes:
            return (pd.DataFrame(), 'pandas')

        tmp_input_file = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        tmp_output_file = tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False)
        stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
        try:
            for epitope in all_epitopes:
                tmp_input_file.write("{}\n".format(epitope))
            tmp_input_file.close()
            arguments = ["MixMHCpred", "-i", tmp_input_file.name, "-o", tmp_output_file.name, "-a", allele]
            try:
                run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
            except Exception as error:
                stderr_fh.close()
                with open(stderr_fh.name, 'r') as fh:
                    err = fh.read()
                raise Exception("An error occurred while calling MixMHCpred:\n{}".format(err)) from error
            tmp_output_file.close()
            # Skips the first 11 lines of the output file -- presumably a
            # comment header; confirm against the installed MixMHCpred version.
            df = pd.read_csv(tmp_output_file.name, sep="\t", skiprows=11)
        finally:
            # Temp files were created with delete=False; remove all three
            # (including the peptide input file) on success and failure alike.
            for handle in (stderr_fh, tmp_input_file, tmp_output_file):
                handle.close()
                try:
                    os.unlink(handle.name)
                except FileNotFoundError:
                    pass

        df = df.rename(columns={
            'Score_bestAllele': 'score',
            '%Rank_bestAllele': 'percentile',
            'Peptide': 'peptide',
        })
        # assign() works on a new frame, avoiding chained assignment on a
        # filtered slice; a single concat avoids re-copying the accumulated rows.
        frames = [
            df[df['peptide'] == epitope].assign(seq_num=seq_num, start=start, allele=allele)
            for seq_num, epitopes in records
            for start, epitope in epitopes.items()
        ]
        results = pd.concat(frames, axis=0) if frames else pd.DataFrame()
        return (results, 'pandas')
×
615

616
class PRIME(MHCI):
    """MHC class I predictor that shells out to the ``PRIME`` command-line tool."""

    def valid_allele_names(self):
        """Return the non-empty lines of the bundled ``PRIME.txt`` allele list."""
        base_dir          = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        alleles_file_name = os.path.join(alleles_dir, "PRIME.txt")
        with open(alleles_file_name, 'r') as fh:
            return [line for line in fh.read().split('\n') if line]

    def check_length_valid_for_allele(self, length, allele):
        """Accept every length/allele combination; no per-allele check is performed."""
        return True

    def valid_lengths_for_allele(self, allele):
        """Return the supported epitope lengths (8 through 14), regardless of ``allele``."""
        return [8, 9, 10, 11, 12, 13, 14]

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Run PRIME on every epitope derived from ``input_file`` for one allele.

        Args:
            input_file: FASTA input handed to ``SeqIO.parse``.
            allele: allele name passed to PRIME via ``-a``.
            epitope_length: length forwarded to ``determine_neoepitopes``.
            iedb_executable_path: unused here.
            iedb_retries: unused here.
            tmp_dir: directory for the temporary files (system default if None).
            log_dir: unused here.

        Returns:
            Tuple ``(results, 'pandas')``. ``results`` is a DataFrame holding the
            PRIME output rows matched to each epitope, with ``seq_num``,
            ``start`` and ``allele`` columns added. It is an empty DataFrame
            when the input yields no epitopes.

        Raises:
            Exception: if the PRIME call fails; the message carries PRIME's stderr.
        """
        results = pd.DataFrame()

        # Parse the FASTA once and keep each record's epitopes so the matching
        # step below does not need to re-read the input.
        epitopes_by_record = [
            (record.id, pvactools.lib.run_utils.determine_neoepitopes(str(record.seq), epitope_length))
            for record in SeqIO.parse(input_file, "fasta")
        ]
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # file handed to PRIME is deterministic between runs.
        unique_epitopes = list(dict.fromkeys(
            epitope
            for _, epitopes in epitopes_by_record
            for epitope in epitopes.values()
        ))
        if not unique_epitopes:
            return (results, 'pandas')

        tmp_paths = []
        try:
            with tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False) as tmp_input_file:
                tmp_paths.append(tmp_input_file.name)
                tmp_input_file.writelines("{}\n".format(epitope) for epitope in unique_epitopes)

            # Only the path is needed here; PRIME writes the file itself.
            with tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False) as tmp_output_file:
                tmp_paths.append(tmp_output_file.name)

            arguments = ["PRIME", "-i", tmp_input_file.name, "-o", tmp_output_file.name, "-a", allele]
            stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
            tmp_paths.append(stderr_fh.name)
            try:
                run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
            except Exception as run_error:
                # Surface whatever PRIME wrote to stderr in the raised error.
                stderr_fh.close()
                with open(stderr_fh.name, 'r') as fh:
                    err = fh.read()
                raise Exception("An error occurred while calling PRIME:\n{}".format(err)) from run_error
            finally:
                stderr_fh.close()

            df = pd.read_csv(tmp_output_file.name, sep="\t", skiprows=11)
        finally:
            # Remove every temporary file on success and on failure alike.
            for path in tmp_paths:
                try:
                    os.unlink(path)
                except OSError:
                    pass

        df.rename(columns={
            'Score_bestAllele': 'score',
            '%Rank_bestAllele': 'percentile',
            'Peptide': 'peptide',
        }, inplace=True)

        # assign() returns a new frame, which avoids writing into a filtered
        # slice; the frames are concatenated once instead of once per epitope.
        frames = [
            df[df['peptide'] == epitope].assign(seq_num=seq_num, start=start, allele=allele)
            for seq_num, epitopes in epitopes_by_record
            for start, epitope in epitopes.items()
        ]
        if frames:
            results = pd.concat(frames, axis=0)
        return (results, 'pandas')
×
677

678
class MHCnuggetsI(MHCI, MHCnuggets):
    """Class I variant of the MHCnuggets predictor."""

    def valid_allele_names(self):
        """Return the allele names looked up under ``'class_i'``."""
        mhc_class = 'class_i'
        return self.valid_allele_names_for_class(mhc_class)

    def valid_lengths_for_allele(self, allele):
        """Return lengths 8 through 15; ``allele`` does not affect the result."""
        return list(range(8, 16))

    def mhcnuggets_allele(self, allele):
        """Return ``allele`` with every ``*`` character removed."""
        stripped = allele.replace('*', '')
        return stripped

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Delegate to ``MHCnuggets.predict`` with class ``'I'``; ``log_dir`` is not forwarded."""
        return MHCnuggets.predict(
            self,
            input_file,
            allele,
            epitope_length,
            iedb_executable_path,
            iedb_retries,
            'I',
            tmp_dir=tmp_dir,
        )
×
690

691
class IEDBMHCI(MHCI, IEDB, metaclass=ABCMeta):
    """Base class for IEDB-backed MHC class I predictors.

    Subclasses supply ``iedb_prediction_method``; it selects the bundled
    ``<method>.tsv`` allele file read by ``parse_iedb_allele_file``.
    """

    @property
    def url(self):
        """URL of the IEDB class I tools API endpoint."""
        return 'http://tools-cluster-interface.iedb.org/tools_api/mhci/'

    def parse_iedb_allele_file(self):
        """Read the bundled allele TSV for this method.

        Returns:
            dict mapping each value of the ``MHC`` column to the list of
            integer ``PeptideLength`` values found for it, in file order.
        """
        #Ultimately we probably want this method to call out to IEDB but their command is currently broken
        #curl --data "method=ann&species=human" http://tools-api.iedb.org/tools_api/mhci/
        base_dir               = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        iedb_alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        iedb_alleles_file_name = os.path.join(iedb_alleles_dir, "%s.tsv" % self.iedb_prediction_method)
        alleles = {}
        with open(iedb_alleles_file_name) as iedb_alleles_file:
            for row in csv.DictReader(iedb_alleles_file, delimiter='\t'):
                alleles.setdefault(row['MHC'], []).append(int(row['PeptideLength']))
        return alleles

    def valid_allele_names(self):
        """Return the allele names (dict keys view), parsing the allele file on first use."""
        if not self.valid_allele_names_dict:
            self.valid_allele_names_dict = self.parse_iedb_allele_file()
        return self.valid_allele_names_dict.keys()

    def valid_lengths_for_allele(self, allele):
        """Return the peptide lengths listed for ``allele``.

        Raises:
            KeyError: if ``allele`` is not present in the allele file.
        """
        if not self.valid_allele_names_dict:
            self.valid_allele_names_dict = self.parse_iedb_allele_file()
        return self.valid_allele_names_dict[allele]

    def check_length_valid_for_allele(self, length, allele):
        """Exit the process via ``sys.exit`` when ``length`` is not valid for ``allele``.

        Returns None when the length is valid.
        """
        valid_lengths = self.valid_lengths_for_allele(allele)
        if length not in valid_lengths:
            sys.exit("Length %s not valid for allele %s and method %s." % (length, allele, self.iedb_prediction_method))

    def iedb_executable_params(self, iedb_executable_path, method, allele, input_file, epitope_length):
        """Return the argument list: executable, method, allele, length (as str), input file."""
        return [iedb_executable_path, method, allele, str(epitope_length), input_file]
×
731

732
class NetMHC(IEDBMHCI):
    """IEDB class I predictor using the ``ann`` method."""

    @property
    def iedb_prediction_method(self):
        """Method identifier string for this predictor."""
        method_name = 'ann'
        return method_name
3✔
736

737
class NetMHCpan(IEDBMHCI):
    """IEDB class I predictor using the ``netmhcpan`` method."""

    @property
    def iedb_prediction_method(self):
        """Method identifier string for this predictor."""
        method_name = 'netmhcpan'
        return method_name
3✔
741

742
class NetMHCpanEL(IEDBMHCI):
    """IEDB class I predictor using the ``netmhcpan_el`` method."""

    @property
    def iedb_prediction_method(self):
        """Method identifier string for this predictor."""
        method_name = 'netmhcpan_el'
        return method_name
3✔
746

747
class SMMPMBEC(IEDBMHCI):
    """IEDB class I predictor using the ``smmpmbec`` method."""

    @property
    def iedb_prediction_method(self):
        """Method identifier string for this predictor."""
        method_name = 'smmpmbec'
        return method_name
3✔
751

752
class SMM(IEDBMHCI):
    """IEDB class I predictor using the ``smm`` method."""

    @property
    def iedb_prediction_method(self):
        """Method identifier string for this predictor."""
        method_name = 'smm'
        return method_name
3✔
756

757
class NetMHCcons(IEDBMHCI):
    """IEDB class I predictor using the ``netmhccons`` method."""

    @property
    def iedb_prediction_method(self):
        """Method identifier string for this predictor."""
        method_name = 'netmhccons'
        return method_name
3✔
761

762
class PickPocket(IEDBMHCI):
    """IEDB class I predictor using the ``pickpocket`` method."""

    @property
    def iedb_prediction_method(self):
        """Method identifier string for this predictor."""
        method_name = 'pickpocket'
        return method_name
3✔
766

767
class MHCII(PredictionClass, metaclass=ABCMeta):
    """Abstract base for MHC class II prediction classes."""

    @property
    def needs_epitope_length(self):
        """Always False for class II predictors."""
        epitope_length_required = False
        return epitope_length_required
×
771

772
class MHCnuggetsII(MHCII, MHCnuggets):
    """Class II variant of the MHCnuggets predictor."""

    def valid_allele_names(self):
        """Return the allele names looked up under ``'class_ii'``."""
        mhc_class = 'class_ii'
        return self.valid_allele_names_for_class(mhc_class)

    def valid_lengths_for_allele(self, allele):
        """Return lengths 11 through 30; ``allele`` does not affect the result."""
        return list(range(11, 31))

    def mhcnuggets_allele(self,allele):
        """Return ``allele`` prefixed with ``HLA-`` and with every ``*`` removed."""
        prefixed = "HLA-{}".format(allele)
        return prefixed.replace('*', '')

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Delegate to ``MHCnuggets.predict`` with class ``'II'``; ``log_dir`` is not forwarded."""
        return MHCnuggets.predict(
            self,
            input_file,
            allele,
            epitope_length,
            iedb_executable_path,
            iedb_retries,
            'II',
            tmp_dir=tmp_dir,
        )
×
784

785
class IEDBMHCII(MHCII, IEDB, metaclass=ABCMeta):
    """Base class for IEDB-backed MHC class II predictors.

    Subclasses supply ``iedb_prediction_method``; it is used to pick the
    bundled allele list read by ``parse_iedb_allele_file``.
    """

    @property
    def url(self):
        """URL of the IEDB class II tools API endpoint."""
        return 'http://tools-cluster-interface.iedb.org/tools_api/mhcii/'

    def parse_iedb_allele_file(self):
        """Read the bundled allele list for this method.

        Returns:
            list of allele names, one per line of the file, with trailing
            whitespace stripped.
        """
        #Ultimately we probably want this method to call out to IEDB but their command is currently broken
        #curl --data "method=ann&species=human" http://tools-api.iedb.org/tools_api/mhci/
        # NOTE(review): "netmhciipan" is a substring of "netmhciipan_el" and is
        # tested first, so the second candidate can never be selected. Confirm
        # whether the EL method is meant to share the "netmhciipan" file.
        file_name = next(
            (name for name in ["netmhciipan", "netmhciipan_el"] if name in self.iedb_prediction_method),
            self.iedb_prediction_method
        )
        base_dir               = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        iedb_alleles_dir       = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_ii')
        iedb_alleles_file_name = os.path.join(iedb_alleles_dir, "%s.tsv" % file_name)
        with open(iedb_alleles_file_name) as iedb_alleles_file:
            return [row.rstrip() for row in iedb_alleles_file]

    def valid_lengths_for_allele(self, allele):
        """Return lengths 11 through 30; ``allele`` does not affect the result."""
        return [11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]

    def valid_allele_names(self):
        """Return the list of allele names, parsing the allele file on first use."""
        if not self.valid_allele_names_dict:
            self.valid_allele_names_dict = self.parse_iedb_allele_file()
        return self.valid_allele_names_dict

    def iedb_executable_params(self, iedb_executable_path, method, allele, input_file, epitope_length):
        """Return the argument list: executable, method, allele, input file, length (as str).

        ``-DPB`` and ``-DQB`` in the allele name are rewritten to ``/DPB`` and
        ``/DQB`` before the list is built.
        """
        allele = allele.replace('-DPB', '/DPB').replace('-DQB', '/DQB')
        return [iedb_executable_path, method, allele, input_file, str(epitope_length)]
×
818

819
class NetMHCIIVersion:
    """Holder for the NetMHCIIpan version shared at class level."""

    # None until a version string is assigned by code outside this class.
    netmhciipan_version = None
3✔
821

822
class NetMHCIIpan(IEDBMHCII):
    """IEDB class II predictor using the ``netmhciipan_ba`` method."""

    @property
    def iedb_prediction_method(self):
        """Return ``'netmhciipan_ba'``, with ``-<version>`` appended when
        ``NetMHCIIVersion.netmhciipan_version`` is '4.0', '4.2' or '4.3'.
        """
        version = NetMHCIIVersion.netmhciipan_version
        if version not in ['4.0', '4.2', '4.3']:
            return 'netmhciipan_ba'
        return 'netmhciipan_ba-' + version
3✔
828

829
class NetMHCIIpanEL(IEDBMHCII):
    """IEDB class II predictor using the ``netmhciipan_el`` method."""

    @property
    def iedb_prediction_method(self):
        """Return ``'netmhciipan_el'``, with ``-<version>`` appended when
        ``NetMHCIIVersion.netmhciipan_version`` is '4.0', '4.2' or '4.3'.
        """
        version = NetMHCIIVersion.netmhciipan_version
        if version not in ['4.0', '4.2', '4.3']:
            return 'netmhciipan_el'
        return 'netmhciipan_el-' + version
3✔
835

836
class NNalign(IEDBMHCII):
    """IEDB class II predictor using the ``nn_align`` method."""

    @property
    def iedb_prediction_method(self):
        """Method identifier string for this predictor."""
        method_name = 'nn_align'
        return method_name
3✔
840

841
class SMMalign(IEDBMHCII):
    """IEDB class II predictor using the ``smm_align`` method."""

    @property
    def iedb_prediction_method(self):
        """Method identifier string for this predictor."""
        method_name = 'smm_align'
        return method_name
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc