19267929486

Committed 11 Nov 2025 01:58PM UTC coverage: 83.157%. First build

Build # 19267929486

Build Type

Pull #1334

github

Committed by

web-flow

Commit Message

Merge 96524b86a into 0f77b13e8

Pull Request Pull Request #1334: Update top_score_metric2 to take a list of metrics to prioritize

Run Details

209 of 212 new or added lines in 9 files covered. (98.58%)

9287 of 11168 relevant lines covered (83.16%)

2.49 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.36

/pvactools/lib/run_utils.py

import sys
import os
import csv
import binascii
import re
from itertools import islice
import argparse
import pandas as pd

def combine_reports(input_files, output_file):
    """Merge several tab-separated report files into a single TSV file.

    The output header starts with the columns of the first input file that
    has a header; columns first seen in later files are appended in order of
    appearance. Rows are written in input-file order, and any column missing
    from a row's source file is filled with 'NA'.

    Args:
        input_files: Paths of the TSV reports to merge. The collection is
            iterated twice (once for headers, once for rows), so it must be
            re-iterable, e.g. a list.
        output_file: Path of the combined TSV file to write.
    """
    fieldnames = []
    for input_file in input_files:
        # newline='' is what the csv module asks for on files it reads/writes.
        with open(input_file, 'r', newline='') as input_file_handle:
            reader = csv.DictReader(input_file_handle, delimiter='\t')
            # DictReader.fieldnames is None for a file without a header line;
            # treat that as "no columns" instead of crashing on it.
            header = reader.fieldnames or []
            if not fieldnames:
                fieldnames = list(header)
            else:
                for fieldname in header:
                    if fieldname not in fieldnames:
                        fieldnames.append(fieldname)

    with open(output_file, 'w', newline='') as fout:
        writer = csv.DictWriter(fout, delimiter="\t", restval='NA', fieldnames=fieldnames)
        writer.writeheader()
        for input_file in input_files:
            with open(input_file, 'r', newline='') as input_file_handle:
                reader = csv.DictReader(input_file_handle, delimiter='\t')
                for row in reader:
                    writer.writerow(row)

def change_permissions_recursive(path, dir_mode, file_mode):
    """Set dir_mode on every directory and file_mode on every file below path.

    The tree is walked bottom-up, so a directory's contents are handled
    before the directory itself. The top-level path is not chmod-ed.
    """
    for parent, subdirectories, filenames in os.walk(path, topdown=False):
        for subdirectory in subdirectories:
            os.chmod(os.path.join(parent, subdirectory), dir_mode)
        for filename in filenames:
            os.chmod(os.path.join(parent, filename), file_mode)

def is_gz_file(filepath):
    """Return True when the first two bytes of filepath are 1f 8b."""
    with open(filepath, 'rb') as handle:
        leading_bytes = handle.read(2)
    return leading_bytes == b'\x1f\x8b'

def split_file(reader, lines):
    """Yield consecutive lists of at most `lines` items drawn from reader.

    Iteration ends as soon as a chunk comes back empty, so the final list may
    be shorter than `lines` and nothing is yielded for an empty reader.
    """
    source = iter(reader)
    while True:
        chunk = list(islice(source, lines))
        if not chunk:
            return
        yield chunk

def construct_index(count, gene, transcript, variant_type, position):
    """Join the five given values with '.' into a single index string."""
    return f'{count}.{gene}.{transcript}.{variant_type}.{position}'

def float_range(minimum, maximum):
    """Return function handle of an argument type function for
       ArgumentParser checking a float range: minimum <= arg <= maximum

    Args:
        minimum: minimum acceptable argument (inclusive)
        maximum: maximum acceptable argument (inclusive)

    Returns:
        A function that converts its argument to float and returns it, or
        raises argparse.ArgumentTypeError when the argument is not a float
        or lies outside [minimum, maximum].
    """

    # Define the function with default arguments
    def float_range_checker(arg):
        """New Type function for argparse - a float within predefined range."""

        try:
            f = float(arg)
        except ValueError:
            raise argparse.ArgumentTypeError("must be a floating point number")
        # Phrased as a positive containment test: NaN compares False against
        # everything, so "f < minimum or f > maximum" would let it through.
        if not (minimum <= f <= maximum):
            raise argparse.ArgumentTypeError("must be in range [" + str(minimum) + " .. " + str(maximum)+"]")
        return f

    # Return function handle to checking function
    return float_range_checker

def aggregate_report_evaluations():
    """Build the ArgumentParser type function for aggregate report evaluations.

       The returned function splits its argument on commas and returns the
       resulting list. It raises argparse.ArgumentTypeError naming the first
       element that is not one of: ['Accept', 'Reject', 'Pending', 'Review']"""

    valid_values = ['Accept', 'Reject', 'Pending', 'Review']

    def aggregate_report_evaluation_checker(arg):
        evaluations = arg.split(",")
        rejected = [evaluation for evaluation in evaluations if evaluation not in valid_values]
        if rejected:
            # Only the first offending element is reported.
            raise argparse.ArgumentTypeError(
                "Invalid evaluation '{}'. Valid values are: {}".format(rejected[0], ", ".join(valid_values))
            )
        return evaluations

    return aggregate_report_evaluation_checker

def transcript_prioritization_strategy():
    """Build the ArgumentParser type function for the transcript
       prioritization strategy: a comma-separated list whose elements must
       each be one of: ['canonical', 'mane_select', 'tsl']"""

    def transcript_prioritization_strategy_checker(arg):
        """Split arg on commas and return the list; reject the first unknown element."""

        criteria = arg.split(",")
        for criterion in criteria:
            if criterion in ('canonical', 'mane_select', 'tsl'):
                continue
            raise argparse.ArgumentTypeError("List element must be one of 'canonical', 'mane_select', 'tsl', not {}".format(criterion))
        return criteria

    return transcript_prioritization_strategy_checker

def top_score_metric2():
    """Build the ArgumentParser type function for the top score metric2:
       a comma-separated list whose elements must each be one of:
       ['ic50', 'combined_percentile', 'binding_percentile', 'immunogenicity_percentile', 'presentation_percentile']"""

    def top_score_metric2_checker(arg):
        """Split arg on commas and return the list; reject the first unknown element."""

        metrics = arg.split(",")
        # Elements are always strings, so None is a safe "nothing found" marker.
        offending = next(
            (
                metric for metric in metrics
                if metric not in ['ic50', 'combined_percentile', 'binding_percentile', 'immunogenicity_percentile', 'presentation_percentile']
            ),
            None,
        )
        if offending is not None:
            raise argparse.ArgumentTypeError("List element must be one of 'ic50', 'combined_percentile', 'binding_percentile', 'immunogenicity_percentile', 'presentation_percentile', not {}".format(offending))
        return metrics

    return top_score_metric2_checker

def pvacsplice_anchors():
    """Build the ArgumentParser type function for the pVACsplice anchors:
       a comma-separated list whose elements must each be one of: ['A', 'D', 'NDA']"""

    accepted = ('A', 'D', 'NDA')

    def pvacsplice_anchors_checker(arg):
        """Split arg on commas and return the list; reject the first unknown element."""

        anchors = arg.split(",")
        for position, anchor in enumerate(anchors):
            if anchor not in accepted:
                raise argparse.ArgumentTypeError("List element must be one of 'A', 'D', 'NDA', not {}".format(anchors[position]))
        return anchors

    return pvacsplice_anchors_checker

def supported_amino_acids():
    """Return a new list of the 20 supported one-letter amino acid codes."""
    return list("ARNDCEQGHILKMFPSTWYV")

def determine_neoepitopes(sequence, length):
    """Map each 1-based start position to the `length`-long window of sequence.

    Returns an empty dict when sequence is shorter than length.
    """
    window_count = len(sequence) - length + 1
    return {
        offset + 1: sequence[offset:offset + length]
        for offset in range(window_count)
    }

def get_mutated_peptide_with_flanking_sequence(wt_peptide, mt_peptide, flanking_length):
    """Return the part of mt_peptide that differs from wt_peptide, with flanks.

    Both peptides are cut into overlapping windows of flanking_length + 1
    residues. The windows are compared pairwise from the front to find the
    start index and pairwise from the back to find the stop index, and
    mt_peptide[start:stop] is taken as the candidate subsequence. A single
    unsupported character at either end of the candidate is trimmed.

    Args:
        wt_peptide: Wildtype peptide sequence.
        mt_peptide: Mutant peptide sequence.
        flanking_length: Number of flanking residues; windows are one longer.

    Returns:
        The mutant subsequence, or None (after printing a warning) when it
        still contains a character outside supported_amino_acids().
    """
    window_length = flanking_length + 1
    wt_epitopes = list(determine_neoepitopes(wt_peptide, window_length).values())
    mt_epitopes = list(determine_neoepitopes(mt_peptide, window_length).values())
    # If no pair differs, `start` keeps the index of the last compared pair.
    for start, (wt_epitope, mt_epitope) in enumerate(zip(wt_epitopes, mt_epitopes)):
        if wt_epitope != mt_epitope:
            break
    for i, (wt_epitope, mt_epitope) in enumerate(zip(reversed(wt_epitopes), reversed(mt_epitopes))):
        if wt_epitope != mt_epitope:
            stop = len(mt_epitopes) - i + flanking_length
            break
    # NOTE(review): if the peptides yield no windows, or no pair differs when
    # compared from the back, start/stop are never assigned and this slice
    # raises UnboundLocalError - callers presumably guarantee a difference;
    # confirm.
    mutant_subsequence = mt_peptide[start:stop]
    supported_aas = supported_amino_acids()
    if mutant_subsequence[0] not in supported_aas:
        mutant_subsequence = mutant_subsequence[1:]
    if mutant_subsequence[-1] not in supported_aas:
        mutant_subsequence = mutant_subsequence[0:-1]
    if not all(c in supported_aas for c in mutant_subsequence):
        # The original message formatted line['index'], but no `line` exists
        # in this scope, so this branch raised NameError instead of skipping.
        print("Warning. Mutant sequence contains unsupported amino acid. Skipping entry with mutant sequence {}".format(mutant_subsequence))
        return
    return mutant_subsequence

def get_mutated_frameshift_peptide_with_flanking_sequence(wt_peptide, mt_peptide, flanking_length):
    """Return mt_peptide from the first window that differs from wt_peptide.

    Both peptides are cut into overlapping windows of flanking_length + 1
    residues and compared pairwise from the front; the result is
    mt_peptide[start:], i.e. everything from the start index onwards. A
    single unsupported character at either end of that slice is trimmed.

    Args:
        wt_peptide: Wildtype peptide sequence.
        mt_peptide: Mutant (frameshifted) peptide sequence.
        flanking_length: Number of flanking residues; windows are one longer.

    Returns:
        The mutant subsequence, or None (after printing a warning) when it
        still contains a character outside supported_amino_acids().
    """
    window_length = flanking_length + 1
    wt_epitopes = list(determine_neoepitopes(wt_peptide, window_length).values())
    mt_epitopes = list(determine_neoepitopes(mt_peptide, window_length).values())
    # If no pair differs, `start` keeps the index of the last compared pair.
    # NOTE(review): with no windows at all `start` is never assigned and the
    # slice below raises UnboundLocalError - confirm callers rule this out.
    for start, (wt_epitope, mt_epitope) in enumerate(zip(wt_epitopes, mt_epitopes)):
        if wt_epitope != mt_epitope:
            break
    mutant_subsequence = mt_peptide[start:]
    supported_aas = supported_amino_acids()
    if mutant_subsequence[0] not in supported_aas:
        mutant_subsequence = mutant_subsequence[1:]
    if mutant_subsequence[-1] not in supported_aas:
        mutant_subsequence = mutant_subsequence[0:-1]
    if not all(c in supported_aas for c in mutant_subsequence):
        # The original message formatted line['index'], but no `line` exists
        # in this scope, so this branch raised NameError instead of skipping.
        print("Warning. Mutant sequence contains unsupported amino acid. Skipping entry with mutant sequence {}".format(mutant_subsequence))
        return
    return mutant_subsequence

def is_preferred_transcript(mutation, transcript_prioritization_strategy, maximum_transcript_support_level):
    """Decide whether a transcript passes the requested prioritization criteria.

    Args:
        mutation: A pandas Series, or anything pd.Series() accepts (e.g. a
            dict), with 'Canonical', 'MANE Select' and either 'TSL' or
            'Transcript Support Level' entries. Non-Series input is converted
            and its 'Canonical' / 'MANE Select' values are evaluated from
            text unless they are 'Not Run'.
        transcript_prioritization_strategy: Container of criteria names;
            'mane_select', 'canonical' and 'tsl' are checked in that order.
        maximum_transcript_support_level: Largest TSL that still passes.

    Returns:
        True as soon as one requested criterion passes, otherwise False.
    """
    if not isinstance(mutation, pd.Series):
        mutation = pd.Series(mutation)
        # NOTE(review): eval() runs whatever text these columns hold. The
        # values presumably are the strings 'True'/'False' from a report
        # file - confirm, and consider a strict parser for untrusted input.
        if mutation['Canonical'] != 'Not Run':
            mutation['Canonical'] = eval(mutation['Canonical'])
        if mutation['MANE Select'] != 'Not Run':
            mutation['MANE Select'] = eval(mutation['MANE Select'])

    # A flag criterion passes when its annotation was not run or is truthy.
    for criterion, flag_column in (('mane_select', 'MANE Select'), ('canonical', 'Canonical')):
        if criterion in transcript_prioritization_strategy:
            flag = mutation[flag_column]
            if flag == 'Not Run' or flag:
                return True

    if 'tsl' not in transcript_prioritization_strategy:
        return False

    col = 'TSL' if 'TSL' in mutation else 'Transcript Support Level'
    support_level = mutation[col]
    if pd.isna(support_level) or support_level == 'NA':
        return False
    if support_level == 'Not Supported':
        return True
    return bool(int(support_level) <= maximum_transcript_support_level)

def metrics_to_column(tool, metric1, metric2):
    """Translate a metric pair into the matching report column name.

    metric1 ('median' or 'lowest') selects the prefix, metric2 the score
    name; for tool 'pvacseq' the word 'MT' is placed between the two.
    Unknown metric names raise KeyError.
    """
    prefixes = dict(median='Median', lowest='Best')
    score_names = dict(
        ic50='IC50 Score',
        combined_percentile='Percentile',
        binding_percentile='IC50 Percentile',
        immunogenicity_percentile='Immunogenicity Percentile',
        presentation_percentile='Presentation Percentile',
    )

    # metric1 is looked up first, as before, so it wins when both are unknown.
    parts = [prefixes[metric1], score_names[metric2]]
    if tool == 'pvacseq':
        parts.insert(1, 'MT')
    return ' '.join(parts)

def metric2_to_aggregate_column(metric2):
    """Return the aggregate report column name for metric2.

    Unknown metric names raise KeyError.
    """
    aggregate_columns = dict(
        ic50='IC50 MT',
        combined_percentile='%ile MT',
        binding_percentile='IC50 %ile MT',
        immunogenicity_percentile='IM %ile MT',
        presentation_percentile='Pres %ile MT',
    )
    return aggregate_columns[metric2]

1	import sys	3✔
2	import os	3✔
3	import csv	3✔
4	import binascii	3✔
5	import re	3✔
6	from itertools import islice	3✔
7	import argparse	3✔
8	import pandas as pd	3✔
9
10	def combine_reports(input_files, output_file):	3✔
11	fieldnames = []	3✔
12	for input_file in input_files:	3✔
13	with open(input_file, 'r') as input_file_handle:	3✔
14	reader = csv.DictReader(input_file_handle, delimiter='\t')	3✔
15	if len(fieldnames) == 0:	3✔
16	fieldnames = reader.fieldnames	3✔
17	else:
18	for fieldname in reader.fieldnames:	3✔
19	if fieldname not in fieldnames:	3✔
20	fieldnames.append(fieldname)	3✔
21
22	with open(output_file, 'w') as fout:	3✔
23	writer = csv.DictWriter(fout, delimiter="\t", restval='NA', fieldnames=fieldnames)	3✔
24	writer.writeheader()	3✔
25	for input_file in input_files:	3✔
26	with open(input_file, 'r') as input_file_handle:	3✔
27	reader = csv.DictReader(input_file_handle, delimiter='\t')	3✔
28	for row in reader:	3✔
29	writer.writerow(row)	3✔
30
31	def change_permissions_recursive(path, dir_mode, file_mode):	3✔
32	for root, dirs, files in os.walk(path, topdown=False):	3✔
33	for dir in [os.path.join(root,d) for d in dirs]:	3✔
34	os.chmod(dir, dir_mode)	3✔
35	for file in [os.path.join(root, f) for f in files]:	3✔
36	os.chmod(file, file_mode)	3✔
37
38	def is_gz_file(filepath):	3✔
39	with open(filepath, 'rb') as test_f:	3✔
40	return binascii.hexlify(test_f.read(2)) == b'1f8b'	3✔
41
42	def split_file(reader, lines):	3✔
43	i = iter(reader)	3✔
44	piece = list(islice(i, lines))	3✔
45	while piece:	3✔
46	yield piece	3✔
47	piece = list(islice(i, lines))	3✔
48
49	def construct_index(count, gene, transcript, variant_type, position):	3✔
50	return '{}.{}.{}.{}.{}'.format(count, gene, transcript, variant_type, position)	3✔
51
52	def float_range(minimum, maximum):	3✔
53	"""Return function handle of an argument type function for
54	ArgumentParser checking a float range: minimum <= arg <= maximum
55	minimum - minimum acceptable argument
56	maximum - maximum acceptable argument"""
57
58	# Define the function with default arguments
59	def float_range_checker(arg):	3✔
60	"""New Type function for argparse - a float within predefined range."""
61
62	try:	3✔
63	f = float(arg)	3✔
64	except ValueError:	3✔
65	raise argparse.ArgumentTypeError("must be a floating point number")	3✔
66	if f < minimum or f > maximum:	3✔
67	raise argparse.ArgumentTypeError("must be in range [" + str(minimum) + " .. " + str(maximum)+"]")	3✔
68	return f	3✔
69
70	# Return function handle to checking function
71	return float_range_checker	3✔
72
73	def aggregate_report_evaluations():	3✔
74	"""Return function handle of an argument type function for
75	ArgumentParser checking of the aggregate report evaluation values.
76	Valid values are: ['Accept', 'Reject', 'Pending', 'Review']"""
77
78	valid_values = ['Accept', 'Reject', 'Pending', 'Review']	3✔
79
80	def aggregate_report_evaluation_checker(arg):	3✔
81	arg_list = arg.split(",")	3✔
82	for argument in arg_list:	3✔
83	if argument not in valid_values:	3✔
84	raise argparse.ArgumentTypeError(	×
85	"Invalid evaluation '{}'. Valid values are: {}".format(argument, ", ".join(valid_values))
86	)
87	return arg_list	3✔
88
89	return aggregate_report_evaluation_checker	3✔
90
91	def transcript_prioritization_strategy():	3✔
92	"""Return function handle of an argument type function for
93	ArgumentParser checking of the transcript prioritization strategy
94	checking that the specified criteria are in the list of: ['canonical', 'mane_select', 'tsl']"""
95
96	# Define the function with default arguments
97	def transcript_prioritization_strategy_checker(arg):	3✔
98	"""New Type function for argparse - a comma-separated list with predefined valid values."""
99
100	arg_list = arg.split(",")	×
101	for argument in arg_list:	×
102	if argument not in ['canonical', 'mane_select', 'tsl']:	×
103	raise argparse.ArgumentTypeError("List element must be one of 'canonical', 'mane_select', 'tsl', not {}".format(argument))	×
104	return arg_list	×
105
106	# Return function handle to checking function
107	return transcript_prioritization_strategy_checker	3✔
108
109	def top_score_metric2():	3✔
110	"""Return function handle of an argument type function for
111	ArgumentParser checking of the top score metric2
112	checking that the specified criteria are in the list of: ['ic50', 'combined_percentile', 'binding_percentile', 'immunogenicity_percentile', 'presentation_percentile']"""
113
114	# Define the function with default arguments
115	def top_score_metric2_checker(arg):	3✔
116	"""New Type function for argparse - a comma-separated list with predefined valid values."""
117
118	arg_list = arg.split(",")	3✔
119	for argument in arg_list:	3✔
120	if argument not in ['ic50', 'combined_percentile', 'binding_percentile', 'immunogenicity_percentile', 'presentation_percentile']:	3✔
NEW 121	raise argparse.ArgumentTypeError("List element must be one of 'ic50', 'combined_percentile', 'binding_percentile', 'immunogenicity_percentile', 'presentation_percentile', not {}".format(argument))	×
122	return arg_list	3✔
123
124	# Return function handle to checking function
125	return top_score_metric2_checker	3✔
126
127	def pvacsplice_anchors():	3✔
128	"""Return function handle of an argument type function for
129	ArgumentParser checking of the pVACsplice anchors
130	checking that the specified criteria are in the list of: ['A', 'D', 'NDA']"""
131
132	# Define the function with default arguments
133	def pvacsplice_anchors_checker(arg):	3✔
134	"""New Type function for argparse - a comma-separated list with predefined valid values."""
135
136	arg_list = arg.split(",")	3✔
137	for argument in arg_list:	3✔
138	if argument not in ['A', 'D', 'NDA']:	3✔
139	raise argparse.ArgumentTypeError("List element must be one of 'A', 'D', 'NDA', not {}".format(argument))	3✔
140	return arg_list	3✔
141
142	# Return function handle to checking function
143	return pvacsplice_anchors_checker	3✔
144
145	def supported_amino_acids():	3✔
146	return ["A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"]	3✔
147
148	def determine_neoepitopes(sequence, length):	3✔
149	epitopes = {}	3✔
150	for i in range(0, len(sequence)-length+1):	3✔
151	epitopes[i+1] = sequence[i:i+length]	3✔
152	return epitopes	3✔
153
154	def get_mutated_peptide_with_flanking_sequence(wt_peptide, mt_peptide, flanking_length):	3✔
155	wt_epitopes = determine_neoepitopes(wt_peptide, flanking_length+1)	3✔
156	mt_epitopes = determine_neoepitopes(mt_peptide, flanking_length+1)	3✔
157	for start, (wt_epitope, mt_epitope) in enumerate(zip(list(wt_epitopes.values()), list(mt_epitopes.values()))):	3✔
158	if wt_epitope != mt_epitope:	3✔
159	break	3✔
160	for i, (wt_epitope, mt_epitope) in enumerate(zip(reversed(list(wt_epitopes.values())), reversed(list(mt_epitopes.values())))):	3✔
161	if wt_epitope != mt_epitope:	3✔
162	stop = len(mt_epitopes) - i + flanking_length	3✔
163	break	3✔
164	mutant_subsequence = mt_peptide[start:stop]	3✔
165	supported_aas = supported_amino_acids()	3✔
166	if mutant_subsequence[0] not in supported_aas:	3✔
167	mutant_subsequence = mutant_subsequence[1:]	×
168	if mutant_subsequence[-1] not in supported_aas:	3✔
169	mutant_subsequence = mutant_subsequence[0:-1]	×
170	if not all([c in supported_aas for c in mutant_subsequence]):	3✔
171	print("Warning. Mutant sequence contains unsupported amino acid. Skipping entry {}".format(line['index']))	×
172	return	×
173	return mutant_subsequence	3✔
174
175	def get_mutated_frameshift_peptide_with_flanking_sequence(wt_peptide, mt_peptide, flanking_length):	3✔
176	wt_epitopes = determine_neoepitopes(wt_peptide, flanking_length+1)	3✔
177	mt_epitopes = determine_neoepitopes(mt_peptide, flanking_length+1)	3✔
178	for start, (wt_epitope, mt_epitope) in enumerate(zip(list(wt_epitopes.values()), list(mt_epitopes.values()))):	3✔
179	if wt_epitope != mt_epitope:	3✔
180	break	3✔
181	mutant_subsequence = mt_peptide[start:]	3✔
182	supported_aas = supported_amino_acids()	3✔
183	if mutant_subsequence[0] not in supported_aas:	3✔
184	mutant_subsequence = mutant_subsequence[1:]	×
185	if mutant_subsequence[-1] not in supported_aas:	3✔
186	mutant_subsequence = mutant_subsequence[0:-1]	×
187	if not all([c in supported_aas for c in mutant_subsequence]):	3✔
188	print("Warning. Mutant sequence contains unsupported amino acid. Skipping entry {}".format(line['index']))	×
189	return	×
190	return mutant_subsequence	3✔
191
192	def is_preferred_transcript(mutation, transcript_prioritization_strategy, maximum_transcript_support_level):	3✔
193	if not isinstance(mutation, pd.Series):	3✔
194	mutation = pd.Series(mutation)	3✔
195	if mutation['Canonical'] != 'Not Run':	3✔
196	mutation['Canonical'] = eval(mutation['Canonical'])	3✔
197	if mutation['MANE Select'] != 'Not Run':	3✔
198	mutation['MANE Select'] = eval(mutation['MANE Select'])	3✔
199	if 'mane_select' in transcript_prioritization_strategy:	3✔
200	if mutation['MANE Select'] == 'Not Run':	3✔
201	return True	3✔
202	elif mutation['MANE Select']:	3✔
203	return True	3✔
204	if 'canonical' in transcript_prioritization_strategy:	3✔
205	if mutation['Canonical'] == 'Not Run':	3✔
206	return True	×
207	elif mutation['Canonical']:	3✔
208	return True	3✔
209	if 'tsl' in transcript_prioritization_strategy:	3✔
210	col = 'TSL' if 'TSL' in mutation else 'Transcript Support Level'	3✔
211	if pd.isna(mutation[col]):	3✔
212	return False	3✔
213	elif mutation[col] == 'NA':	3✔
214	return False	3✔
215	elif mutation[col] == 'Not Supported':	3✔
216	return True	3✔
217	elif int(mutation[col]) <= maximum_transcript_support_level:	3✔
218	return True	3✔
219	return False	3✔
220
221	def metrics_to_column(tool, metric1, metric2):	3✔
222	pretty_metric1 = {	3✔
223	'median': 'Median',
224	'lowest': 'Best'
225	}
226	pretty_metric2 = {	3✔
227	'ic50': 'IC50 Score',
228	'combined_percentile': 'Percentile',
229	'binding_percentile': 'IC50 Percentile',
230	'immunogenicity_percentile': 'Immunogenicity Percentile',
231	'presentation_percentile': 'Presentation Percentile'
232	}
233
234	if tool == 'pvacseq':	3✔
235	return f"{pretty_metric1[metric1]} MT {pretty_metric2[metric2]}"	3✔
236	else:
237	return f"{pretty_metric1[metric1]} {pretty_metric2[metric2]}"	3✔
238
239	def metric2_to_aggregate_column(metric2):	3✔
240	pretty_metric2 = {	3✔
241	'ic50': 'IC50 MT',
242	'combined_percentile': '%ile MT',
243	'binding_percentile': 'IC50 %ile MT',
244	'immunogenicity_percentile': 'IM %ile MT',
245	'presentation_percentile': 'Pres %ile MT'
246	}
247	return pretty_metric2[metric2]	3✔

griffithlab / pVACtools / 19267929486

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous