• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

openvax / vaxrank / 9023903741

09 May 2024 09:39PM UTC coverage: 89.647% (-6.6%) from 96.289%
9023903741

Pull #200

github

web-flow
Merge 9871df79b into a95e4e190
Pull Request #200: Make Vaxrank's epitope/vaccine peptide scoring/filtering/ranking more configurable via YAML

85 of 90 new or added lines in 6 files covered. (94.44%)

83 existing lines in 8 files now uncovered.

762 of 850 relevant lines covered (89.65%)

2.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.04
/vaxrank/cli.py
1
# Licensed under the Apache License, Version 2.0 (the "License");
2
# you may not use this file except in compliance with the License.
3
# You may obtain a copy of the License at
4
#
5
#       http://www.apache.org/licenses/LICENSE-2.0
6
#
7
# Unless required by applicable law or agreed to in writing, software
8
# distributed under the License is distributed on an "AS IS" BASIS,
9
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
# See the License for the specific language governing permissions and
11
# limitations under the License.
12

13
import sys
3✔
14
import logging
3✔
15
import logging.config
3✔
16
import pkg_resources
3✔
17

18
from argparse import ArgumentParser
3✔
19

20
from isovar import isovar_results_to_dataframe
3✔
21
from isovar.cli import (make_isovar_arg_parser, run_isovar_from_parsed_args,)
3✔
22
from mhctools.cli import (
3✔
23
    add_mhc_args,
24
    mhc_alleles_from_args,
25
    mhc_binding_predictor_from_args,
26
)
27

28
import pandas as pd
3✔
29
import serializable
3✔
30
from varcode.cli import variant_collection_from_args
3✔
31

32
from . import __version__
3✔
33
from .core_logic import run_vaxrank
3✔
34
from .gene_pathway_check import GenePathwayCheck
3✔
35
from .report import (
3✔
36
    make_ascii_report,
37
    make_html_report,
38
    make_pdf_report,
39
    make_csv_report,
40
    make_minimal_neoepitope_report,
41
    TemplateDataCreator,
42
)
43
from .patient_info import PatientInfo
3✔
44
from .config import Config 
3✔
45

46
logger = logging.getLogger(__name__)
3✔
47

48

49
def make_vaxrank_arg_parser():
3✔
50
    # create common parser with the --version flag
51
    parent_parser = ArgumentParser('parent', add_help=False)
3✔
52
    parent_parser.add_argument('--version', action='version', version='Vaxrank %s' % (__version__,))
3✔
53

54
    # inherit commandline options from Isovar
55
    arg_parser = make_isovar_arg_parser(
3✔
56
        prog="vaxrank",
57
        description=(
58
            "Select personalized vaccine peptides from cancer variants, "
59
            "expression data, and patient HLA type."),
60
        parents=[parent_parser],
61
    )
62
    add_mhc_args(arg_parser)
3✔
63
    add_vaccine_peptide_args(arg_parser)
3✔
64
    add_output_args(arg_parser)
3✔
65
    add_optional_output_args(arg_parser)
3✔
66
    add_supplemental_report_args(arg_parser)
3✔
67
    return arg_parser
3✔
68

69

70
def cached_run_arg_parser():
3✔
UNCOV
71
    arg_parser = ArgumentParser(
×
72
        prog="vaxrank",
73
        description=(
74
            "Select personalized vaccine peptides from cancer variants, "
75
            "expression data, and patient HLA type."),
76
    )
UNCOV
77
    arg_parser.add_argument(
×
78
        "--input-json-file",
79
        default="",
80
        help="Path to JSON file containing results of vaccine peptide report")
UNCOV
81
    add_output_args(arg_parser)
×
UNCOV
82
    add_optional_output_args(arg_parser)
×
UNCOV
83
    add_supplemental_report_args(arg_parser)
×
UNCOV
84
    return arg_parser
×
85

86

87

88
# Lets the user specify whether they want to see particular sections in the report.
89
def add_optional_output_args(arg_parser):
3✔
90
    manufacturability_args = arg_parser.add_mutually_exclusive_group(required=False)
3✔
91
    manufacturability_args.add_argument(
3✔
92
        "--include-manufacturability-in-report",
93
        dest="manufacturability",
94
        action="store_true")
95

96
    manufacturability_args.add_argument(
3✔
97
        "--no-manufacturability-in-report",
98
        dest="manufacturability",
99
        action="store_false")
100
    arg_parser.set_defaults(manufacturability=True)
3✔
101

102
    wt_epitope_args = arg_parser.add_mutually_exclusive_group(required=False)
3✔
103
    wt_epitope_args.add_argument(
3✔
104
        "--include-non-overlapping-epitopes-in-report",
105
        dest="wt_epitopes",
106
        action="store_true",
107
        help="Set to true to include a report section for each vaccine peptide containing "
108
             "strong binders that do not overlap the mutation")
109

110
    wt_epitope_args.add_argument(
3✔
111
        "--no-non-overlapping-epitopes-in-report",
112
        dest="wt_epitopes",
113
        action="store_false",
114
        help="Set to false to exclude report information for each vaccine peptide about "
115
             "strong binders that do not overlap the mutation")
116
    arg_parser.set_defaults(wt_epitopes=True)
3✔
117

118

119
def add_output_args(arg_parser):
3✔
120
    output_args_group = arg_parser.add_argument_group("Output options")
3✔
121

122
    output_args_group.add_argument(
3✔
123
        "--output-patient-id",
124
        default="",
125
        help="Patient ID to use in report")
126

127
    output_args_group.add_argument(
3✔
128
        "--output-csv",
129
        default="",
130
        help="Name of CSV file which contains predicted sequences")
131

132
    output_args_group.add_argument(
3✔
133
        "--output-ascii-report",
134
        default="",
135
        help="Path to ASCII vaccine peptide report")
136

137
    output_args_group.add_argument(
3✔
138
        "--output-html-report",
139
        default="",
140
        help="Path to HTML vaccine peptide report")
141

142
    output_args_group.add_argument(
3✔
143
        "--output-pdf-report",
144
        default="",
145
        help="Path to PDF vaccine peptide report")
146

147
    output_args_group.add_argument(
3✔
148
        "--output-json-file",
149
        default="",
150
        help="Path to JSON file containing results of vaccine peptide report")
151

152
    output_args_group.add_argument(
3✔
153
        "--output-xlsx-report",
154
        default="",
155
        help="Path to XLSX vaccine peptide report worksheet, one sheet per variant. This is meant "
156
             "for use by the vaccine manufacturer.")
157

158
    output_args_group.add_argument(
3✔
159
        "--output-neoepitope-report",
160
        default="",
161
        help="Path to XLSX neoepitope report, containing information focusing on short peptide "
162
             "sequences.")
163

164
    output_args_group.add_argument(
3✔
165
        "--output-reviewed-by",
166
        default="",
167
        help="Comma-separated list of reviewer names")
168

169
    output_args_group.add_argument(
3✔
170
        "--output-final-review",
171
        default="",
172
        help="Name of final reviewer of report")
173

174
    output_args_group.add_argument(
3✔
175
        "--output-passing-variants-csv",
176
        default="",
177
        help="Path to CSV file containing some metadata about every variant that has passed all "
178
             "variant caller filters")
179

180
    output_args_group.add_argument(
3✔
181
        "--output-isovar-csv",
182
        default="",
183
        help="Path to CSV file containing raw RNA counts and filtering metadata "
184
             "for all variants (generated by Isovar)")
185

186
    output_args_group.add_argument(
3✔
187
        "--log-path",
188
        default="python.log",
189
        help="File path to write the vaxrank Python log to")
190

191
    output_args_group.add_argument(
3✔
192
        "--max-mutations-in-report",
193
        default=None,
194
        type=int,
195
        help="Number of mutations to report")
196

197

198
def add_vaccine_peptide_args(arg_parser):
3✔
199
    vaccine_peptide_group = arg_parser.add_argument_group("Vaccine peptide options")
3✔
200
    vaccine_peptide_group.add_argument(
3✔
201
        "--vaccine-peptide-length",
202
        default=25,
203
        type=int,
204
        help="Number of amino acids in the vaccine peptides. (default: %(default)s)")
205

206
    vaccine_peptide_group.add_argument(
3✔
207
        "--padding-around-mutation",
208
        default=5,
209
        type=int,
210
        help=(
211
            "Number of off-center windows around the mutation to consider "
212
            "as vaccine peptides. (default: %(default)s)"
213
        ))
214

215
    vaccine_peptide_group.add_argument(
3✔
216
        "--max-vaccine-peptides-per-mutation",
217
        default=1,
218
        type=int,
219
        help=(
220
            "Number of vaccine peptides to generate for each mutation. "
221
            "(default: %(default)s)"
222
        ))
223

224
    vaccine_peptide_group.add_argument(
3✔
225
        "--min-epitope-score",
226
        default=1e-10,
227
        type=float,
228
        help=(
229
            "Ignore predicted MHC ligands whose normalized binding score "
230
            "falls below this threshold. (default: %(default)s)"))
231

232
    vaccine_peptide_group.add_argument(
3✔
233
        "--num-epitopes-per-vaccine-peptide",
234
        type=int,
235
        help=(
236
            "Maximum number of mutant epitopes to consider when scoring "
237
            "each vaccine peptide. (default: %(default)s)"))
238

239

240
def add_supplemental_report_args(arg_parser):
3✔
241
    report_args_group = arg_parser.add_argument_group("Supplemental report options")
3✔
242
    report_args_group.add_argument(
3✔
243
        "--cosmic_vcf_filename",
244
        default="",
245
        help="Local path to COSMIC vcf")
246

247

248
def check_args(args):
3✔
249
    if not (args.output_csv or
3✔
250
            args.output_ascii_report or
251
            args.output_html_report or
252
            args.output_pdf_report or
253
            args.output_json_file or
254
            args.output_xlsx_report or
255
            args.output_neoepitope_report or
256
            args.output_passing_variants_csv or
257
            args.output_isovar_csv):
UNCOV
258
        raise ValueError(
×
259
            "Must specify at least one of: --output-csv, "
260
            "--output-xlsx-report, "
261
            "--output-ascii-report, "
262
            "--output-html-report, "
263
            "--output-pdf-report, "
264
            "--output-neoepitope-report, "
265
            "--output-json-file, "
266
            "--output-passing-variants-csv, "
267
            "--output-isovar-csv")
268

269
def run_vaxrank_from_parsed_args(args):
3✔
270
    mhc_predictor = mhc_binding_predictor_from_args(args)
3✔
271

272
    args.protein_sequence_length = (
3✔
273
            args.vaccine_peptide_length + 2 * args.padding_around_mutation
274
    )
275

276
    # Vaxrank is going to evaluate multiple vaccine peptides containing
277
    # the same mutation so need a longer sequence from Isovar
278
    isovar_results = run_isovar_from_parsed_args(args)
3✔
279

280
    if args.output_isovar_csv:
3✔
281
        df = isovar_results_to_dataframe(isovar_results)
3✔
282
        df.to_csv(args.output_isovar_csv, index=False)
3✔
283

284
    config = Config(min_epitope_score=args.min_epitope_score)
3✔
285
    return run_vaxrank(
3✔
286
        isovar_results=isovar_results,
287
        mhc_predictor=mhc_predictor,
288
        vaccine_peptide_length=args.vaccine_peptide_length,
289
        max_vaccine_peptides_per_variant=args.max_vaccine_peptides_per_mutation,
290
        num_mutant_epitopes_to_keep=args.num_epitopes_per_vaccine_peptide,
291
        config=config)
292

293
def ranked_vaccine_peptides_with_metadata_from_parsed_args(args):
3✔
294
    """
295
    Computes all the data needed for report generation.
296

297
    Parameters
298
    ----------
299
    args : Namespace
300
      Parsed user args from this run
301

302
    Returns a dictionary containing 3 items:
303
    - ranked variant/vaccine peptide list
304
    - a dictionary of command-line arguments used to generate it
305
    - patient info object
306
    """
307

308
    if hasattr(args, 'input_json_file'):
3✔
UNCOV
309
        with open(args.input_json_file) as f:
×
310

UNCOV
311
            data = serializable.from_json(f.read())
×
312
            # the JSON data from the previous run will have the older args saved, which may need to
313
            # be overridden with args from this run (which all be output related)
UNCOV
314
            data['args'].update(vars(args))
×
315

316
            # if we need to truncate the variant list based on max_mutations_in_report, do that here
UNCOV
317
            if len(data['variants']) > args.max_mutations_in_report:
×
UNCOV
318
                data['variants'] = data['variants'][:args.max_mutations_in_report]
×
UNCOV
319
            return data
×
320
    # get various things from user args
321
    mhc_alleles = mhc_alleles_from_args(args)
3✔
322
    logger.info("MHC alleles: %s", mhc_alleles)
3✔
323

324
    variants = variant_collection_from_args(args)
3✔
325
    logger.info("Variants: %s", variants)
3✔
326

327
    vaxrank_results = run_vaxrank_from_parsed_args(args)
3✔
328

329
    variants_count_dict = vaxrank_results.variant_counts()
3✔
330
    assert len(variants) == variants_count_dict['num_total_variants'], \
3✔
331
        "Len(variants) is %d but variants_count_dict came back with %d" % (
332
            len(variants), variants_count_dict['num_total_variants'])
333

334
    if args.output_passing_variants_csv:
3✔
335
        variant_metadata_dicts = vaxrank_results.variant_properties(
3✔
336
            gene_pathway_check=GenePathwayCheck())
337
        df = pd.DataFrame(variant_metadata_dicts)
3✔
338
        df.to_csv(args.output_passing_variants_csv, index=False)
3✔
339

340
    ranked_variants_with_vaccine_peptides = vaxrank_results.ranked_vaccine_peptides
3✔
341
    ranked_variants_with_vaccine_peptides_for_report = \
3✔
342
        ranked_variants_with_vaccine_peptides[:args.max_mutations_in_report]
343
    patient_info = PatientInfo(
3✔
344
        patient_id=args.output_patient_id,
345
        vcf_paths=variants.sources,
346
        bam_path=args.bam,
347
        mhc_alleles=mhc_alleles,
348
        num_somatic_variants=variants_count_dict['num_total_variants'],
349
        num_coding_effect_variants=variants_count_dict['num_coding_effect_variants'],
350
        num_variants_with_rna_support=variants_count_dict['num_variants_with_rna_support'],
351
        num_variants_with_vaccine_peptides=variants_count_dict['num_variants_with_vaccine_peptides']
352
    )
353
    # return variants, patient info, and command-line args
354
    data = {
3✔
355
        # TODO:
356
        #  change this field to 'ranked_variants_with_vaccine_peptides'
357
        #  but figure out how to do it in a backwards compatible way
358
        'variants': ranked_variants_with_vaccine_peptides_for_report,
359
        'patient_info': patient_info,
360
        'args': vars(args),
361
    }
362
    logger.info('About to save args: %s', data['args'])
3✔
363

364
    # save JSON data if necessary. as of time of writing, vaxrank takes ~25 min to run,
365
    # most of which is core logic. the formatting is super fast, and it can
366
    # be useful to save the data to be able to iterate just on the formatting
367
    if args.output_json_file:
3✔
368
        with open(args.output_json_file, 'w') as f:
3✔
369
            f.write(serializable.to_json(data))
3✔
370
            logger.info('Wrote JSON report data to %s', args.output_json_file)
3✔
371

372
    return data
3✔
373

374
def configure_logging(args):
3✔
375
    logging.config.fileConfig(
3✔
376
        pkg_resources.resource_filename(
377
            __name__,
378
            'logging.conf'),
379
        defaults={'logfilename': args.log_path})
380

381
def choose_arg_parser(args_list):
3✔
382
    # TODO: replace this with a command sub-parser
383
    if "--input-json-file" in args_list:
3✔
UNCOV
384
        return cached_run_arg_parser()
×
385
    else:
386
        return make_vaxrank_arg_parser()
3✔
387

388
def parse_vaxrank_args(args_list):
3✔
389
    arg_parser = choose_arg_parser(args_list)
3✔
390
    return arg_parser.parse_args(args_list)
3✔
391

392
def main(args_list=None):
3✔
393
    """
394
    Script to generate vaccine peptide predictions from somatic cancer variants,
395
    patient HLA type, and tumor RNA-seq data.
396

397
    Example usage:
398
        vaxrank
399
            --vcf somatic.vcf \
400
            --bam rnaseq.bam \
401
            --vaccine-peptide-length 25 \
402
            --output-csv vaccine-peptides.csv
403
    """
404
    if args_list is None:
3✔
UNCOV
405
        args_list = sys.argv[1:]
×
406

407
    args = parse_vaxrank_args(args_list)
3✔
408
    configure_logging(args)
3✔
409
    logger.info(args)
3✔
410
    check_args(args)
3✔
411

412
    data = ranked_vaccine_peptides_with_metadata_from_parsed_args(args)
3✔
413

414
    ranked_variants_with_vaccine_peptides = data['variants']
3✔
415
    patient_info = data['patient_info']
3✔
416
    args_for_report = data['args']
3✔
417

418
    ###################
419
    # CSV-based reports
420
    ###################
421
    if args.output_csv or args.output_xlsx_report:
3✔
422
        make_csv_report(
3✔
423
            ranked_variants_with_vaccine_peptides,
424
            excel_report_path=args.output_xlsx_report,
425
            csv_report_path=args.output_csv)
426

427
    if args.output_neoepitope_report:
3✔
UNCOV
428
        make_minimal_neoepitope_report(
×
429
            ranked_variants_with_vaccine_peptides,
430
            num_epitopes_per_peptide=args.num_epitopes_per_vaccine_peptide,
431
            excel_report_path=args.output_neoepitope_report)
432

433
    ########################
434
    # Template-based reports
435
    ########################
436

437
    if not (args.output_ascii_report or args.output_html_report or args.output_pdf_report):
3✔
438
        return
3✔
439

440
    input_json_file = args.input_json_file if hasattr(args, 'input_json_file') else None
3✔
441
    template_data_creator = TemplateDataCreator(
3✔
442
        ranked_variants_with_vaccine_peptides=ranked_variants_with_vaccine_peptides,
443
        patient_info=patient_info,
444
        final_review=args.output_final_review,
445
        reviewers=args.output_reviewed_by,
446
        args_for_report=args_for_report,
447
        input_json_file=input_json_file,
448
        cosmic_vcf_filename=args.cosmic_vcf_filename)
449

450
    template_data = template_data_creator.compute_template_data()
3✔
451

452
    if args.output_ascii_report:
3✔
453
        make_ascii_report(
3✔
454
            template_data=template_data,
455
            ascii_report_path=args.output_ascii_report)
456

457
    if args.output_html_report:
3✔
458
        make_html_report(
3✔
459
            template_data=template_data,
460
            html_report_path=args.output_html_report)
461

462
    if args.output_pdf_report:
3✔
463
        make_pdf_report(
3✔
464
            template_data=template_data,
465
            pdf_report_path=args.output_pdf_report)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc