• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

openvax / varcode / 9781104697

03 Jul 2024 03:56PM UTC coverage: 88.33% (-6.4%) from 94.753%
9781104697

Pull #253

github

kodysy02
Defaulting convert_ucsc_contig_names to True for consistency with past behavior
Pull Request #253: Defaulting convert_ucsc_contig_names to True for consistency with past behavior

1 of 1 new or added line in 1 file covered. (100.0%)

144 existing lines in 17 files now uncovered.

1544 of 1748 relevant lines covered (88.33%)

2.65 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.3
/varcode/effects/effect_prediction_coding.py
1
# Licensed under the Apache License, Version 2.0 (the "License");
2
# you may not use this file except in compliance with the License.
3
# You may obtain a copy of the License at
4
#
5
#     http://www.apache.org/licenses/LICENSE-2.0
6
#
7
# Unless required by applicable law or agreed to in writing, software
8
# distributed under the License is distributed on an "AS IS" BASIS,
9
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
# See the License for the specific language governing permissions and
11
# limitations under the License.
12

13
from .effect_prediction_coding_frameshift import predict_frameshift_coding_effect
3✔
14
from .effect_prediction_coding_in_frame import predict_in_frame_coding_effect
3✔
15

16

17
def predict_variant_coding_effect_on_transcript(
        variant,
        transcript,
        trimmed_cdna_ref,
        trimmed_cdna_alt,
        transcript_offset):
    """
    Given a minimal cDNA ref/alt nucleotide string pair and an offset into a
    given transcript, determine the coding effect of this nucleotide substitution
    onto the translated protein.

    Parameters
    ----------
    variant : Variant

    transcript : Transcript

    trimmed_cdna_ref : str
        Reference nucleotides we expect to find in the transcript's CDS

    trimmed_cdna_alt : str
        Alternate nucleotides we're replacing the reference with

    transcript_offset : int
        Offset into the full transcript sequence of the ref->alt substitution

    Returns
    -------
    The result of predict_in_frame_coding_effect when the difference between
    the ref and alt lengths is a multiple of 3, otherwise the result of
    predict_frameshift_coding_effect.

    Raises
    ------
    ValueError
        If the transcript is not complete, or if the span between its first
        start codon and last stop codon is shorter than 3 nucleotides.

    AssertionError
        If the nucleotides found on the transcript at `transcript_offset`
        differ from `trimmed_cdna_ref`, or if the computed CDS offset is not
        smaller than the CDS length.
    """
    if not transcript.complete:
        raise ValueError(
            ("Can't annotate coding effect for %s"
             " on incomplete transcript %s" % (variant, transcript)))

    sequence = transcript.sequence

    n_ref = len(trimmed_cdna_ref)
    n_alt = len(trimmed_cdna_alt)

    # reference nucleotides found on the transcript, if these don't match
    # what we were told to expect from the variant then raise an exception
    ref_nucleotides_from_transcript = str(
        sequence[transcript_offset:transcript_offset + n_ref])

    # Make sure that the reference sequence agrees with what we expected
    # from the VCF. This is raised explicitly (rather than via an `assert`
    # statement) so that the check is still enforced when Python runs with
    # -O; the exception type and message are unchanged.
    if ref_nucleotides_from_transcript != trimmed_cdna_ref:
        raise AssertionError(
            "%s: expected ref '%s' at offset %d of %s, transcript has '%s'" % (
                variant,
                trimmed_cdna_ref,
                transcript_offset,
                transcript,
                ref_nucleotides_from_transcript))

    start_codon_offset = transcript.first_start_codon_spliced_offset
    stop_codon_offset = transcript.last_stop_codon_spliced_offset

    # both offsets are inclusive, hence the +1
    cds_len = stop_codon_offset - start_codon_offset + 1

    if cds_len < 3:
        raise ValueError(
            "Coding sequence for %s is too short: '%s'" % (
                transcript,
                transcript.sequence[start_codon_offset:stop_codon_offset + 1]))

    if n_ref == 0 and transcript.strand == "-":
        # By convention, genomic insertions happen *after* their base 1 position on
        # a chromosome. On the reverse strand, however, an insertion has to go
        # before the nucleotide at some transcript offset.
        # Example:
        #    chromosome sequence:
        #        TTT|GATCTCGTA|CCC
        #    transcript on reverse strand:
        #        CCC|ATGCTCTAG|TTT
        #    where the CDS is emphasized:
        #            ATGCTCTAG
        # If we have a genomic insertion g.6insATT
        # the genomic sequence becomes:
        #       TTT|GAT_ATT_CTCGTA|CCC
        # (insert the "ATT" after the "T" at position 6)
        # On the reverse strand this becomes:
        #       CCC|ATGCTC_TTA_TAG|TTT
        # (insert the "ATT" *before* the "T" at position 10)
        #
        # To preserve the interpretation of the start offset as the base
        # before the insertion, need to subtract one
        cds_offset = transcript_offset - start_codon_offset - 1
    else:
        cds_offset = transcript_offset - start_codon_offset

    # Explicit raise instead of `assert` so the bounds check survives -O.
    if cds_offset >= cds_len:
        raise AssertionError(
            "Expected CDS offset (%d) < |CDS| (%d) for %s on %s" % (
                cds_offset, cds_len, variant, transcript))

    sequence_from_start_codon = str(sequence[start_codon_offset:])

    # is this an in-frame mutation?
    if (n_ref - n_alt) % 3 == 0:
        return predict_in_frame_coding_effect(
            variant=variant,
            transcript=transcript,
            trimmed_cdna_ref=trimmed_cdna_ref,
            trimmed_cdna_alt=trimmed_cdna_alt,
            cds_offset=cds_offset,
            sequence_from_start_codon=sequence_from_start_codon)
    else:
        return predict_frameshift_coding_effect(
            variant=variant,
            transcript=transcript,
            trimmed_cdna_ref=trimmed_cdna_ref,
            trimmed_cdna_alt=trimmed_cdna_alt,
            cds_offset=cds_offset,
            sequence_from_start_codon=sequence_from_start_codon)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc