• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

openvax / pyensembl / 8833745321

25 Apr 2024 01:44PM UTC coverage: 83.019% (-13.0%) from 96.024%
8833745321

push

github

iskandr
fixed pyensembl list with lower min version

2 of 2 new or added lines in 2 files covered. (100.0%)

211 existing lines in 11 files now uncovered.

1320 of 1590 relevant lines covered (83.02%)

2.49 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.82
/pyensembl/sequence_data.py
1
# Licensed under the Apache License, Version 2.0 (the "License");
2
# you may not use this file except in compliance with the License.
3
# You may obtain a copy of the License at
4
#
5
#     http://www.apache.org/licenses/LICENSE-2.0
6
#
7
# Unless required by applicable law or agreed to in writing, software
8
# distributed under the License is distributed on an "AS IS" BASIS,
9
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
# See the License for the specific language governing permissions and
11
# limitations under the License.
12

13
from os import remove
3✔
14
from os.path import exists, abspath, split, join
3✔
15
import logging
3✔
16
from collections import Counter
3✔
17
import pickle
3✔
18
from .common import load_pickle, dump_pickle
3✔
19
from .fasta import parse_fasta_dictionary
3✔
20

21

22
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
3✔
23

24

25
class SequenceData(object):
    """
    Container for reference nucleotide and amino acid sequences.

    Sequences are read from one or more FASTA files and cached on disk as
    one pickle file per FASTA file (named ``<fasta filename>.pickle``).
    """

    def __init__(self, fasta_paths, cache_directory_path=None):
        """
        Parameters
        ----------
        fasta_paths : str or list of str
            Path(s) to FASTA file(s). A single string is treated as a
            one-element list.

        cache_directory_path : str, optional
            Directory in which the pickle files are stored. When omitted,
            each pickle is stored in the directory of its FASTA file.

        Raises
        ------
        ValueError
            If any of the FASTA paths does not exist.
        """
        # isinstance (rather than an exact type check) so that str
        # subclasses are also treated as a single path.
        if isinstance(fasta_paths, str):
            fasta_paths = [fasta_paths]

        self.fasta_paths = [abspath(path) for path in fasta_paths]
        self.fasta_directory_paths = [split(path)[0] for path in self.fasta_paths]
        self.fasta_filenames = [split(path)[1] for path in self.fasta_paths]
        if cache_directory_path:
            self.cache_directory_paths = [cache_directory_path] * len(self.fasta_paths)
        else:
            self.cache_directory_paths = self.fasta_directory_paths
        for path in self.fasta_paths:
            if not exists(path):
                raise ValueError("Couldn't find FASTA file %s" % (path,))
        self.fasta_dictionary_filenames = [
            filename + ".pickle" for filename in self.fasta_filenames
        ]
        self.fasta_dictionary_pickle_paths = [
            join(cache_path, filename)
            for cache_path, filename in zip(
                self.cache_directory_paths, self.fasta_dictionary_filenames
            )
        ]
        self._init_lazy_fields()

    def _init_lazy_fields(self):
        """Reset the in-memory state that is populated on first use."""
        self._fasta_dictionary = None
        self._fasta_keys = None

    def clear_cache(self):
        """Drop the in-memory dictionary and delete any pickle files."""
        self._init_lazy_fields()
        for path in self.fasta_dictionary_pickle_paths:
            if exists(path):
                remove(path)

    def __str__(self):
        return "SequenceData(fasta_paths=%s)" % (self.fasta_paths,)

    def __repr__(self):
        return str(self)

    def __contains__(self, sequence_id):
        """Return True if `sequence_id` is a key of the sequence dictionary."""
        if self._fasta_keys is None:
            self._fasta_keys = set(self.fasta_dictionary.keys())
        return sequence_id in self._fasta_keys

    def __eq__(self, other):
        # test to see if self.fasta_paths and other.fasta_paths contain
        # the same list of paths, regardless of order
        return (other.__class__ is SequenceData) and Counter(
            self.fasta_paths
        ) == Counter(other.fasta_paths)

    def __hash__(self):
        # fasta_paths is a list, which is unhashable; hash a sorted tuple
        # instead so that objects which compare equal under the
        # order-insensitive __eq__ also share the same hash.
        return hash(tuple(sorted(self.fasta_paths)))

    def _add_to_fasta_dictionary(self, fasta_dictionary_tmp):
        """
        Merge `fasta_dictionary_tmp` into the combined dictionary. The first
        sequence seen for an identifier wins; later duplicates are skipped
        with a warning.
        """
        for identifier, sequence in fasta_dictionary_tmp.items():
            if identifier in self._fasta_dictionary:
                logger.warning(
                    "Sequence identifier %s is duplicated in your FASTA files!",
                    identifier,
                )
                continue
            self._fasta_dictionary[identifier] = sequence

    def _load_single_fasta_dictionary(self, fasta_path, pickle_path):
        """
        Merge the sequences of one FASTA file into the combined dictionary,
        using its pickle file when one exists and can be loaded, otherwise
        parsing the FASTA file and writing a fresh pickle.
        """
        if exists(pickle_path):
            # try loading the cached file
            # but we'll fall back on recreating it if loading fails
            # NOTE(review): load_pickle presumably unpickles the file, which
            # can execute arbitrary code -- the cache directory must be
            # trusted.
            try:
                fasta_dictionary_tmp = load_pickle(pickle_path)
                self._add_to_fasta_dictionary(fasta_dictionary_tmp)
                logger.info("Loaded sequence dictionary from %s", pickle_path)
                return
            except (
                pickle.UnpicklingError,
                AttributeError,
                EOFError,
                ImportError,
                IndexError,
            ):
                # the pickle documentation lists these as the errors that
                # unpickling may raise, e.g. for a truncated file or for
                # pickled objects referring to classes that no longer exist
                logger.warning(
                    "Failed to load %s, attempting to read FASTA directly",
                    pickle_path,
                )
        logger.info("Parsing sequences from FASTA file at %s", fasta_path)

        fasta_dictionary_tmp = parse_fasta_dictionary(fasta_path)
        self._add_to_fasta_dictionary(fasta_dictionary_tmp)
        logger.info("Saving sequence dictionary to %s", pickle_path)
        dump_pickle(fasta_dictionary_tmp, pickle_path)

    def _load_or_create_fasta_dictionary_pickle(self):
        """
        (Re)build the combined sequence dictionary from every FASTA file,
        going through the per-file pickle files.
        """
        self._fasta_dictionary = dict()
        # the key set cached by __contains__ belongs to the old dictionary
        self._fasta_keys = None
        try:
            for fasta_path, pickle_path in zip(
                self.fasta_paths, self.fasta_dictionary_pickle_paths
            ):
                self._load_single_fasta_dictionary(fasta_path, pickle_path)
        except Exception:
            # don't leave a partially filled dictionary behind: later
            # lookups would silently treat it as complete
            self._init_lazy_fields()
            raise

    def index(self, overwrite=False):
        """
        Load the sequence dictionary, creating pickle files where needed.
        With overwrite=True the existing pickle files are deleted first.
        """
        if overwrite:
            self.clear_cache()
        self._load_or_create_fasta_dictionary_pickle()

    @property
    def fasta_dictionary(self):
        """Dictionary from sequence identifier to sequence, loaded lazily."""
        # compare against None: an empty dictionary is a valid loaded state
        # and must not trigger a reload on every access
        if self._fasta_dictionary is None:
            self._load_or_create_fasta_dictionary_pickle()
        return self._fasta_dictionary

    def get(self, sequence_id):
        """Get sequence associated with given ID or return None if missing"""
        return self.fasta_dictionary.get(sequence_id)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc