• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Clinical-Genomics / cg / 13591285535

28 Feb 2025 03:29PM UTC coverage: 85.639%. First build
13591285535

Pull #4257

github

web-flow
Merge ffcbfc4bb into 7b01f19f6
Pull Request #4257: Refactor Crunchy API

87 of 90 new or added lines in 3 files covered. (96.67%)

26083 of 30457 relevant lines covered (85.64%)

0.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.11
/cg/models/compression_data.py
1
"""Class to hold file information about a compression entity"""
2

3
import logging
1✔
4
import os
1✔
5
from datetime import date, datetime
1✔
6
from pathlib import Path
1✔
7

8
from cg.apps.crunchy.files import check_if_update_spring, get_crunchy_metadata, get_file_updated_at
1✔
9
from cg.apps.crunchy.models import CrunchyMetadata
1✔
10
from cg.constants import FASTQ_FIRST_READ_SUFFIX, FASTQ_SECOND_READ_SUFFIX, FileExtensions
1✔
11
from cg.constants.compression import PENDING_PATH_SUFFIX
1✔
12

13
LOG = logging.getLogger(__name__)
1✔
14

15

16
class CompressionData:
1✔
17
    """Holds information about compression data"""
18

19
    def __init__(self, stub: Path = None):
1✔
20
        """Initialise a compression data object
21

22
        The stub is first part of the file name
23
        """
24
        self.stub = stub
1✔
25
        self.stub_string = str(self.stub)
1✔
26

27
    @property
1✔
28
    def pending_path(self) -> Path:
1✔
29
        """Return the path to a compression pending file"""
30
        return self.stub.with_suffix(PENDING_PATH_SUFFIX)
1✔
31

32
    @property
1✔
33
    def spring_path(self) -> Path:
1✔
34
        """Return the path to a SPRING file"""
35
        return self.stub.with_suffix(FileExtensions.SPRING)
1✔
36

37
    @property
1✔
38
    def encrypted_spring_path(self) -> Path:
1✔
39
        """Return the path to a SPRING file"""
40
        return self.stub.with_suffix(FileExtensions.SPRING).with_suffix(FileExtensions.GPG)
×
41

42
    @property
1✔
43
    def spring_metadata_path(self) -> Path:
1✔
44
        """Return the path to a SPRING metadata file"""
45
        return self.stub.with_suffix(".json")
1✔
46

47
    @property
1✔
48
    def analysis_dir(self) -> Path:
1✔
49
        """Return the path to folder where analysis is"""
50
        return self.stub.resolve().parent
1✔
51

52
    @property
1✔
53
    def fastq_first(self) -> Path:
1✔
54
        """Return the path to the first read in pair"""
55
        return Path(self.stub_string + FASTQ_FIRST_READ_SUFFIX)
1✔
56

57
    @property
1✔
58
    def fastq_second(self) -> Path:
1✔
59
        """Return the path to the second read in pair"""
60
        return Path(self.stub_string + FASTQ_SECOND_READ_SUFFIX)
1✔
61

62
    @property
1✔
63
    def run_name(self) -> str:
1✔
64
        """Return the name of the sequencing run identifier"""
65
        return self.stub.name
1✔
66

67
    def pair_exists(self) -> bool:
1✔
68
        """Check that both files in FASTQ pair exists"""
69
        LOG.info("Check if FASTQ pair exists")
1✔
70
        if not self.file_exists_and_is_accessible(self.fastq_first):
1✔
71
            return False
1✔
72
        return bool(self.file_exists_and_is_accessible(self.fastq_second))
1✔
73

74
    @staticmethod
1✔
75
    def is_absolute(file_path: Path) -> bool:
1✔
76
        """Check if file path can be resolved"""
77
        if not file_path.is_absolute():
1✔
78
            LOG.info("Could not resolve full path from HK to %s", file_path)
×
79
            return False
×
80
        return True
1✔
81

82
    @staticmethod
1✔
83
    def file_exists_and_is_accessible(file_path: Path) -> bool:
1✔
84
        """Check if file exists and is accesible"""
85
        try:
1✔
86
            if not file_path.exists():
1✔
87
                LOG.info("%s does not exist", file_path)
1✔
88
                return False
1✔
89
        except PermissionError:
×
90
            LOG.warning("Not permitted to access %s. Skipping", file_path)
×
91
            return False
×
92
        return True
1✔
93

94
    @staticmethod
1✔
95
    def is_symlink(file_path: Path) -> bool:
1✔
96
        """Check if file path is symbolik link"""
97
        LOG.info("Check if %s is a symlink", file_path)
×
98
        return os.path.islink(file_path)
×
99

100
    @staticmethod
1✔
101
    def get_change_date(file_path: Path) -> datetime:
1✔
102
        """Return the time when this file was changed"""
103
        changed_date = datetime.fromtimestamp(file_path.stat().st_mtime)
1✔
104
        LOG.info("File %s was changed %s", file_path, changed_date)
1✔
105
        return changed_date
1✔
106

107
    def spring_exists(self) -> bool:
1✔
108
        """Check if the SPRING file exists"""
109
        LOG.info("Check if SPRING archive file exists")
1✔
110
        return self.file_exists_and_is_accessible(self.spring_path)
1✔
111

112
    def metadata_exists(self) -> bool:
1✔
113
        """Check if the SPRING metadata file exists"""
114
        LOG.info("Check if SPRING metadata file exists")
1✔
115
        return self.file_exists_and_is_accessible(self.spring_metadata_path)
1✔
116

117
    def pending_exists(self) -> bool:
1✔
118
        """Check if the SPRING pending flag file exists"""
119
        LOG.info("Check if pending compression file exists")
1✔
120
        return self.file_exists_and_is_accessible(self.pending_path)
1✔
121

122
    @property
1✔
123
    def is_compression_pending(self) -> bool:
1✔
124
        """Check if compression/decompression has started but not finished."""
125
        if self.pending_exists():
1✔
126
            LOG.info(f"Compression/decompression is pending for {self.run_name}")
1✔
127
            return True
1✔
128
        LOG.info("Compression/decompression is not running")
1✔
129
        return False
1✔
130

131
    @property
1✔
132
    def is_fastq_compression_possible(self) -> bool:
1✔
133
        """Check if FASTQ compression is possible.
134

135
        - Compression is running          -> Compression NOT possible
136
        - SPRING file exists on Hasta     -> Compression NOT possible
137
        - Data is external                -> Compression NOT possible
138
        - Not compressed and
139
           not running  -> Compression IS possible
140
        """
141
        if self.is_compression_pending:
1✔
142
            return False
1✔
143

144
        if self.spring_exists():
1✔
145
            LOG.debug("SPRING file found")
1✔
146
            return False
1✔
147

148
        if "external-data" in str(self.fastq_first):
1✔
NEW
149
            LOG.debug("File is external data and should not be compressed")
×
NEW
150
            return False
×
151

152
        LOG.debug("FASTQ compression is possible")
1✔
153

154
        return True
1✔
155

156
    @property
1✔
157
    def is_spring_decompression_possible(self) -> bool:
1✔
158
        """Check if SPRING decompression is possible.
159

160
        There are three possible answers to this question:
161

162
            - Compression/Decompression is running      -> Decompression is NOT possible
163
            - The FASTQ files are not compressed        -> Decompression is NOT possible
164
            - Compression has been performed            -> Decompression IS possible
165

166
        """
167
        if self.pending_exists():
1✔
168
            LOG.info(f"Compression/decompression is pending for {self.run_name}")
1✔
169
            return False
1✔
170

171
        if not self.spring_exists():
1✔
172
            LOG.info("No SPRING file found")
1✔
173
            return False
1✔
174

175
        if self.pair_exists():
1✔
176
            LOG.info("FASTQ files already exists")
1✔
177
            return False
1✔
178

179
        LOG.info("Decompression is possible")
1✔
180

181
        return True
1✔
182

183
    @property
1✔
184
    def is_fastq_compression_done(self) -> bool:
1✔
185
        """Check if FASTQ compression is finished.
186

187
        This is checked by controlling that the SPRING files that are produced after FASTQ
188
        compression exists.
189

190
        The following has to be fulfilled for FASTQ compression to be considered done:
191

192
            - A SPRING archive file exists
193
            - A SPRING archive metadata file exists
194
            - The SPRING archive has not been unpacked before FASTQ delta (21 days)
195

196
        Note:
197
        'updated_at' indicates at what date the SPRING archive was unarchived last.
198
        If the SPRING archive has never been unarchived 'updated_at' is None.
199

200
        """
201
        LOG.info("Check if FASTQ compression is finished")
1✔
202
        LOG.info(f"Check if SPRING file {self.spring_path} exists")
1✔
203
        if not self.spring_exists():
1✔
204
            LOG.info(
1✔
205
                f"No SPRING file for {self.run_name}",
206
            )
207
            return False
1✔
208
        LOG.info("SPRING file found")
1✔
209

210
        LOG.info(f"Check if SPRING metadata file {self.spring_metadata_path} exists")
1✔
211
        if not self.metadata_exists():
1✔
212
            LOG.info("No metadata file found")
1✔
213
            return False
1✔
214
        LOG.info("SPRING metadata file found")
1✔
215

216
        # We want this to raise exception if file is malformed
217
        crunchy_metadata: CrunchyMetadata = get_crunchy_metadata(self.spring_metadata_path)
1✔
218

219
        # Check if the SPRING archive has been unarchived
220
        updated_at: date | None = get_file_updated_at(crunchy_metadata)
1✔
221
        if updated_at is None:
1✔
222
            LOG.info(f"FASTQ compression is done for {self.run_name}")
1✔
223
            return True
1✔
224

225
        LOG.info(f"Files where unpacked {updated_at}")
1✔
226

227
        if not check_if_update_spring(updated_at):
1✔
228
            return False
1✔
229

230
        LOG.info(f"FASTQ compression is done for {self.run_name}")
1✔
231

232
        return True
1✔
233

234
    @property
1✔
235
    def is_spring_decompression_done(self) -> bool:
1✔
236
        """Check if SPRING decompression if finished.
237

238
        This means that all three files specified in SPRING metadata should exist.
239
        That is
240

241
            - First read in FASTQ pair should exist
242
            - Second read in FASTQ pair should exist
243
            - SPRING archive file should still exist
244
        """
245

246
        spring_metadata_path: Path = self.spring_metadata_path
1✔
247
        LOG.info(f"Check if SPRING metadata file {spring_metadata_path} exists")
1✔
248

249
        if not self.metadata_exists():
1✔
250
            LOG.info("No SPRING metadata file found")
1✔
251
            return False
1✔
252

253
        # We want this to exit hard if the metadata is malformed
254
        crunchy_metadata: CrunchyMetadata = get_crunchy_metadata(spring_metadata_path)
1✔
255

256
        for file_info in crunchy_metadata.files:
1✔
257
            if not Path(file_info.path).exists():
1✔
258
                LOG.info(f"File {file_info.path} does not exist")
1✔
259
                return False
1✔
260
            if not file_info.updated:
1✔
261
                LOG.info("Files have not been unarchived")
1✔
262
                return False
1✔
263

264
        LOG.info(f"SPRING decompression is done for run {self.run_name}")
1✔
265
        return True
1✔
266

267
    def __str__(self):
1✔
268
        return f"CompressionData(run:{self.run_name})"
×
269

270
    def __repr__(self):
1✔
271
        return f"CompressionData(stub:{self.stub})"
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc