• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Clinical-Genomics / cg / 13654168819

04 Mar 2025 12:54PM UTC coverage: 82.161%. First build
13654168819

Pull #4257

github

web-flow
Merge 6b252084f into 21d491f67
Pull Request #4257: Refactor Crunchy API

93 of 96 new or added lines in 7 files covered. (96.88%)

14683 of 17871 relevant lines covered (82.16%)

1.46 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.11
/cg/models/compression_data.py
1
"""Class to hold file information about a compression entity"""
2

3
import logging
1✔
4
import os
1✔
5
from datetime import date, datetime
1✔
6
from pathlib import Path
1✔
7

8
from cg.apps.crunchy.files import check_if_update_spring, get_crunchy_metadata, get_file_updated_at
1✔
9
from cg.apps.crunchy.models import CrunchyMetadata
1✔
10
from cg.constants import FASTQ_FIRST_READ_SUFFIX, FASTQ_SECOND_READ_SUFFIX, FileExtensions
1✔
11
from cg.constants.compression import PENDING_PATH_SUFFIX
1✔
12

13
# Module-level logger, named after this module via __name__.
LOG = logging.getLogger(__name__)
1✔
14

15

16
class CompressionData:
1✔
17
    """Holds information about compression data"""
18

19
    def __init__(self, stub: Path = None):
1✔
20
        """Initialise a compression data object
21

22
        The stub is first part of the file name
23
        """
24
        self.stub = stub
1✔
25
        self.stub_string = str(self.stub)
1✔
26

27
    @property
1✔
28
    def pending_path(self) -> Path:
1✔
29
        """Return the path to a compression pending file"""
30
        return self.stub.with_suffix(PENDING_PATH_SUFFIX)
1✔
31

32
    @property
1✔
33
    def spring_path(self) -> Path:
1✔
34
        """Return the path to a SPRING file"""
35
        return self.stub.with_suffix(FileExtensions.SPRING)
1✔
36

37
    @property
1✔
38
    def encrypted_spring_path(self) -> Path:
1✔
39
        """Return the path to a SPRING file"""
40
        return self.stub.with_suffix(FileExtensions.SPRING).with_suffix(FileExtensions.GPG)
×
41

42
    @property
1✔
43
    def spring_metadata_path(self) -> Path:
1✔
44
        """Return the path to a SPRING metadata file"""
45
        return self.stub.with_suffix(".json")
1✔
46

47
    @property
1✔
48
    def analysis_dir(self) -> Path:
1✔
49
        """Return the path to folder where analysis is"""
50
        return self.stub.resolve().parent
1✔
51

52
    @property
1✔
53
    def fastq_first(self) -> Path:
1✔
54
        """Return the path to the first read in pair"""
55
        return Path(self.stub_string + FASTQ_FIRST_READ_SUFFIX)
1✔
56

57
    @property
1✔
58
    def fastq_second(self) -> Path:
1✔
59
        """Return the path to the second read in pair"""
60
        return Path(self.stub_string + FASTQ_SECOND_READ_SUFFIX)
1✔
61

62
    @property
1✔
63
    def run_name(self) -> str:
1✔
64
        """Return the name of the sequencing run identifier"""
65
        return self.stub.name
1✔
66

67
    def pair_exists(self) -> bool:
1✔
68
        """Check that both files in FASTQ pair exists"""
69
        LOG.info("Check if FASTQ pair exists")
1✔
70
        if not self.file_exists_and_is_accessible(self.fastq_first):
1✔
71
            return False
1✔
72
        return bool(self.file_exists_and_is_accessible(self.fastq_second))
1✔
73

74
    @staticmethod
1✔
75
    def is_absolute(file_path: Path) -> bool:
1✔
76
        """Check if file path can be resolved"""
77
        if not file_path.is_absolute():
1✔
78
            LOG.info("Could not resolve full path from HK to %s", file_path)
×
79
            return False
×
80
        return True
1✔
81

82
    @staticmethod
1✔
83
    def file_exists_and_is_accessible(file_path: Path) -> bool:
1✔
84
        """Check if file exists and is accesible"""
85
        try:
1✔
86
            if not file_path.exists():
1✔
87
                LOG.info("%s does not exist", file_path)
1✔
88
                return False
1✔
89
        except PermissionError:
×
90
            LOG.warning("Not permitted to access %s. Skipping", file_path)
×
91
            return False
×
92
        return True
1✔
93

94
    @staticmethod
1✔
95
    def is_symlink(file_path: Path) -> bool:
1✔
96
        """Check if file path is symbolik link"""
97
        LOG.info("Check if %s is a symlink", file_path)
×
98
        return os.path.islink(file_path)
×
99

100
    @staticmethod
1✔
101
    def get_change_date(file_path: Path) -> datetime:
1✔
102
        """Return the time when this file was changed"""
103
        changed_date = datetime.fromtimestamp(file_path.stat().st_mtime)
1✔
104
        LOG.info("File %s was changed %s", file_path, changed_date)
1✔
105
        return changed_date
1✔
106

107
    def spring_exists(self) -> bool:
1✔
108
        """Check if the SPRING file exists"""
109
        LOG.info("Check if SPRING archive file exists")
1✔
110
        return self.file_exists_and_is_accessible(self.spring_path)
1✔
111

112
    def metadata_exists(self) -> bool:
1✔
113
        """Check if the SPRING metadata file exists"""
114
        LOG.info("Check if SPRING metadata file exists")
1✔
115
        return self.file_exists_and_is_accessible(self.spring_metadata_path)
1✔
116

117
    def pending_exists(self) -> bool:
1✔
118
        """Check if the SPRING pending flag file exists"""
119
        LOG.info("Check if pending compression file exists")
1✔
120
        return self.file_exists_and_is_accessible(self.pending_path)
1✔
121

122
    @property
1✔
123
    def is_compression_pending(self) -> bool:
1✔
124
        """Check if compression/decompression has started but not finished."""
125
        if self.pending_exists():
1✔
126
            LOG.info(f"Compression/decompression is pending for {self.run_name}")
1✔
127
            return True
1✔
128
        LOG.info("Compression/decompression is not running")
1✔
129
        return False
1✔
130

131
    @property
1✔
132
    def is_fastq_compression_possible(self) -> bool:
1✔
133
        """Check if FASTQ compression is possible.
134

135
        - Compression is running          -> Compression NOT possible
136
        - SPRING file exists on Hasta     -> Compression NOT possible
137
        - Data is external                -> Compression NOT possible
138
        - Not compressed and not running  -> Compression IS possible
139
        """
140
        if self.is_compression_pending:
1✔
141
            return False
1✔
142

143
        if self.spring_exists():
1✔
144
            LOG.debug("SPRING file found")
1✔
145
            return False
1✔
146

147
        if "external-data" in str(self.fastq_first):
1✔
NEW
148
            LOG.debug("File is external data and should not be compressed")
×
NEW
149
            return False
×
150

151
        LOG.debug("FASTQ compression is possible")
1✔
152

153
        return True
1✔
154

155
    @property
1✔
156
    def is_spring_decompression_possible(self) -> bool:
1✔
157
        """Check if SPRING decompression is possible.
158

159
        There are three possible answers to this question:
160

161
            - Compression/Decompression is running      -> Decompression is NOT possible
162
            - The FASTQ files are not compressed        -> Decompression is NOT possible
163
            - Compression has been performed            -> Decompression IS possible
164

165
        """
166
        if self.pending_exists():
1✔
167
            LOG.info(f"Compression/decompression is pending for {self.run_name}")
1✔
168
            return False
1✔
169

170
        if not self.spring_exists():
1✔
171
            LOG.info("No SPRING file found")
1✔
172
            return False
1✔
173

174
        if self.pair_exists():
1✔
175
            LOG.info("FASTQ files already exists")
1✔
176
            return False
1✔
177

178
        LOG.info("Decompression is possible")
1✔
179

180
        return True
1✔
181

182
    @property
1✔
183
    def is_fastq_compression_done(self) -> bool:
1✔
184
        """Check if FASTQ compression is finished.
185

186
        This is checked by controlling that the SPRING files that are produced after FASTQ
187
        compression exists.
188

189
        The following has to be fulfilled for FASTQ compression to be considered done:
190

191
            - A SPRING archive file exists
192
            - A SPRING archive metadata file exists
193
            - The SPRING archive has not been unpacked before FASTQ delta (21 days)
194

195
        Note:
196
        'updated_at' indicates at what date the SPRING archive was unarchived last.
197
        If the SPRING archive has never been unarchived 'updated_at' is None.
198

199
        """
200
        LOG.info("Check if FASTQ compression is finished")
1✔
201
        LOG.info(f"Check if SPRING file {self.spring_path} exists")
1✔
202
        if not self.spring_exists():
1✔
203
            LOG.info(
1✔
204
                f"No SPRING file for {self.run_name}",
205
            )
206
            return False
1✔
207
        LOG.info("SPRING file found")
1✔
208

209
        LOG.info(f"Check if SPRING metadata file {self.spring_metadata_path} exists")
1✔
210
        if not self.metadata_exists():
1✔
211
            LOG.info("No metadata file found")
1✔
212
            return False
1✔
213
        LOG.info("SPRING metadata file found")
1✔
214

215
        # We want this to raise exception if file is malformed
216
        crunchy_metadata: CrunchyMetadata = get_crunchy_metadata(self.spring_metadata_path)
1✔
217

218
        # Check if the SPRING archive has been unarchived
219
        updated_at: date | None = get_file_updated_at(crunchy_metadata)
1✔
220
        if updated_at is None:
1✔
221
            LOG.info(f"FASTQ compression is done for {self.run_name}")
1✔
222
            return True
1✔
223

224
        LOG.info(f"Files where unpacked {updated_at}")
1✔
225

226
        if not check_if_update_spring(updated_at):
1✔
227
            return False
1✔
228

229
        LOG.info(f"FASTQ compression is done for {self.run_name}")
1✔
230

231
        return True
1✔
232

233
    @property
1✔
234
    def is_spring_decompression_done(self) -> bool:
1✔
235
        """Check if SPRING decompression if finished.
236

237
        This means that all three files specified in SPRING metadata should exist.
238
        That is
239

240
            - First read in FASTQ pair should exist
241
            - Second read in FASTQ pair should exist
242
            - SPRING archive file should still exist
243
        """
244

245
        spring_metadata_path: Path = self.spring_metadata_path
1✔
246
        LOG.info(f"Check if SPRING metadata file {spring_metadata_path} exists")
1✔
247

248
        if not self.metadata_exists():
1✔
249
            LOG.info("No SPRING metadata file found")
1✔
250
            return False
1✔
251

252
        # We want this to exit hard if the metadata is malformed
253
        crunchy_metadata: CrunchyMetadata = get_crunchy_metadata(spring_metadata_path)
1✔
254

255
        for file_info in crunchy_metadata.files:
1✔
256
            if not Path(file_info.path).exists():
1✔
257
                LOG.info(f"File {file_info.path} does not exist")
1✔
258
                return False
1✔
259
            if not file_info.updated:
1✔
260
                LOG.info("Files have not been unarchived")
1✔
261
                return False
1✔
262

263
        LOG.info(f"SPRING decompression is done for run {self.run_name}")
1✔
264
        return True
1✔
265

266
    def __str__(self):
1✔
267
        return f"CompressionData(run:{self.run_name})"
×
268

269
    def __repr__(self):
1✔
270
        return f"CompressionData(stub:{self.stub})"
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc