• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Clinical-Genomics / cg / 9677298989

26 Jun 2024 09:38AM UTC coverage: 84.621%. First build
9677298989

Pull #3360

github

web-flow
Merge 58362c1a3 into 997c86953
Pull Request #3360: rework(backup)

87 of 97 new or added lines in 7 files covered. (89.69%)

20596 of 24339 relevant lines covered (84.62%)

0.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.69
/cg/cli/backup.py
1
"""Backup related CLI commands."""
2

3
import logging
1✔
4
from pathlib import Path
1✔
5
from typing import Iterable
1✔
6

7
import click
1✔
8
import housekeeper.store.models as hk_models
1✔
9

10
from cg.apps.housekeeper.hk import HousekeeperAPI
1✔
11
from cg.apps.slurm.slurm_api import SlurmAPI
1✔
12
from cg.cli.utils import CLICK_CONTEXT_SETTINGS
1✔
13
from cg.constants.cli_options import DRY_RUN
1✔
14
from cg.constants.constants import FlowCellStatus
1✔
15
from cg.constants.housekeeper_tags import SequencingFileTag
1✔
16
from cg.exc import (
1✔
17
    DsmcAlreadyRunningError,
18
    IlluminaRunAlreadyBackedUpError,
19
    IlluminaRunEncryptionError,
20
    FlowCellError,
21
    PdcError,
22
)
23
from cg.meta.backup.backup import SpringBackupAPI
1✔
24
from cg.services.illumina_services.backup_services.backup_service import IlluminaBackupService
1✔
25
from cg.services.pdc_service.pdc_service import PdcService
1✔
26
from cg.meta.encryption.encryption import (
1✔
27
    EncryptionAPI,
28
    SpringEncryptionAPI,
29
)
30
from cg.services.illumina_services.backup_services.encrypt_service import (
1✔
31
    IlluminaRunEncryptionService,
32
)
33
from cg.meta.tar.tar import TarAPI
1✔
34
from cg.models.cg_config import CGConfig
1✔
35
from cg.models.run_devices.illumina_run_directory_data import (
1✔
36
    IlluminaRunDirectoryData,
37
    get_sequencing_runs_from_path,
38
)
39
from cg.store.models import Flowcell, Sample, IlluminaSequencingRun
1✔
40
from cg.store.store import Store
1✔
41

42
# Module-level logger, named after this module's import path.
LOG = logging.getLogger(__name__)
1✔
43

44

45
@click.group(context_settings=CLICK_CONTEXT_SETTINGS)
@click.pass_obj
def backup(context: CGConfig):
    """Backup utilities"""
    # Deliberately empty: this callback only anchors the `backup` command
    # group. The commands registered on it with `@backup.command(...)` do
    # the actual work; `context` is accepted but not used here.
×
50

51

52
@backup.command("illumina-runs")
@DRY_RUN
@click.pass_obj
def backup_illumina_runs(context: CGConfig, dry_run: bool):
    """Back-up illumina runs."""
    # Flow of this command:
    #   1. Build one IlluminaBackupService from the configured binaries/paths.
    #   2. List the run directories under the configured sequencing-runs dir.
    #   3. For each directory, look up its database record (may be None) and
    #      ask the service to start the backup.
    # The listed backup errors for one run are logged and the loop moves on
    # to the next run; any other exception propagates.
    status_db: Store = context.status_db
    # Read once and reuse for both the service and the directory listing.
    sequencing_runs_dir = context.run_instruments.illumina.sequencing_runs_dir

    pdc_service = context.pdc_service
    pdc_service.dry_run = dry_run
    encryption_api = EncryptionAPI(binary_path=context.encryption.binary_path, dry_run=dry_run)
    tar_api = TarAPI(binary_path=context.tar.binary_path, dry_run=dry_run)
    backup_service = IlluminaBackupService(
        encryption_api=encryption_api,
        pdc_archiving_directory=context.illumina_backup_service.pdc_archiving_directory,
        status_db=status_db,
        tar_api=tar_api,
        pdc_service=pdc_service,
        sequencing_runs_dir=sequencing_runs_dir,
        dry_run=dry_run,
    )
    # NOTE(review): looks redundant with the `dry_run` constructor argument
    # above; kept because the constructor is not visible from here — confirm
    # before removing.
    backup_service.dry_run = dry_run

    runs_dir_data: list[IlluminaRunDirectoryData] = get_sequencing_runs_from_path(
        sequencing_run_dir=Path(sequencing_runs_dir)
    )
    for run_dir_data in runs_dir_data:
        sequencing_run: IlluminaSequencingRun | None = (
            status_db.get_illumina_sequencing_run_by_device_internal_id(run_dir_data.id)
        )
        try:
            backup_service.start_run_backup(
                run_dir_data=run_dir_data,
                sequencing_run=sequencing_run,
                status_db=status_db,
                binary_path=context.encryption.binary_path,
                encryption_dir=Path(context.encryption.encryption_dir),
                pigz_binary_path=context.pigz.binary_path,
                sbatch_parameter=context.illumina_backup_service.slurm_flow_cell_encryption.dict(),
            )
        except (
            DsmcAlreadyRunningError,
            IlluminaRunAlreadyBackedUpError,
            IlluminaRunEncryptionError,
            PdcError,
        ) as error:
            # Log through the module logger like the rest of this file. The
            # module-level `logging.error` writes to the root logger and may
            # implicitly configure it when it has no handlers yet.
            LOG.error(f"{error}")
1✔
96

97

98
@backup.command("encrypt-illumina-runs")
@DRY_RUN
@click.pass_obj
def encrypt_illumina_runs(context: CGConfig, dry_run: bool):
    """Encrypt illumina runs."""
    # Every run directory under the configured sequencing-runs dir is
    # considered. Runs whose database record reports `has_backup` are skipped;
    # for all others (including runs without a database record) an
    # encryption service is built and started. The listed encryption errors
    # for one run are logged and the loop continues with the next run.
    status_db: Store = context.status_db
    runs: list[IlluminaRunDirectoryData] = get_sequencing_runs_from_path(
        sequencing_run_dir=Path(context.run_instruments.illumina.sequencing_runs_dir)
    )
    for run in runs:
        sequencing_run: IlluminaSequencingRun | None = (
            status_db.get_illumina_sequencing_run_by_device_internal_id(run.id)
        )
        if sequencing_run and sequencing_run.has_backup:
            LOG.debug(f"Run: {run.id} is already backed-up")
            continue
        illumina_run_encryption_service = IlluminaRunEncryptionService(
            binary_path=context.encryption.binary_path,
            dry_run=dry_run,
            encryption_dir=Path(context.encryption.encryption_dir),
            run_dir_data=run,
            pigz_binary_path=context.pigz.binary_path,
            slurm_api=SlurmAPI(),
            sbatch_parameter=context.illumina_backup_service.slurm_flow_cell_encryption.dict(),
            tar_api=TarAPI(binary_path=context.tar.binary_path, dry_run=dry_run),
        )
        try:
            illumina_run_encryption_service.start_encryption()
        except (FlowCellError, IlluminaRunEncryptionError) as error:
            # Log through the module logger like the rest of this file. The
            # module-level `logging.error` writes to the root logger and may
            # implicitly configure it when it has no handlers yet.
            LOG.error(f"{error}")
1✔
128

129

130
@backup.command("fetch-illumina-run")
@click.option("-f", "--flow-cell-id", help="Retrieve a specific flow cell, ex. 'HCK2KDSXX'")
@DRY_RUN
@click.pass_obj
def fetch_illumina_run(context: CGConfig, dry_run: bool, flow_cell_id: str | None = None):
    """Fetch the first Illumina run in the requested queue from backup"""
    # Flow of this command:
    #   * With --flow-cell-id: look the run up in the status database and
    #     abort when it is unknown.
    #   * Without it: pass `None` to the backup service, which then picks the
    #     run itself (logged here as "first sequencing run in queue").
    #   * When the service reports a retrieval time, log it in hours and stop.
    #   * Otherwise, outside dry-run and with a specific run, mark the run as
    #     REQUESTED in the status database.
    pdc_service = context.pdc_service
    pdc_service.dry_run = dry_run
    encryption_api = EncryptionAPI(binary_path=context.encryption.binary_path, dry_run=dry_run)
    tar_api = TarAPI(binary_path=context.tar.binary_path, dry_run=dry_run)
    backup_api = IlluminaBackupService(
        encryption_api=encryption_api,
        pdc_archiving_directory=context.illumina_backup_service.pdc_archiving_directory,
        status_db=context.status_db,
        tar_api=tar_api,
        pdc_service=pdc_service,
        sequencing_runs_dir=context.run_instruments.illumina.sequencing_runs_dir,
        dry_run=dry_run,
    )
    # The service is still published under "backup_api" on the shared config
    # object, exactly as before.
    context.meta_apis["backup_api"] = backup_api

    status_db: Store = context.status_db
    sequencing_run: IlluminaSequencingRun | None = (
        status_db.get_illumina_sequencing_run_by_device_internal_id(flow_cell_id)
        if flow_cell_id
        else None
    )

    if flow_cell_id and not sequencing_run:
        LOG.error(f"{flow_cell_id}: not found in database")
        raise click.Abort

    if not flow_cell_id:
        LOG.info("Fetching first sequencing run in queue")

    retrieval_time: float | None = backup_api.fetch_sequencing_run(sequencing_run=sequencing_run)

    if retrieval_time:
        # The service's value is treated as seconds and converted to hours.
        hours = retrieval_time / 60 / 60
        # `.1f` prints one decimal place. The previous `.1` spec meant "one
        # significant digit" in general format, so 12.5 hours was rendered
        # as "1e+01h".
        LOG.info(f"Retrieval time: {hours:.1f}h")
        return

    if not dry_run and sequencing_run:
        LOG.info(f"{sequencing_run}: updating flow cell status to {FlowCellStatus.REQUESTED}")
        status_db.update_illumina_sequencing_run_availability(
            sequencing_run=sequencing_run, data_availability=FlowCellStatus.REQUESTED
        )
178

179

180
@backup.command("archive-spring-files")
@DRY_RUN
@click.pass_context
@click.pass_obj
def archive_spring_files(config: CGConfig, context: click.Context, dry_run: bool):
    """Archive spring files to PDC"""
    # Collects every Housekeeper file tagged as spring whose path contains
    # "<environment>/<demultiplex out_dir>" and hands each one that exists on
    # disk to the single-file `archive_spring_file` command.
    hk_api: HousekeeperAPI = config.housekeeper_api
    LOG.info("Getting all spring files from Housekeeper.")
    tagged_files = hk_api.files(tags=[SequencingFileTag.SPRING])
    path_fragment = f"{config.environment}/{config.demultiplex.out_dir}"
    spring_files: Iterable[hk_models.File] = tagged_files.filter(
        hk_models.File.path.contains(path_fragment)
    )
    for spring_file in spring_files:
        LOG.info(f"Attempting encryption and PDC archiving for file {spring_file.path}")
        if not Path(spring_file.path).exists():
            # Known to Housekeeper but missing on disk: warn and move on.
            LOG.warning(
                f"Spring file {spring_file.path} found in Housekeeper, but not on disk! Archiving process skipped!"
            )
            continue
        context.invoke(archive_spring_file, spring_file_path=spring_file.path, dry_run=dry_run)
199

200

201
@backup.command("archive-spring-file")
@click.argument("spring-file-path", type=click.Path(exists=True))
@DRY_RUN
@click.pass_obj
def archive_spring_file(config: CGConfig, spring_file_path: str, dry_run: bool):
    """Archive a spring file to PDC"""
    # Wires Housekeeper, the PDC service and the spring encryption API into
    # one SpringBackupAPI and lets it encrypt and archive the given file.
    # Keyword arguments are ordered so the collaborators are created in the
    # same sequence as before: Housekeeper, PDC service, encryption API.
    spring_backup_api: SpringBackupAPI = SpringBackupAPI(
        hk_api=config.housekeeper_api,
        pdc_service=PdcService(binary_path=config.pdc.binary_path, dry_run=dry_run),
        encryption_api=SpringEncryptionAPI(
            binary_path=config.encryption.binary_path,
            dry_run=dry_run,
        ),
        dry_run=dry_run,
    )
    LOG.debug("Start spring encryption/backup")
    target = Path(spring_file_path)
    spring_backup_api.encrypt_and_archive_spring_file(target)
×
221

222

223
@backup.command("retrieve-spring-files")
@DRY_RUN
@click.option("-s", "--sample-id", "object_type", flag_value="sample", type=str)
@click.option("-c", "--case-id", "object_type", flag_value="case", type=str)
@click.option("-f", "--flow-cell-id", "object_type", flag_value="flow_cell", type=str)
@click.argument("identifier", type=str)
@click.pass_context
@click.pass_obj
def retrieve_spring_files(
    config: CGConfig,
    context: click.Context,
    object_type: str,
    identifier: str,
    dry_run: bool,
):
    """Retrieve all spring files for a given identity"""
    # The -s/-c/-f flags all write to `object_type` and tell which kind of
    # object `identifier` names. The samples behind that object are resolved
    # through `_get_samples`, and every spring file in the latest Housekeeper
    # version of each sample's bundle is passed to `retrieve_spring_file`.
    #
    # The -f flag emits "flow_cell", the key `_get_samples` uses for flow
    # cell lookups; the former value "run_devices" was not among its keys.
    #
    # When no flag is given, `object_type` holds a default that is not one of
    # the declared flag values. Fail with a usage message instead of a bare
    # KeyError further down.
    if object_type not in ("sample", "case", "flow_cell"):
        raise click.UsageError(
            "Specify what the identifier refers to with --sample-id, --case-id or --flow-cell-id"
        )

    status_api: Store = config.status_db
    housekeeper_api: HousekeeperAPI = config.housekeeper_api

    samples: list[Sample] = _get_samples(status_api, object_type, identifier)

    for sample in samples:
        latest_version: hk_models.Version = housekeeper_api.last_version(bundle=sample.internal_id)
        # NOTE(review): presumably `last_version` returns None for a bundle
        # without any version — confirm. Such a sample is skipped with a
        # warning instead of crashing on `.id`.
        if latest_version is None:
            LOG.warning(f"No Housekeeper version found for sample {sample.internal_id}, skipping")
            continue
        spring_files: Iterable[hk_models.File] = housekeeper_api.files(
            bundle=sample.internal_id,
            tags=[SequencingFileTag.SPRING],
            version=latest_version.id,
        )
        for spring_file in spring_files:
            context.invoke(retrieve_spring_file, spring_file_path=spring_file.path, dry_run=dry_run)
×
253

254

255
def _get_samples(status_api: Store, object_type: str, identifier: str) -> list[Sample]:
1✔
256
    """Gets all samples belonging to a sample, case or flow cell id"""
257
    get_samples = {
×
258
        "sample": status_api.sample,
259
        "case": status_api.get_samples_by_case_id,
260
        "flow_cell": status_api.get_samples_from_flow_cell,
261
    }
262
    samples: Sample | list[Sample] = get_samples[object_type](identifier)
×
263
    return samples if isinstance(samples, list) else [samples]
×
264

265

266
@backup.command("retrieve-spring-file")
@click.argument("spring-file-path", type=click.Path())
@DRY_RUN
@click.pass_obj
def retrieve_spring_file(config: CGConfig, spring_file_path: str, dry_run: bool):
    """Retrieve a spring file from PDC"""
    # Builds the PDC service and the spring encryption API first, then wires
    # them together with Housekeeper into a SpringBackupAPI that retrieves
    # and decrypts the given file.
    LOG.info(f"Attempting PDC retrieval and decryption file {spring_file_path}")
    hk_api: HousekeeperAPI = config.housekeeper_api
    pdc = PdcService(binary_path=config.pdc.binary_path, dry_run=dry_run)
    spring_encryption = SpringEncryptionAPI(
        binary_path=config.encryption.binary_path, dry_run=dry_run
    )
    LOG.debug(f"Start spring retrieval if not dry run mode={dry_run}")
    backup_api = SpringBackupAPI(
        encryption_api=spring_encryption,
        hk_api=hk_api,
        pdc_service=pdc,
        dry_run=dry_run,
    )
    target = Path(spring_file_path)
    backup_api.retrieve_and_decrypt_spring_file(target)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc