• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

cnr-ibba / SMARTER-database / 9265419949

27 May 2024 09:15AM CUT coverage: 94.434%. Remained the same
9265419949

push

github

bunop
:bookmark: Bump version: 0.4.10.dev0 → 0.4.10

1 of 1 new or added line in 1 file covered. (100.0%)

3071 of 3252 relevant lines covered (94.43%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.8
/src/data/merge_datasets.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
1✔
4
Created on Tue Mar 23 17:11:58 2021
5

6
@author: Paolo Cozzi <paolo.cozzi@ibba.cnr.it>
7

8
A simple script to merge plink binary files
9
"""
10

11
import click
1✔
12
import logging
1✔
13
import subprocess
1✔
14

15
from pathlib import Path
1✔
16

17
from src import __version__
1✔
18
from src.features.utils import get_interim_dir, get_processed_dir
1✔
19
from src.features.smarterdb import global_connection, Dataset, SPECIES2CODE
1✔
20
from src.data.common import WORKING_ASSEMBLIES, PLINK_SPECIES_OPT
1✔
21

22
# module-level logger named after this module (``__name__``)
logger = logging.getLogger(__name__)
1✔
23

24

25
@click.command()
@click.option(
    '--species_class',
    type=str,
    required=True,
    help=(
        "Search processed genotypes belonging to this species ('Sheep' "
        "or 'Goat')"
    )
)
@click.option(
    '--assembly',
    type=str,
    required=True,
    help="Search processed genotypes belonging to this assembly"
)
def main(species_class, assembly):
    """
    Search for processed genotype files for a certain species in
    ``data/processed`` folder and then call PLINK to join all genotypes
    in the same dataset
    """
    # NOTE: the docstring above is rendered by click as the command help
    # text, so developer-facing notes are kept in comments instead.
    #
    # Parameters:
    #   species_class: species name; it is capitalized before being used as
    #       key for SPECIES2CODE / PLINK_SPECIES_OPT and as database filter
    #   assembly: assembly name; must be one of WORKING_ASSEMBLIES
    #
    # Raises:
    #   Exception: if ``assembly`` is not in WORKING_ASSEMBLIES
    #   KeyError: if the capitalized species is not a key of SPECIES2CODE
    #   subprocess.CalledProcessError: if plink exits with a non-zero status

    logger.info(f"{Path(__file__).name} started")

    # find assembly configuration
    if assembly not in WORKING_ASSEMBLIES:
        raise Exception(f"assembly {assembly} not managed by smarter")

    # normalize user input once and reuse the same spelling everywhere
    species = species_class.capitalize()
    assembly_tag = assembly.upper()

    # this tag names both the merge list and the final PLINK output prefix
    smarter_tag = "SMARTER-{specie}-{assembly}-top-{version}".format(
        specie=SPECIES2CODE[species],
        assembly=assembly_tag,
        version=__version__
    )
    merge_file = get_interim_dir() / smarter_tag

    # how many file prefixes were written in the merge list
    n_prefixes = 0

    # open a file to track files to merge
    with merge_file.open(mode="w") as handle:
        for dataset in Dataset.objects(species=species):
            logger.debug(f"Got {dataset}")

            # search for result dir
            results_dir = Path(dataset.result_dir) / assembly_tag

            if not results_dir.exists():
                # nothing was generated for this dataset and assembly
                continue

            logger.debug(f"Found {results_dir}")

            # I can have more than 1 file for dataset (If one or more
            # files are included into dataset)
            for bed_file in results_dir.glob('*.bed'):
                # plink wants the file prefix (path without extension)
                prefix = results_dir / bed_file.stem

                logger.info(f"Appending '{prefix}' for merge")

                # track file to merge
                handle.write(f"{prefix}\n")
                n_prefixes += 1

    if not n_prefixes:
        logger.warning(
            f"No genotype files found for '{species}' and '{assembly_tag}'")

    # ok check for results dir
    final_dir = get_processed_dir() / assembly
    final_dir.mkdir(parents=True, exist_ok=True)

    # ok time to convert data in plink binary format. Species options are
    # selected using the requested species and not the loop variable, which
    # is unbound when the query returns no datasets. Presumably every
    # dataset returned by the query stores this same species value (it is
    # the filter applied to the query) -- TODO confirm
    cmd = ["plink"] + PLINK_SPECIES_OPT[species] + [
        "--merge-list",
        f"{merge_file}",
        "--make-bed",
        "--out",
        f"{final_dir / smarter_tag}"
    ]

    # debug
    logger.info("Executing: " + " ".join(cmd))

    # check=True: a plink failure raises instead of being ignored
    subprocess.run(cmd, check=True)

    logger.info(f"{Path(__file__).name} ended")
1✔
105

106

107
if __name__ == '__main__':
    # set up logging for script execution: INFO level, with timestamp,
    # logger name and level in front of each message
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO
    )

    # connect to database before running the command
    global_connection()

    # click parses the command line arguments and calls the function
    main()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc