• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

cnr-ibba / SMARTER-database / 9265419949

27 May 2024 09:15AM CUT coverage: 94.434%. Remained the same
9265419949

push

github

bunop
:bookmark: Bump version: 0.4.10.dev0 → 0.4.10

1 of 1 new or added line in 1 file covered. (100.0%)

3071 of 3252 relevant lines covered (94.43%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.5
/src/data/import_multiple_phenotypes.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
1✔
4
Created on Mon Sep 11 12:13:47 2023
5

6
@author: Paolo Cozzi <paolo.cozzi@ibba.cnr.it>
7

8
This program acts like import_phenotypes but adds more data for the same
9
individual
10
"""
11

12
import click
1✔
13
import logging
1✔
14

15
import numpy as np
1✔
16
from click_option_group import optgroup, RequiredMutuallyExclusiveOptionGroup
1✔
17
from pathlib import Path
1✔
18

19
from src.features.smarterdb import global_connection, Phenotype
1✔
20
from src.data.common import pandas_open, deal_with_datasets, get_sample_species
1✔
21
from src.features.utils import sanitize
1✔
22

23
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
1✔
24

25

26
def create_or_update_phenotype(sample, phenotype: dict):
    """Copy every item of ``phenotype`` onto ``sample.phenotype`` and save.

    Each key of ``phenotype`` is set as an attribute (with its value) on
    ``sample.phenotype``. When ``sample.phenotype`` is falsy a new
    ``Phenotype`` instance is attached to the sample first. When
    ``phenotype`` itself is falsy the sample is left untouched and is not
    saved.
    """
    if phenotype:
        # make sure there is a phenotype object to receive the attributes
        if not sample.phenotype:
            logger.debug(f"Create a new phenotype for {sample}")
            sample.phenotype = Phenotype()

        for attribute, content in phenotype.items():
            setattr(sample.phenotype, attribute, content)

        message = f"Updating '{sample}' phenotype with '{sample.phenotype}'"
        logger.info(message)

        # persist the modified sample
        sample.save()
    else:
        logger.debug(f"Skipping {sample}: nothing to update")
45

46

47
@click.command()
@click.option(
    '--src_dataset', type=str, required=True,
    help="The raw dataset file name (zip archive) in which search datafile"
)
@click.option(
    '--dst_dataset', type=str, required=False,
    # the trailing space keeps the two concatenated literals from running
    # together in the rendered help text
    help=("The raw dataset file name (zip archive) in which add metadata "
          "(def. the 'src_dataset')")
)
@click.option('--datafile', type=str, required=True)
@click.option('--sheet_name',
              default="0",
              help="pandas 'sheet_name' option")
@optgroup.group(
    'Add metadata relying on breeds or samples columns',
    cls=RequiredMutuallyExclusiveOptionGroup
)
@optgroup.option('--breed_column', type=str, help="The breed column")
@optgroup.option('--id_column', type=str, help="The original_id column")
@optgroup.option('--alias_column', type=str, help="An alias for original_id")
@click.option(
    '--column',
    'columns',
    required=True,
    multiple=True,
    help=(
        "Column to track. Could be specified multiple times")
)
@click.option('--na_values', type=str, help="pandas NA values")
def main(src_dataset, dst_dataset, datafile, sheet_name, breed_column,
         id_column, alias_column, columns, na_values):
    """Read multiple data for the same sample from phenotype file and add it
    to SMARTER-database samples"""

    # NOTE: the docstring above is used by click as the command help text,
    # so implementation notes are kept in comments instead.

    logger.info(f"{Path(__file__).name} started")

    # only --id_column is handled so far: fail early when one of the other
    # two options of the mutually exclusive group was given
    if breed_column or alias_column:
        raise NotImplementedError(
            "Loading multiple phenotypes by breed or alias is not yet "
            "implemented")

    logger.debug(f"Reading {columns} columns")

    # NOTE(review): deal_with_datasets presumably resolves both dataset
    # objects and the path of 'datafile' -- confirm in src.data.common
    src_dataset, dst_dataset, datapath = deal_with_datasets(
        src_dataset, dst_dataset, datafile)

    # sample class selected from the destination dataset species
    SampleSpecie = get_sample_species(dst_dataset.species)

    # the option value is a string (default "0"): numeric values are
    # converted to int before being handed to pandas_open
    if sheet_name and sheet_name.isnumeric():
        sheet_name = int(sheet_name)

    # open data with pandas
    data = pandas_open(datapath, na_values=na_values, sheet_name=sheet_name)

    # process unique ids
    for id_ in data[id_column].unique():
        # all the rows recorded for this id
        subset = data[data[id_column] == id_]

        phenotype = {}

        for column in columns:
            # one list of values per tracked column, with NaN replaced by
            # None; the key is the sanitized column name
            phenotype[sanitize(column)] = subset[column] \
                .replace({np.nan: None}).to_list()

        # samples are looked up using the string form of the id
        original_id = str(id_)

        # ok iterate over all samples of this dataset
        for sample in SampleSpecie.objects.filter(
                dataset=dst_dataset, original_id=original_id):

            create_or_update_phenotype(sample, phenotype)

    logger.info(f"{Path(__file__).name} ended")
121

122

123
if __name__ == '__main__':
    # script entry point: set up root logging at INFO level first
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # connect to database, then run the click command
    global_connection()
    main()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc