• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

cnr-ibba / SMARTER-database / 6322870190

27 Sep 2023 07:38AM UTC coverage: 94.306% (-0.1%) from 94.419%
6322870190

Pull #105

github

bunop
:sparkles: load phenotypes for *Fosses, Provencale* goat breeds
Pull Request #105: :bookmark: release 0.4.8.post1

81 of 81 new or added lines in 5 files covered. (100.0%)

2948 of 3126 relevant lines covered (94.31%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.27
/src/data/import_multiple_phenotypes.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
1✔
4
Created on Mon Sep 11 12:13:47 2023
5

6
@author: Paolo Cozzi <paolo.cozzi@ibba.cnr.it>
7

8
This program acts like import_phenotypes but adds more data for the same
9
individual
10
"""
11

12
import click
1✔
13
import logging
1✔
14

15
from click_option_group import optgroup, RequiredMutuallyExclusiveOptionGroup
1✔
16
from pathlib import Path
1✔
17

18
from src.features.smarterdb import global_connection, Phenotype
1✔
19
from src.data.common import pandas_open, deal_with_datasets, get_sample_species
1✔
20
from src.features.utils import sanitize
1✔
21

22
logger = logging.getLogger(__name__)
1✔
23

24

25
def create_or_update_phenotype(
        sample, phenotype: dict):
    """Copy the given phenotype values onto a sample and save it.

    Args:
        sample: an object exposing a ``phenotype`` attribute and a ``save()``
            method (presumably a SMARTER sample document -- confirm against
            the callers).
        phenotype: maps attribute names to the values to store. When empty,
            the sample is left untouched and is not saved.

    Every key of ``phenotype`` is set as an attribute on ``sample.phenotype``;
    a new ``Phenotype`` instance is attached first when the sample has a
    falsy ``phenotype``.
    """

    # nothing to write: leave the sample as it is
    if not phenotype:
        logger.debug(f"Skipping {sample}: nothing to update")
        return None

    # attach an empty container when the sample has no phenotype yet
    if not sample.phenotype:
        logger.debug(f"Create a new phenotype for {sample}")
        sample.phenotype = Phenotype()

    # existing attributes with the same name are overwritten
    for attribute in phenotype:
        setattr(sample.phenotype, attribute, phenotype[attribute])

    message = f"Updating '{sample}' phenotype with '{sample.phenotype}'"
    logger.info(message)

    # persist the modified sample
    sample.save()
44

45

46
@click.command()
@click.option(
    '--src_dataset', type=str, required=True,
    help="The raw dataset file name (zip archive) in which search datafile"
)
@click.option(
    '--dst_dataset', type=str, required=False,
    help=("The raw dataset file name (zip archive) in which add metadata "
          "(def. the 'src_dataset')")
)
@click.option('--datafile', type=str, required=True)
@click.option('--sheet_name',
              default="0",
              help="pandas 'sheet_name' option")
@optgroup.group(
    'Add metadata relying on breeds or samples columns',
    cls=RequiredMutuallyExclusiveOptionGroup
)
@optgroup.option('--breed_column', type=str, help="The breed column")
@optgroup.option('--id_column', type=str, help="The original_id column")
@optgroup.option('--alias_column', type=str, help="An alias for original_id")
@click.option(
    '--column',
    'columns',
    required=True,
    multiple=True,
    help=(
        "Column to track. Could be specified multiple times")
)
@click.option('--na_values', type=str, help="pandas NA values")
def main(src_dataset, dst_dataset, datafile, sheet_name, breed_column,
         id_column, alias_column, columns, na_values):
    """Read multiple data for the same sample from phenotype file and add it
    to SMARTER-database samples"""
    # NOTE: the docstring above is rendered by click as the command help
    # text, so it is kept short; implementation notes live in comments.
    #
    # Workflow: open 'datafile' from the source dataset, group its rows by
    # the values of 'id_column', collect every requested column as a list of
    # values for each id, and store those lists in the phenotype of the
    # samples of the destination dataset having that 'original_id'.
    #
    # Raises NotImplementedError when '--breed_column' or '--alias_column'
    # is used: only lookups through '--id_column' are supported.

    logger.info(f"{Path(__file__).name} started")

    if breed_column or alias_column:
        raise NotImplementedError(
            "Loading multiple phenotypes by breed or alias is not yet "
            "implemented")

    logger.debug(f"Reading {columns} columns")

    src_dataset, dst_dataset, datapath = deal_with_datasets(
        src_dataset, dst_dataset, datafile)

    SampleSpecie = get_sample_species(dst_dataset.species)

    # a sheet given as digits is a positional index, anything else is a
    # sheet name. isdecimal() is used since it accepts exactly the digits
    # int() can parse (isnumeric() also accepts characters like '²' or '½',
    # on which int() raises ValueError)
    if sheet_name and sheet_name.isdecimal():
        sheet_name = int(sheet_name)

    # open data with pandas
    data = pandas_open(datapath, na_values=na_values, sheet_name=sheet_name)

    # the id column doesn't change while iterating: read it once
    ids = data[id_column]

    # process unique ids
    for id_ in ids.unique():
        # all the rows recorded for this id
        subset = data[ids == id_]

        # one list of values (a value for each row) per tracked column
        phenotype = {
            sanitize(column): subset[column].to_list() for column in columns}

        # samples are searched through the string form of the id
        original_id = str(id_)

        # ok iterate over all samples of this dataset
        for sample in SampleSpecie.objects.filter(
                dataset=dst_dataset, original_id=original_id):

            create_or_update_phenotype(sample, phenotype)

    logger.info(f"{Path(__file__).name} ended")
119

120

121
if __name__ == '__main__':
    # script entry point: set up INFO-level logging with a timestamped
    # record layout before doing anything else
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO)

    # connect to database
    global_connection()

    # run the click command (arguments are parsed from the command line)
    main()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc