• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

cnr-ibba / SMARTER-database / 9189630163

22 May 2024 10:26AM UTC coverage: 94.311%. Remained the same
9189630163

Pull #121

github

web-flow
Merge 389cf0ae7 into fcdb3ce6a
Pull Request #121: ⬆️ Bump jinja2 from 3.1.2 to 3.1.4

2984 of 3164 relevant lines covered (94.31%)

0.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.21
/src/data/import_affymetrix.py
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
1✔
4
Created on Wed Jun  9 15:44:58 2021
5

6
@author: Paolo Cozzi <paolo.cozzi@ibba.cnr.it>
7
"""
8

9
import re
1✔
10
import click
1✔
11
import logging
1✔
12

13
from mongoengine.queryset import Q
1✔
14

15
from src.features.illumina import IlluSNP, IlluSNPException
1✔
16
from src.features.smarterdb import (
1✔
17
    global_connection, SupportedChip, Location, Probeset, SmarterDBException)
18
from src.features.affymetrix import read_Manifest
1✔
19
from src.data.common import get_variant_species, update_variant, new_variant
1✔
20

21
logger = logging.getLogger(__name__)
1✔
22

23

24
def get_alleles(record):
1✔
25
    """Define dbSNP alleles from affymetrix record"""
26

27
    alleles = None
1✔
28

29
    if hasattr(record, "ref_allele") and hasattr(record, "alt_allele"):
1✔
30
        # A snp not in dbSNP could have no allele
31
        if record.ref_allele and record.alt_allele:
1✔
32
            # in dbSNP alleles order has no meaning: it's writtein in
33
            # alphabetical order
34
            # https://www.ncbi.nlm.nih.gov/books/NBK44476/#Reports.does_the_order_of_the_alleles_li
35
            alleles = sorted([record.ref_allele, record.alt_allele])
1✔
36
            alleles = "/".join(alleles)
1✔
37

38
    return alleles
1✔
39

40

41
def fix_illumina_args(record):
1✔
42
    tmp = record.cust_id.split("_")
1✔
43

44
    args = {}
1✔
45

46
    # last element is a number
47
    try:
1✔
48
        tmp[-1] = str(int(tmp[-1]))
1✔
49

50
        # recode the illumina name and define a re.pattern
51
        args["name"] = "_".join(tmp[:-1]) + "." + tmp[-1]
1✔
52

53
    except ValueError as exc:
1✔
54
        logger.debug(
1✔
55
            f"Attempt to convert {tmp[-1]} as integer failed: "
56
            f"{exc}"
57
        )
58

59
        args["name"] = re.compile(".".join(tmp))
1✔
60

61
    logger.debug(f"Try to search with {args}")
1✔
62

63
    return args
1✔
64

65

66
def search_database(record, VariantSpecie):
1✔
67
    # if I have a cust_id, search with it
68
    if record.cust_id:
1✔
69
        # search for cust_id or affy_snp_id
70
        # (private Affy SNP with unmatched cust_id)
71
        qs = VariantSpecie.objects.filter(
1✔
72
            Q(name=record.cust_id) | Q(affy_snp_id=record.affy_snp_id))
73

74
        if qs.count() == 0:
1✔
75
            logger.debug(f"Can't find a Variant using {record.cust_id}")
1✔
76

77
            args = fix_illumina_args(record)
1✔
78
            qs = VariantSpecie.objects.filter(**args)
1✔
79

80
            if qs.count() == 0:
1✔
81
                logger.debug(f"Can't find a Variant using {args}")
1✔
82

83
    else:
84
        # search by affy id
85
        qs = VariantSpecie.objects.filter(
1✔
86
            Q(name=record.affy_snp_id) | Q(affy_snp_id=record.affy_snp_id))
87

88
        if qs.count() == 0:
1✔
89
            logger.debug(f"Can't find a Variant using {record.affy_snp_id}")
1✔
90

91
    return qs
1✔
92

93

94
@click.command()
1✔
95
@click.option('--species_class', type=str, required=True)
1✔
96
@click.option('--manifest', type=str, required=True)
1✔
97
@click.option('--chip_name', type=str, required=True)
1✔
98
@click.option('--version', type=str, required=True)
1✔
99
def main(species_class, manifest, chip_name, version):
1✔
100
    """Load SNP data from Affymetrix manifest file into SMARTER-database"""
101

102
    # determining the proper VariantSpecies class
103
    VariantSpecie = get_variant_species(species_class)
1✔
104

105
    # check chip_name
106
    affymetrix_chip = SupportedChip.objects(name=chip_name).get()
1✔
107

108
    # reset chip data (if any)
109
    affymetrix_chip.n_of_snps = 0
1✔
110

111
    logger.info(f"Reading from {manifest}")
1✔
112

113
    # grep a sample SNP
114
    for i, record in enumerate(read_Manifest(manifest)):
1✔
115
        # ['probe_set_id', 'affy_snp_id', 'dbsnp_rs_id', 'dbsnp_loctype',
116
        # 'chromosome', 'physical_position', 'position_end', 'strand',
117
        # 'chrx_pseudo_autosomal_region_1', 'cytoband', 'flank', 'allele_a',
118
        # 'allele_b', 'ref_allele', 'alt_allele', 'associated_gene',
119
        # 'genetic_map', 'microsatellite', 'allele_frequencies',
120
        # 'heterozygous_allele_frequencies', 'number_of_individuals',
121
        # 'in_hapmap', 'strand_versus_dbsnp', 'probe_count',
122
        # 'chrx_pseudo_autosomal_region_2', 'minor_allele',
123
        # 'minor_allele_frequency', 'omim', 'biomedical',
124
        # 'annotation_notes', 'ordered_alleles', 'allele_count',
125
        # 'genome', 'cust_id', 'cust_genes', 'cust_traits', 'date']
126
        logger.debug(f"Processing {record}")
1✔
127

128
        affymetrix_ab = f"{record.allele_a}/{record.allele_b}"
1✔
129

130
        alleles = get_alleles(record)
1✔
131

132
        # get the illumina coded snp relying on sequence
133
        try:
1✔
134
            illusnp = IlluSNP(record.flank, max_iter=25).toTop()
1✔
135

136
        except IlluSNPException as e:
×
137
            logger.debug(e)
×
138
            logger.warning(
×
139
                f"Ignoring {record}: only 2 allelic SNPs are supported")
140
            continue
×
141

142
        # update chip data indipendentely if it is an update or not
143
        affymetrix_chip.n_of_snps += 1
1✔
144

145
        # create a location object
146
        location = Location(
1✔
147
            version=version,
148
            chrom=record.chromosome,
149
            position=record.physical_position,
150
            affymetrix_ab=affymetrix_ab,
151
            alleles=alleles,
152
            strand=record.strand,
153
            illumina=illusnp.illumina,
154
            illumina_strand=illusnp.strand,
155
            imported_from="affymetrix",
156
            date=record.date,
157
        )
158

159
        rs_id = None
1✔
160

161
        if record.dbsnp_rs_id:
1✔
162
            rs_id = [record.dbsnp_rs_id]
1✔
163

164
        variant = VariantSpecie(
1✔
165
            chip_name=[chip_name],
166
            rs_id=rs_id,
167
            probesets=[
168
                Probeset(
169
                    chip_name=chip_name,
170
                    probeset_id=[record.probe_set_id]
171
                )],
172
            affy_snp_id=record.affy_snp_id,
173
            sequence={chip_name: record.flank},
174
            cust_id=record.cust_id,
175
        )
176

177
        logger.debug(f"Processing location {variant}, {location}")
1✔
178

179
        qs = search_database(record, VariantSpecie)
1✔
180

181
        if qs.count() == 1:
1✔
182
            try:
1✔
183
                update_variant(qs, variant, location)
1✔
184

185
            except SmarterDBException as exc:
×
186
                # TODO: remove this exception handling
187
                logger.warn(f"Error with {variant}: {exc} - ignoring snp")
×
188

189
        elif qs.count() == 0:
1✔
190
            new_variant(variant, location)
1✔
191

192
        if (i+1) % 5000 == 0:
1✔
193
            logger.info(f"{i+1} variants processed")
×
194

195
    # update chip info
196
    affymetrix_chip.save()
1✔
197

198
    logger.info(f"{i+1} variants processed")
1✔
199

200
    logger.info("Completed")
1✔
201

202

203
if __name__ == '__main__':
1✔
204
    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
×
205
    logging.basicConfig(level=logging.INFO, format=log_fmt)
×
206

207
    # connect to database
208
    global_connection()
×
209

210
    main()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc