• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moonso / loqusdb / 15019325316

14 May 2025 11:18AM UTC coverage: 70.681%. First build
15019325316

Pull #152

github

web-flow
Merge 8e4b7fb2b into 18d293f92
Pull Request #152: add flag to skip GQ check on SV file --snv-gq-only

4 of 5 new or added lines in 2 files covered. (80.0%)

1162 of 1644 relevant lines covered (70.68%)

1.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.5
/loqusdb/utils/load.py
1
# -*- coding: utf-8 -*-
2
"""
2✔
3
loqusdb.utils.load.py
4

5
Functions to load data into the database.
6
This functions take an adapter which is the communication device for the database.
7

8
"""
9

10
import logging
2✔
11

12
import click
2✔
13

14
from loqusdb.build_models.case import build_case
2✔
15
from loqusdb.build_models.profile_variant import build_profile_variant
2✔
16
from loqusdb.build_models.variant import build_variant
2✔
17
from loqusdb.exceptions import CaseError, VcfError
2✔
18
from loqusdb.utils.case import get_case, update_case
2✔
19
from loqusdb.utils.delete import delete
2✔
20
from loqusdb.utils.profiling import get_profiles, profile_match
2✔
21
from loqusdb.utils.vcf import check_vcf, get_vcf
2✔
22

23
LOG = logging.getLogger(__name__)
2✔
24

25

26
def load_database(
2✔
27
    adapter,
28
    variant_file=None,
29
    sv_file=None,
30
    family_file=None,
31
    family_type="ped",
32
    skip_case_id=False,
33
    gq_threshold=None,
34
    snv_gq_only=False,
35
    qual_gq=False,
36
    case_id=None,
37
    max_window=3000,
38
    profile_file=None,
39
    hard_threshold=0.95,
40
    soft_threshold=0.9,
41
    genome_build=None,
42
):
43
    """Load the database with a case and its variants
44

45
    Args:
46
          adapter: Connection to database
47
          variant_file(str): Path to variant file
48
          sv_file(str): Path to sv variant file
49
          family_file(str): Path to family file
50
          family_type(str): Format of family file
51
          skip_case_id(bool): If no case information should be added to variants
52
          gq_threshold(int): If only quality variants should be considered
53
          qual_gq(bool): Use QUAL field instead of GQ format tag to gate quality
54
          case_id(str): If different case id than the one in family file should be used
55
          max_window(int): Specify the max size for sv windows
56
          check_profile(bool): Does profile check if True
57
          hard_threshold(float): Rejects load if hamming distance above this is found
58
          soft_threshold(float): Stores similar samples if hamming distance above this is found
59

60
    Returns:
61
          nr_inserted(int)
62
    """
63
    vcf_files = []
2✔
64

65
    nr_variants = None
2✔
66
    vcf_individuals = None
2✔
67
    if variant_file:
2✔
68
        vcf_info = check_vcf(variant_file)
2✔
69
        nr_variants = vcf_info["nr_variants"]
2✔
70
        variant_type = vcf_info["variant_type"]
2✔
71
        vcf_files.append(variant_file)
2✔
72
        # Get the indivuduals that are present in vcf file
73
        vcf_individuals = vcf_info["individuals"]
2✔
74

75
    nr_sv_variants = None
2✔
76
    sv_individuals = None
2✔
77
    if sv_file:
2✔
78
        vcf_info = check_vcf(sv_file, "sv")
2✔
79
        nr_sv_variants = vcf_info["nr_variants"]
2✔
80
        vcf_files.append(sv_file)
2✔
81
        sv_individuals = vcf_info["individuals"]
2✔
82

83
    profiles = None
2✔
84
    matches = None
2✔
85
    if profile_file:
2✔
86
        profiles = get_profiles(adapter, profile_file)
2✔
87
        ###Check if any profile already exists
88
        matches = profile_match(
2✔
89
            adapter, profiles, hard_threshold=hard_threshold, soft_threshold=soft_threshold
90
        )
91

92
    # If a gq threshold is used the variants need to have GQ (only SNVs if snv_gq_only)
93
    for _vcf_file in vcf_files:
2✔
94
        is_sv = _vcf_file == sv_file
2✔
95
        if snv_gq_only and is_sv:
2✔
NEW
96
            continue  # skip GQ check for SV VCF
×
97

98
        vcf = get_vcf(_vcf_file)
2✔
99
        if gq_threshold and not vcf.contains("GQ") and not qual_gq:
2✔
100
            LOG.warning("Set gq-threshold to 0 or add info to vcf {0}".format(_vcf_file))
×
101
            raise SyntaxError("GQ is not defined in vcf header")
×
102

103
    # Get a ped_parser.Family object from family file
104
    family = None
2✔
105
    family_id = None
2✔
106
    if family_file:
2✔
107
        LOG.info("Loading family from %s", family_file)
2✔
108
        with open(family_file, "r") as family_lines:
2✔
109
            family = get_case(family_lines=family_lines, family_type=family_type)
2✔
110
            family_id = family.family_id
2✔
111

112
    # There has to be a case_id or a family at this stage.
113
    case_id = case_id or family_id
2✔
114
    # Convert infromation to a loqusdb Case object
115
    case_obj = build_case(
2✔
116
        case=family,
117
        case_id=case_id,
118
        vcf_path=variant_file,
119
        vcf_individuals=vcf_individuals,
120
        nr_variants=nr_variants,
121
        vcf_sv_path=sv_file,
122
        sv_individuals=sv_individuals,
123
        nr_sv_variants=nr_sv_variants,
124
        profiles=profiles,
125
        matches=matches,
126
        profile_path=profile_file,
127
    )
128
    # Build and load a new case, or update an existing one
129
    load_case(
2✔
130
        adapter=adapter,
131
        case_obj=case_obj,
132
    )
133

134
    nr_inserted = 0
2✔
135
    # If case was succesfully added we can store the variants
136
    for file_type in ["vcf_path", "vcf_sv_path"]:
2✔
137
        variant_type = "snv"
2✔
138
        if file_type == "vcf_sv_path":
2✔
139
            variant_type = "sv"
2✔
140
        if case_obj.get(file_type) is None:
2✔
141
            continue
2✔
142

143
        vcf_obj = get_vcf(case_obj[file_type])
2✔
144
        try:
2✔
145
            nr_inserted += load_variants(
2✔
146
                adapter=adapter,
147
                vcf_obj=vcf_obj,
148
                case_obj=case_obj,
149
                skip_case_id=skip_case_id,
150
                gq_threshold=gq_threshold if not snv_gq_only or variant_type == "snv" else None,
151
                qual_gq=qual_gq,
152
                max_window=max_window,
153
                variant_type=variant_type,
154
                genome_build=genome_build,
155
            )
156
        except Exception as err:
×
157
            # If something went wrong do a rollback
158
            LOG.warning(err)
×
159
            delete(
×
160
                adapter=adapter,
161
                case_obj=case_obj,
162
            )
163
            raise err
×
164
    return nr_inserted
2✔
165

166

167
def load_case(adapter, case_obj, update=False):
2✔
168
    """Load a case to the database
169

170
    Args:
171
        adapter: Connection to database
172
        case_obj: dict
173
        update(bool): If existing case should be updated
174

175
    Returns:
176
        case_obj(models.Case)
177
    """
178
    # Check if the case already exists in database.
179
    existing_case = adapter.case(case_obj)
2✔
180
    if existing_case:
2✔
181
        if not update:
×
182
            raise CaseError("Case {0} already exists in database".format(case_obj["case_id"]))
×
183
        case_obj = update_case(case_obj, existing_case)
×
184

185
    # Add the case to database
186

187
    adapter.add_case(case_obj, update=update)
2✔
188

189
    return case_obj
2✔
190

191

192
def load_variants(
2✔
193
    adapter,
194
    vcf_obj,
195
    case_obj,
196
    skip_case_id=False,
197
    gq_threshold=None,
198
    qual_gq=False,
199
    max_window=3000,
200
    variant_type="snv",
201
    genome_build=None,
202
):
203
    """Load variants for a family into the database.
204

205
    Args:
206
        adapter (loqusdb.plugins.Adapter): initialized plugin
207
        case_obj(Case): dict with case information
208
        nr_variants(int)
209
        skip_case_id (bool): whether to include the case id on variant level
210
                             or not
211
        gq_threshold(int)
212
        max_window(int): Specify the max size for sv windows
213
        variant_type(str): 'sv' or 'snv'
214

215
    Returns:
216
        nr_inserted(int)
217
    """
218
    if variant_type == "snv":
2✔
219
        nr_variants = case_obj["nr_variants"]
2✔
220
    else:
221
        nr_variants = case_obj["nr_sv_variants"]
2✔
222

223
    nr_inserted = 0
2✔
224
    case_id = case_obj["case_id"]
2✔
225
    if skip_case_id:
2✔
226
        case_id = None
2✔
227
    # Loop over the variants in the vcf
228
    with click.progressbar(vcf_obj, label="Inserting variants", length=nr_variants) as bar:
2✔
229

230
        variants = (
2✔
231
            build_variant(
232
                variant, case_obj, case_id, gq_threshold, qual_gq, genome_build=genome_build
233
            )
234
            for variant in bar
235
        )
236

237
    if variant_type == "sv":
2✔
238
        for sv_variant in variants:
2✔
239
            if not sv_variant:
2✔
240
                continue
×
241
            adapter.add_structural_variant(variant=sv_variant, max_window=max_window)
2✔
242
            nr_inserted += 1
2✔
243

244
    if variant_type == "snv":
2✔
245
        nr_inserted = adapter.add_variants(variants)
2✔
246

247
    LOG.info("Inserted %s variants of type %s", nr_inserted, variant_type)
2✔
248

249
    return nr_inserted
2✔
250

251

252
def load_profile_variants(adapter, variant_file):
2✔
253
    """
254

255
    Loads variants used for profiling
256

257
    Args:
258
        adapter (loqusdb.plugins.Adapter): initialized plugin
259
        variant_file(str): Path to variant file
260

261

262
    """
263

264
    vcf_info = check_vcf(variant_file)
2✔
265
    variant_type = vcf_info["variant_type"]
2✔
266

267
    if variant_type != "snv":
2✔
268
        LOG.critical("Variants used for profiling must be SNVs only")
×
269
        raise VcfError
×
270

271
    vcf = get_vcf(variant_file)
2✔
272

273
    profile_variants = [build_profile_variant(variant) for variant in vcf]
2✔
274
    adapter.add_profile_variants(profile_variants)
2✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc