• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neurobagel / api / 10085126638

24 Jul 2024 10:50PM UTC coverage: 95.972% (+0.05%) from 95.927%
10085126638

Pull #326

github

web-flow
Merge a4697b1ca into 0ac0d4297
Pull Request #326: [FIX] Exclude sessions missing a queried property from matches

7 of 7 new or added lines in 3 files covered. (100.0%)

1 existing line in 1 file now uncovered.

691 of 720 relevant lines covered (95.97%)

1.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.21
/app/api/utility.py
1
"""Constants for graph server connection and utility functions for writing the SPARQL query."""
1✔
2

3
import json
2✔
4
import os
2✔
5
import textwrap
2✔
6
import warnings
2✔
7
from collections import namedtuple
2✔
8
from pathlib import Path
2✔
9
from typing import Optional
2✔
10

11
import httpx
2✔
12

13
# Request constants
# Pairs an environment variable's name with its resolved value, so callers can
# report which variable a bad value came from.
EnvVar = namedtuple("EnvVar", ["name", "val"])

# Space-separated list of origins allowed for CORS; empty string when unset.
ALLOWED_ORIGINS = EnvVar(
    "NB_API_ALLOWED_ORIGINS", os.environ.get("NB_API_ALLOWED_ORIGINS", "")
)

# Graph store credentials; no defaults, so .val is None when the env var is unset.
GRAPH_USERNAME = EnvVar(
    "NB_GRAPH_USERNAME", os.environ.get("NB_GRAPH_USERNAME")
)
GRAPH_PASSWORD = EnvVar(
    "NB_GRAPH_PASSWORD", os.environ.get("NB_GRAPH_PASSWORD")
)
GRAPH_ADDRESS = EnvVar(
    "NB_GRAPH_ADDRESS", os.environ.get("NB_GRAPH_ADDRESS", "206.12.99.17")
)
GRAPH_DB = EnvVar(
    "NB_GRAPH_DB", os.environ.get("NB_GRAPH_DB", "test_data/query")
)
# NOTE: the default is the int 5820 while an env-provided value is a str;
# both interpolate identically into QUERY_URL below.
GRAPH_PORT = EnvVar("NB_GRAPH_PORT", os.environ.get("NB_GRAPH_PORT", 5820))
# TODO: Environment variables can't be parsed as bool so this is a workaround but isn't ideal.
# Another option is to switch this to a command-line argument, but that would require changing the
# Dockerfile also since Uvicorn can't accept custom command-line args.
RETURN_AGG = EnvVar(
    "NB_RETURN_AGG", os.environ.get("NB_RETURN_AGG", "True").lower() == "true"
)

# SPARQL endpoint URL and request headers for querying the graph store.
QUERY_URL = f"http://{GRAPH_ADDRESS.val}:{GRAPH_PORT.val}/{GRAPH_DB.val}"
QUERY_HEADER = {
    "Content-Type": "application/sparql-query",
    "Accept": "application/sparql-results+json",
}

# Namespace prefix -> URI mapping; used to build SPARQL PREFIX declarations and
# to strip or substitute namespaces in term URIs elsewhere in this module.
CONTEXT = {
    "cogatlas": "https://www.cognitiveatlas.org/task/id/",
    "nb": "http://neurobagel.org/vocab/",
    "nbg": "http://neurobagel.org/graph/",  # TODO: Check if we still need this namespace.
    "ncit": "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#",
    "nidm": "http://purl.org/nidash/nidm#",
    "snomed": "http://purl.bioontology.org/ontology/SNOMEDCT/",
}

# Store domains in named tuples
# A queryable attribute: the SPARQL variable name it binds to and the predicate
# that links it to a session/subject.
Domain = namedtuple("Domain", ["var", "pred"])
# Core domains
AGE = Domain("age", "nb:hasAge")
SEX = Domain("sex", "nb:hasSex")
DIAGNOSIS = Domain("diagnosis", "nb:hasDiagnosis")
IS_CONTROL = Domain("subject_group", "nb:isSubjectGroup")
ASSESSMENT = Domain("assessment", "nb:hasAssessment")
IMAGE_MODAL = Domain("image_modal", "nb:hasContrastType")
PROJECT = Domain("project", "nb:hasSamples")


# Domains whose values come from controlled vocabularies (as opposed to numeric ranges).
CATEGORICAL_DOMAINS = [SEX, DIAGNOSIS, IMAGE_MODAL, ASSESSMENT]

# Controlled term denoting a healthy-control subject group.
IS_CONTROL_TERM = "ncit:C94342"

# Location of bundled backup copies of external vocabularies, resolved relative
# to this file (two directories up, then vocab/backup_external).
BACKUP_VOCAB_DIR = (
    Path(__file__).absolute().parents[2] / "vocab/backup_external"
)
74

75

76
def parse_origins_as_list(allowed_origins: str) -> list:
    """
    Return user-defined allowed origins as a list.

    Parameters
    ----------
    allowed_origins : str
        Space-separated string of allowed origins.

    Returns
    -------
    list
        The individual origins. Note that an empty input string yields [""]
        (str.split with an explicit separator never returns an empty list).
    """
    # str.split already returns a list; the previous list() wrapper was redundant.
    return allowed_origins.split(" ")
79

80

81
def create_context() -> str:
    """Return the SPARQL PREFIX declarations built from the CONTEXT namespace mapping, one per line."""
    prefix_declarations = []
    for prefix, namespace_uri in CONTEXT.items():
        prefix_declarations.append(f"PREFIX {prefix}: <{namespace_uri}>")
    return "\n".join(prefix_declarations)
86

87

88
def unpack_http_response_json_to_dicts(response: dict) -> list[dict]:
    """
    Reformats a nested dictionary object from a SPARQL query response JSON into a more human-readable list of dictionaries,
    where the keys are the variables selected in the SPARQL query and the values correspond to the variable values.
    The number of dictionaries should correspond to the number of query matches.
    """
    rows = []
    for binding in response["results"]["bindings"]:
        # Each binding maps a selected variable to a metadata dict; keep only the raw value.
        rows.append({variable: entry["value"] for variable, entry in binding.items()})
    return rows
98

99

100
def create_bound_filter(var: str) -> str:
    """
    Create a SPARQL filter substring for checking if a variable is bound
    (meaning the variable actually has a corresponding value, e.g., the property exists).

    NOTE: the returned string is deliberately left with an unbalanced opening
    parenthesis; callers append additional conditions and the closing ")".
    """
    return "FILTER (BOUND(?" + var + ")"
106

107

108
def create_query(
    return_agg: bool,
    age: Optional[tuple] = (None, None),
    sex: Optional[str] = None,
    diagnosis: Optional[str] = None,
    is_control: Optional[bool] = None,
    min_num_imaging_sessions: Optional[int] = None,
    min_num_phenotypic_sessions: Optional[int] = None,
    assessment: Optional[str] = None,
    image_modal: Optional[str] = None,
) -> str:
    """
    Creates a SPARQL query using a query template and filters it using the input parameters.

    Parameters
    ----------
    return_agg : bool
        Whether to return only aggregate query results (and not subject-level attributes besides file paths).
    age : tuple, optional
        Minimum and maximum age of subject, by default (None, None).
    sex : str, optional
        Subject sex, by default None.
    diagnosis : str, optional
        Subject diagnosis, by default None.
    is_control : bool, optional
        Whether or not subject is a control, by default None.
    min_num_imaging_sessions : int, optional
        Subject minimum number of imaging sessions, by default None.
    min_num_phenotypic_sessions : int, optional
        Subject minimum number of phenotypic sessions, by default None.
    assessment : str, optional
        Non-imaging assessment completed by subjects, by default None.
    image_modal : str, optional
        Imaging modality of subject scans, by default None.

    Returns
    -------
    str
        The SPARQL query.
    """
    # The annotation advertises Optional[tuple]; normalize an explicit None to the
    # "no bounds" sentinel so the age[0]/age[1] accesses below cannot raise TypeError.
    if age is None:
        age = (None, None)

    # Filters applied at the subject level (on per-subject session counts).
    subject_level_filters = ""
    if min_num_phenotypic_sessions is not None:
        subject_level_filters += (
            "\n"
            + f"FILTER (?num_matching_phenotypic_sessions >= {min_num_phenotypic_sessions})."
        )
    if min_num_imaging_sessions is not None:
        subject_level_filters += (
            "\n"
            + f"FILTER (?num_matching_imaging_sessions >= {min_num_imaging_sessions})."
        )

    # Filters applied within the phenotypic-session subquery. Each uses a BOUND
    # check so sessions missing the queried property are excluded from matches.
    phenotypic_session_level_filters = ""

    if age[0] is not None:
        phenotypic_session_level_filters += (
            "\n"
            + f"{create_bound_filter(AGE.var)} && ?{AGE.var} >= {age[0]})."
        )
    if age[1] is not None:
        phenotypic_session_level_filters += (
            "\n"
            + f"{create_bound_filter(AGE.var)} && ?{AGE.var} <= {age[1]})."
        )

    if sex is not None:
        phenotypic_session_level_filters += (
            "\n" + f"{create_bound_filter(SEX.var)} && ?{SEX.var} = {sex})."
        )

    if diagnosis is not None:
        phenotypic_session_level_filters += (
            "\n"
            + f"{create_bound_filter(DIAGNOSIS.var)} && ?{DIAGNOSIS.var} = {diagnosis})."
        )

    if is_control is not None:
        if is_control:
            phenotypic_session_level_filters += (
                "\n"
                + f"{create_bound_filter(IS_CONTROL.var)} && ?{IS_CONTROL.var} = {IS_CONTROL_TERM})."
            )
        else:
            # TODO: Revisit - this logic seems odd, since in our current data model the session should not have this edge if it's not a control.
            phenotypic_session_level_filters += (
                "\n"
                + f"{create_bound_filter(IS_CONTROL.var)} && ?{IS_CONTROL.var} != {IS_CONTROL_TERM})."
            )

    if assessment is not None:
        phenotypic_session_level_filters += (
            "\n"
            + f"{create_bound_filter(ASSESSMENT.var)} && ?{ASSESSMENT.var} = {assessment})."
        )

    # Filters applied within the imaging-session subquery.
    imaging_session_level_filters = ""
    if image_modal is not None:
        imaging_session_level_filters += (
            "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})."
        )

    query_string = textwrap.dedent(
        f"""
        SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex
        ?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions ?session_id ?session_type ?assessment ?image_modal ?session_file_path
        WHERE {{
            ?dataset_uuid a nb:Dataset;
                nb:hasLabel ?dataset_name;
                nb:hasSamples ?subject.
            ?subject a nb:Subject;
                nb:hasLabel ?sub_id;
                nb:hasSession ?session.
            ?session a ?session_type;
                nb:hasLabel ?session_id.
            OPTIONAL {{
                ?session nb:hasAcquisition/nb:hasContrastType ?image_modal.
                OPTIONAL {{?session nb:hasFilePath ?session_file_path.}}
            }}
            OPTIONAL {{?dataset_uuid nb:hasPortalURI ?dataset_portal_uri.}}
            OPTIONAL {{?session nb:hasAge ?age.}}
            OPTIONAL {{?session nb:hasSex ?sex.}}
            OPTIONAL {{?session nb:hasDiagnosis ?diagnosis.}}
            OPTIONAL {{?session nb:isSubjectGroup ?subject_group.}}
            OPTIONAL {{?session nb:hasAssessment ?assessment.}}
            {{
                SELECT ?subject (count(distinct ?phenotypic_session) as ?num_matching_phenotypic_sessions)
                WHERE {{
                    ?subject nb:hasSession ?phenotypic_session.
                    ?phenotypic_session a nb:PhenotypicSession.

                    OPTIONAL {{?phenotypic_session nb:hasAge ?age.}}
                    OPTIONAL {{?phenotypic_session nb:hasSex ?sex.}}
                    OPTIONAL {{?phenotypic_session nb:hasDiagnosis ?diagnosis.}}
                    OPTIONAL {{?phenotypic_session nb:isSubjectGroup ?subject_group.}}
                    OPTIONAL {{?phenotypic_session nb:hasAssessment ?assessment.}}

                    {phenotypic_session_level_filters}
                }} GROUP BY ?subject
            }}
            {{
                SELECT ?subject (count(distinct ?imaging_session) as ?num_matching_imaging_sessions)
                WHERE {{
                    OPTIONAL {{
                        ?subject nb:hasSession ?imaging_session.
                        ?imaging_session a nb:ImagingSession;
                            nb:hasAcquisition/nb:hasContrastType ?image_modal.
                    }}
                    {imaging_session_level_filters}
                }} GROUP BY ?subject
            }}
            {subject_level_filters}
        }}
    """
    )

    # The query defined above will return all subject-level attributes from the graph. If RETURN_AGG variable has been set to true,
    # wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata.
    if return_agg:
        query_string = (
            textwrap.dedent(
                """
            SELECT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?image_modal
            WHERE {"""
            )
            + textwrap.indent(query_string, "    ")
            + "} GROUP BY ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?image_modal"
        )

    # NOTE: a leftover debug `print(query_string)` was removed here; it dumped the
    # full query to stdout on every request.
    return "\n".join([create_context(), query_string])
278

279

280
def create_multidataset_size_query(dataset_uuids: list) -> str:
    """Build a SPARQL query that counts the distinct subjects in each of the given datasets."""
    # One VALUES entry per dataset, each wrapped in angle brackets as a full IRI.
    formatted_uuids = [f"<{uuid}>" for uuid in dataset_uuids]
    dataset_uuids_string = "\n".join(formatted_uuids)
    query_string = f"""
        SELECT ?dataset_uuid (COUNT(DISTINCT ?subject) as ?total_subjects)
        WHERE {{
            VALUES ?dataset_uuid {{
                {dataset_uuids_string}
            }}
            ?dataset_uuid nb:hasSamples ?subject.
            ?subject a nb:Subject.
        }} GROUP BY ?dataset_uuid
    """

    return create_context() + "\n" + query_string
295

296

297
def create_terms_query(data_element_URI: str) -> str:
    """
    Build a SPARQL query that retrieves the URLs of every term attached to a given data element.

    Parameters
    ----------
    data_element_URI : str
        The URI of the data element for which to retrieve the URIs of all connected term.

    Returns
    -------
    str
        The SPARQL query.

    Examples
    --------
    get_terms_query("nb:Assessment")
    """

    query_string = f"""
    SELECT DISTINCT ?termURL
    WHERE {{
        ?termURL a {data_element_URI} .
        {data_element_URI} rdfs:subClassOf nb:ControlledTerm .
    }}
    """

    return create_context() + "\n" + query_string
325

326

327
def is_term_namespace_in_context(term_url: str) -> bool:
    """
    Performs basic check for if a term URL contains a namespace URI from the context.

    Parameters
    ----------
    term_url : str
        A controlled term URI.

    Returns
    -------
    bool
        True if the term URL contains a namespace URI from the context, False otherwise.
    """
    return any(namespace_uri in term_url for namespace_uri in CONTEXT.values())
345

346

347
def strip_namespace_from_term_uri(term: str, has_prefix: bool = False) -> str:
    """
    Removes namespace URI or prefix from a term URI if the namespace is recognized.

    Parameters
    ----------
    term : str
        A controlled term URI.
    has_prefix : bool, optional
        Whether the term URI includes a namespace prefix (as opposed to the full namespace URI), by default False.

    Returns
    -------
    str
        The unique term ID.
    """
    if has_prefix:
        # Split on the last colon: everything before it should be a known prefix.
        parts = term.rsplit(":", 1)
        if parts[0] in CONTEXT:
            return parts[1]
    else:
        matching_uri = next(
            (uri for uri in CONTEXT.values() if uri in term), None
        )
        if matching_uri is not None:
            return term.replace(matching_uri, "")

    # If no match found within the context, return original term
    return term
374

375

376
def replace_namespace_uri_with_prefix(url: str) -> str:
    """
    Replaces namespace URIs in term URLs with corresponding prefixes from the context.

    Parameters
    ----------
    url : str
        A controlled term URL.

    Returns
    -------
    str
        The term with namespace URIs replaced with prefixes if found in the context, or the original URL.
    """
    match = next(
        ((prefix, uri) for prefix, uri in CONTEXT.items() if uri in url), None
    )
    if match is None:
        # If no match found within the context, return original URL
        return url
    prefix, namespace_uri = match
    return url.replace(namespace_uri, f"{prefix}:")
396

397

398
def load_json(path: Path) -> dict:
    """
    Loads a user-specified JSON file.

    Parameters
    ----------
    path : Path
        Path to JSON file.
    """
    with open(path, "r") as json_file:
        contents = json_file.read()
    return json.loads(contents)
409

410

411
def fetch_and_save_cogatlas(output_path: Path):
    """
    Fetches the Cognitive Atlas vocabulary using its native Task API and writes term ID-label mappings to a temporary lookup file.
    If the API request fails, a backup copy of the vocabulary is used instead.

    Saves a JSON with keys corresponding to Cognitive Atlas task IDs and values corresponding to human-readable task names.

    Parameters
    ----------
    output_path : Path
        File path to store output vocabulary lookup file.
    """
    api_url = "https://www.cognitiveatlas.org/api/v-alpha/task?format=json"

    try:
        response = httpx.get(url=api_url)
        if response.is_success:
            vocab = response.json()
        else:
            warnings.warn(
                f"""
                The API was unable to fetch the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source and will default to using a local backup copy of the vocabulary instead.

                Details of the response from the source:
                Status code {response.status_code}
                {response.reason_phrase}: {response.text}
                """
            )
            # Use backup copy of the raw vocabulary JSON
            vocab = load_json(BACKUP_VOCAB_DIR / "cogatlas_task.json")
    except httpx.NetworkError as exc:
        # BUG FIX: this warning previously opened with f"""" (four quote chars),
        # which prepended a stray '"' to the user-visible message.
        warnings.warn(
            f"""
            Fetching of the Cognitive Atlas task vocabulary (https://www.cognitiveatlas.org/tasks/a/) from the source failed due to a network error.
            The API will default to using a local backup copy of the vocabulary instead.

            Error: {exc}
            """
        )
        # Use backup copy of the raw vocabulary JSON
        vocab = load_json(BACKUP_VOCAB_DIR / "cogatlas_task.json")

    # Reduce the raw vocabulary to an ID -> human-readable name lookup and persist it.
    term_labels = {term["id"]: term["name"] for term in vocab}
    with open(output_path, "w") as f:
        f.write(json.dumps(term_labels, indent=2))
456

457

458
def create_snomed_term_lookup(output_path: Path):
    """
    Reads in a file of disorder terms from the SNOMED CT vocabulary and writes term ID-label mappings to a temporary lookup file.

    Saves a JSON with keys corresponding to SNOMED CT IDs and values corresponding to human-readable term names.

    Parameters
    ----------
    output_path : Path
        File path to store output vocabulary lookup file.
    """
    vocab = load_json(BACKUP_VOCAB_DIR / "snomedct_disorder.json")

    # Map each SNOMED CT ID to its preferred (human-readable) name.
    term_labels = {
        entry["sctid"]: entry["preferred_name"] for entry in vocab
    }
    with open(output_path, "w") as lookup_file:
        json.dump(term_labels, lookup_file, indent=2)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc