• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

CenterForOpenScience / SHARE / 13463256108

21 Feb 2025 06:48PM UTC coverage: 91.765% (+0.02%) from 91.742%
13463256108

push

github

aaxelb
fix: when backfilling an index, don't delete other indexes

8 of 8 new or added lines in 2 files covered. (100.0%)

42 existing lines in 3 files now uncovered.

26876 of 29288 relevant lines covered (91.76%)

1.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.62
/trove/render/jsonapi.py
1
import base64
2✔
2
from collections import defaultdict
2✔
3
import contextlib
2✔
4
import dataclasses
2✔
5
import datetime
2✔
6
import itertools
2✔
7
import json
2✔
8
import time
2✔
9
from typing import Iterable, Union
2✔
10

11
from primitive_metadata import primitive_rdf
2✔
12

13
from trove import exceptions as trove_exceptions
2✔
14
from trove.vocab.jsonapi import (
2✔
15
    JSONAPI_MEMBERNAME,
16
    JSONAPI_RELATIONSHIP,
17
    JSONAPI_ATTRIBUTE,
18
    JSONAPI_LINK_OBJECT,
19
)
20
from trove.vocab import mediatypes
2✔
21
from trove.vocab.namespaces import (
2✔
22
    OSFMAP,
23
    OWL,
24
    RDF,
25
    TROVE,
26
    XSD,
27
)
28
from trove.vocab.trove import (
2✔
29
    trove_indexcard_namespace,
30
)
31
from ._base import BaseRenderer
2✔
32

33

34
# a jsonapi resource may pull rdf data using an iri or blank node
35
# (using conventions from py for rdf as python primitives)
36
_IriOrBlanknode = Union[str, frozenset]
2✔
37

38

39
def _resource_ids_defaultdict():
2✔
40
    _prefix = str(time.time_ns())
2✔
41
    _ints = itertools.count()
2✔
42

43
    def _iter_ids():
2✔
44
        while True:
2✔
45
            _id = next(_ints)
2✔
46
            yield f'{_prefix}-{_id}'
2✔
47

48
    _ids = _iter_ids()
2✔
49
    return defaultdict(lambda: next(_ids))
2✔
50

51

52
@dataclasses.dataclass
2✔
53
class RdfJsonapiRenderer(BaseRenderer):
2✔
54
    '''render rdf data into jsonapi resources, guided by a given rdf vocabulary
2✔
55

UNCOV
56
    the given vocab describes how rdf predicates and classes in the data should
×
UNCOV
57
    map to jsonapi fields and resource objects in the rendered output, using
×
UNCOV
58
    `prefix jsonapi: <https://jsonapi.org/format/1.1/#>` and linked anchors in
×
UNCOV
59
    the jsonapi spec to represent jsonapi concepts:
×
UNCOV
60
      - jsonapi member name:
×
UNCOV
61
          `<iri> jsonapi:document-member-names "foo"@en`
×
UNCOV
62
      - jsonapi attribute:
×
UNCOV
63
          `<predicate_iri> rdf:type jsonapi:document-resource-object-attributes`
×
UNCOV
64
      - jsonapi relationship:
×
UNCOV
65
          `<predicate_iri> rdf:type jsonapi:document-resource-object-relationships`
×
UNCOV
66
      - to-one relationship or single-value attribute:
×
UNCOV
67
          `<predicate_iri> rdf:type owl:FunctionalProperty`
×
68

UNCOV
69
    note: does not support relationship links (or many other jsonapi features)
×
UNCOV
70
    '''
×
71
    MEDIATYPE = mediatypes.JSONAPI
2✔
72
    INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json']
2✔
73

74
    _identifier_object_cache: dict = dataclasses.field(default_factory=dict)
2✔
75
    _id_namespace_set: Iterable[primitive_rdf.IriNamespace] = (trove_indexcard_namespace(),)
2✔
76
    __to_include: set[primitive_rdf.RdfObject] | None = None
2✔
77
    __assigned_blanknode_resource_ids: defaultdict[frozenset, str] = dataclasses.field(
2✔
78
        default_factory=_resource_ids_defaultdict,
2✔
79
        repr=False,
2✔
80
    )
81

82
    def simple_render_document(self) -> str:
2✔
83
        return json.dumps(
2✔
84
            self.render_dict(self.response_focus.single_iri()),
2✔
85
            indent=2,  # TODO: pretty-print query param?
2✔
86
        )
87

88
    def render_dict(self, primary_iris: Union[str, Iterable[str]]) -> dict:
2✔
89
        _primary_data: dict | list | None = None
2✔
90
        _included_data = []
2✔
91
        with self._contained__to_include() as _to_include:
2✔
92
            if isinstance(primary_iris, str):
2✔
93
                _already_included = {primary_iris}
2✔
94
                _primary_data = self.render_resource_object(primary_iris)
2✔
UNCOV
95
            else:
×
UNCOV
96
                _already_included = set(primary_iris)
×
97
                _primary_data = [
98
                    self.render_resource_object(_iri)
99
                    for _iri in primary_iris
100
                ]
101
            while _to_include:
2✔
102
                _next = _to_include.pop()
2✔
103
                if _next not in _already_included:
2✔
104
                    _already_included.add(_next)
2✔
105
                    _included_data.append(self.render_resource_object(_next))
2✔
106
        _document = {'data': _primary_data}
2✔
107
        if _included_data:
2✔
108
            _document['included'] = _included_data
2✔
109
        return _document
2✔
110

111
    def render_resource_object(self, iri_or_blanknode: _IriOrBlanknode) -> dict:
        '''render a full jsonapi resource object for an iri or blank node

        starts from the identifier object (`id` and `type`), then renders each
        predicate other than rdf:type as a field. for an iri, fields come from
        the response data and a `self` link is added; for a blank node, fields
        come from the blank node's own twoples.
        '''
        _is_iri = isinstance(iri_or_blanknode, str)
        # shallow copy, so adding fields never touches the cached identifier object
        _rendered = dict(self.render_identifier_object(iri_or_blanknode))
        if _is_iri:
            _fields = self.response_data.tripledict.get(iri_or_blanknode) or {}
        else:
            _fields = primitive_rdf.twopledict_from_twopleset(iri_or_blanknode)
        for _predicate, _objects in _fields.items():
            if _predicate == RDF.type:
                continue  # types are already expressed by the jsonapi `type` member
            self._render_field(_predicate, _objects, into=_rendered)
        if _is_iri:
            _rendered.setdefault('links', {})['self'] = iri_or_blanknode
        return _rendered
2✔
124

125
    def render_identifier_object(self, iri_or_blanknode: _IriOrBlanknode):
        '''get the jsonapi resource identifier object (`id` and `type`) for an iri or blank node

        results are memoized in `_identifier_object_cache`, so the same dict
        object is returned on repeat calls.

        raises `ExpectedIriOrBlanknode` when given neither a str nor a frozenset
        '''
        try:
            return self._identifier_object_cache[iri_or_blanknode]
        except KeyError:
            if isinstance(iri_or_blanknode, str):
                # an iri's types come from the response data
                _types = list(self.response_data.q(iri_or_blanknode, RDF.type))
                _resource_id = self._resource_id_for_iri(iri_or_blanknode)
            elif isinstance(iri_or_blanknode, frozenset):
                # a blank node carries its own (predicate, object) twoples
                _types = [
                    _object
                    for _predicate, _object in iri_or_blanknode
                    if _predicate == RDF.type
                ]
                _resource_id = self._resource_id_for_blanknode(iri_or_blanknode)
            else:
                raise trove_exceptions.ExpectedIriOrBlanknode(f'expected str or frozenset (got {iri_or_blanknode})')
            _identifier = {
                'id': _resource_id,
                'type': self._single_typename(_types),
            }
            self._identifier_object_cache[iri_or_blanknode] = _identifier
            return _identifier
2✔
149

150
    def _single_typename(self, type_iris: list[str]):
2✔
151
        if not type_iris:
2✔
152
            raise trove_exceptions.MissingRdfType
2✔
153
        if len(type_iris) == 1:
2✔
154
            return self._membername_for_iri(type_iris[0])
2✔
155
        # choose one predictably, preferring osfmap and trove
156
        for _namespace in (OSFMAP, TROVE):
2✔
157
            _type_iris = sorted(_iri for _iri in type_iris if _iri in _namespace)
2✔
158
            if _type_iris:
2✔
159
                return self._membername_for_iri(_type_iris[0])
2✔
UNCOV
160
        return self._membername_for_iri(sorted(type_iris)[0])
×
161

162
    def _membername_for_iri(self, iri: str):
        '''get the jsonapi member name to use for the given iri

        uses the first member name the thesaurus gives for the iri; when the
        thesaurus gives none, falls back to the compact (shorthand) iri.

        raises `ExpectedLiteralObject` when the thesaurus member name is not a literal
        '''
        _nothing = object()  # sentinel: tells "no member name" apart from any real value
        _found = next(self.thesaurus.q(iri, JSONAPI_MEMBERNAME), _nothing)
        if _found is _nothing:
            return self.iri_shorthand.compact_iri(iri)
        if not isinstance(_found, primitive_rdf.Literal):
            raise trove_exceptions.ExpectedLiteralObject((iri, JSONAPI_MEMBERNAME, _found))
        return _found.unicode_value
2✔
172

173
    def _resource_id_for_blanknode(self, blanknode: frozenset, /):
2✔
174
        return self.__assigned_blanknode_resource_ids[blanknode]
2✔
175

176
    def _resource_id_for_iri(self, iri: str):
2✔
177
        for _iri_namespace in self._id_namespace_set:
2✔
178
            if iri in _iri_namespace:
2✔
UNCOV
179
                return primitive_rdf.iri_minus_namespace(iri, namespace=_iri_namespace)
×
180
        # as fallback, encode the iri into a valid jsonapi member name
181
        return base64.urlsafe_b64encode(iri.encode()).decode()
2✔
182

183
    def _render_field(self, predicate_iri, object_set, *, into: dict):
        '''render one predicate's objects as a field of the resource object `into`

        the field lands under 'relationships' or 'attributes' when the
        thesaurus marks the predicate as such and its member name has no ':'
        (presumably meaning the name is not a compact-iri fallback); any other
        field lands under the unstructured 'meta'. a relationship predicate is
        always rendered as a relationship object, wherever it lands.
        '''
        _is_relationship = (predicate_iri, RDF.type, JSONAPI_RELATIONSHIP) in self.thesaurus
        _is_attribute = (predicate_iri, RDF.type, JSONAPI_ATTRIBUTE) in self.thesaurus
        _membername = self._membername_for_iri(predicate_iri)
        if ':' in _membername:
            _section = 'meta'
        elif _is_relationship:
            _section = 'relationships'
        elif _is_attribute:
            _section = 'attributes'
        else:
            _section = 'meta'  # not configured for jsonapi
        _rendered = (
            self._render_relationship_object(predicate_iri, object_set)
            if _is_relationship
            else self._one_or_many(predicate_iri, self._attribute_datalist(object_set))
        )
        # mutate the given resource object in place
        into.setdefault(_section, {})[_membername] = _rendered
2✔
199

200
    def _one_or_many(self, predicate_iri: str, datalist: list):
        '''unwrap `datalist` when the predicate is an owl:FunctionalProperty

        for a functional property, returns the lone item (or None when the
        list is empty); for any other predicate, returns the list unchanged.

        raises `OwlObjection` when a functional property has more than one item
        '''
        if (predicate_iri, RDF.type, OWL.FunctionalProperty) not in self.thesaurus:
            return datalist
        if len(datalist) > 1:
            raise trove_exceptions.OwlObjection(f'multiple objects for to-one relation <{predicate_iri}>: {datalist}')
        if datalist:
            return datalist[0]
        return None
2✔
207

208
    def _attribute_datalist(self, object_set):
2✔
209
        return [
2✔
210
            self._render_attribute_datum(_obj)
2✔
211
            for _obj in object_set
2✔
212
        ]
213

214
    def _render_relationship_object(self, predicate_iri, object_set):
        '''render a jsonapi relationship object for one predicate's objects

        each related iri or blank node becomes a resource identifier object
        in `data` and is requested for inclusion in the document. a blank
        node typed rdf:Seq contributes its members in sequence order; a blank
        node typed as a jsonapi link object goes into `links` instead (which
        is omitted when there are no links).

        raises `ExpectedIriOrBlanknode` for an object that is neither str nor
        frozenset, and `OwlObjection` (via `_one_or_many`) for several objects
        on a functional property
        '''
        _data = []
        _links = {}
        for _obj in object_set:
            if isinstance(_obj, frozenset):
                if (RDF.type, RDF.Seq) in _obj:
                    _related = primitive_rdf.sequence_objects_in_order(_obj)
                elif (RDF.type, JSONAPI_LINK_OBJECT) in _obj:
                    _key, _link_obj = self._render_link_object(_obj)
                    _links[_key] = _link_obj
                    continue  # a link is not a related resource
                else:
                    _related = (_obj,)
            elif isinstance(_obj, str):
                _related = (_obj,)
            else:
                # explicit raise rather than `assert`, which is skipped under
                # `python -O`; same error `render_identifier_object` uses
                raise trove_exceptions.ExpectedIriOrBlanknode(f'expected str or frozenset (got {_obj})')
            for _each in _related:
                _data.append(self.render_identifier_object(_each))
                self._pls_include(_each)
        _relationship_obj = {
            'data': self._one_or_many(predicate_iri, _data),
        }
        if _links:
            _relationship_obj['links'] = _links
        return _relationship_obj
2✔
239

240
    def _render_link_object(self, link_obj: frozenset):
        '''render a link-object blank node as a `(member name, jsonapi link object)` pair

        the member name is the text of the first jsonapi member name found
        among the blank node's twoples; the link's `href` is the first
        rdf:value found.

        raises `StopIteration` when either is missing
        '''
        _membername_object = next(
            _object
            for _predicate, _object in link_obj
            if _predicate == JSONAPI_MEMBERNAME
        )
        _membername = _membername_object.unicode_value
        _href = next(
            _object
            for _predicate, _object in link_obj
            if _predicate == RDF.value
        )
        # TODO: other link object members
        #   'rel', 'describedby', 'title', 'type', 'hreflang', 'meta'
        return _membername, {'href': _href}
261

262
    def _make_object_gen(self, object_set):
        '''yield each object in `object_set`, replacing any rdf:Seq blank node with its members in order'''
        for _rdfobject in object_set:
            _is_sequence = (
                isinstance(_rdfobject, frozenset)
                and (RDF.type, RDF.Seq) in _rdfobject
            )
            if not _is_sequence:
                yield _rdfobject
                continue
            yield from primitive_rdf.sequence_objects_in_order(_rdfobject)
268

269
    @contextlib.contextmanager
2✔
270
    def _contained__to_include(self):
2✔
271
        assert self.__to_include is None
2✔
272
        self.__to_include = set()
2✔
273
        try:
2✔
274
            yield self.__to_include
2✔
275
        finally:
276
            self.__to_include = None
2✔
277

278
    def _pls_include(self, item):
2✔
279
        if self.__to_include is not None:
2✔
280
            self.__to_include.add(item)
2✔
281

282
    def _render_attribute_datum(self, rdfobject: primitive_rdf.RdfObject) -> dict | list | str | float | int:
        '''render a single rdf object as a json-friendly attribute value

        - blank node typed rdf:Seq: list of its members, rendered in order
        - other blank node: dict keyed by member name, one entry per predicate
        - literal: parsed json (rdf:JSON), int (xsd:integer), or its text
        - str (iri): its resource identifier object when one can be built,
          else the iri itself
        - float or int: unchanged
        - date: "YYYY-MM-DD"

        raises `UnsupportedRdfObject` for anything else
        '''
        if isinstance(rdfobject, frozenset):
            if (RDF.type, RDF.Seq) in rdfobject:
                _in_order = primitive_rdf.sequence_objects_in_order(rdfobject)
                return [self._render_attribute_datum(_each) for _each in _in_order]
            _twopledict = primitive_rdf.twopledict_from_twopleset(rdfobject)
            return {
                self._membername_for_iri(_pred): self._one_or_many(
                    _pred,
                    self._attribute_datalist(_objs),
                )
                for _pred, _objs in _twopledict.items()
            }
        if isinstance(rdfobject, primitive_rdf.Literal):
            if RDF.JSON in rdfobject.datatype_iris:
                return json.loads(rdfobject.unicode_value)
            if XSD.integer in rdfobject.datatype_iris:
                return int(rdfobject.unicode_value)
            return rdfobject.unicode_value  # TODO: decide how to represent language
        if isinstance(rdfobject, str):
            # best effort: maybe it's a jsonapi resource; if not, leave the iri as-is
            try:
                return self.render_identifier_object(rdfobject)
            except Exception:
                return rdfobject
        if isinstance(rdfobject, (float, int)):
            return rdfobject
        if isinstance(rdfobject, datetime.date):
            # `date.isoformat` (not the instance method) so the result is just "YYYY-MM-DD"
            return datetime.date.isoformat(rdfobject)
        raise trove_exceptions.UnsupportedRdfObject(rdfobject)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc