• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

CenterForOpenScience / SHARE / 15909158736

26 Jun 2025 06:03PM UTC coverage: 81.092% (-0.6%) from 81.702%
15909158736

Pull #850

github

web-flow
Merge pull request #875 from bodintsov/feature/share-cleanupgrade-2025-type-annotations

[ENG-7443] Feature/share cleanupgrade 2025 type annotations
Pull Request #850: [project][ENG-7225] share clean(up)grade 2025 (milestone 2: upgrade)

485 of 534 new or added lines in 63 files covered. (90.82%)

12 existing lines in 5 files now uncovered.

6150 of 7584 relevant lines covered (81.09%)

0.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

72.52
/trove/render/simple_csv.py
1
from __future__ import annotations
1✔
2
from collections.abc import (
1✔
3
    Generator,
4
    Iterator,
5
    Iterable,
6
    Sequence,
7
)
8
import csv
1✔
9
import functools
1✔
10
import itertools
1✔
11
import dataclasses
1✔
12
from typing import TYPE_CHECKING, ClassVar
1✔
13

14
from trove.trovesearch.search_params import (
1✔
15
    CardsearchParams,
16
    ValuesearchParams,
17
)
18
from trove.util.propertypath import Propertypath, GLOB_PATHSTEP
1✔
19
from trove.vocab import mediatypes
1✔
20
from trove.vocab import osfmap
1✔
21
from trove.vocab.namespaces import TROVE
1✔
22
from ._simple_trovesearch import SimpleTrovesearchRenderer
1✔
23
from ._rendering import StreamableRendering, ProtoRendering
1✔
24
if TYPE_CHECKING:
1✔
UNCOV
25
    from trove.util.trove_params import BasicTroveParams
×
NEW
26
    from trove.util.json import JsonValue, JsonObject
×
27

28

29
type Jsonpath = Sequence[str]  # path of json keys
1✔
30
type CsvValue = str | int | float | None
1✔
31

32
_MULTIVALUE_DELIMITER = ' ; '  # possible improvement: smarter in-value delimiting?
1✔
33
_VALUE_KEY_PREFERENCE = ('@value', '@id', 'name', 'prefLabel', 'label')
1✔
34
_ID_JSONPATH = ('@id',)
1✔
35

36

37
class TrovesearchSimpleCsvRenderer(SimpleTrovesearchRenderer):
    '''render index cards (as osfmap json) into a streamed csv table'''
    MEDIATYPE = mediatypes.CSV
    INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json']
    CSV_DIALECT: ClassVar[type[csv.Dialect]] = csv.excel

    def unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> ProtoRendering:
        '''render a single card by treating it as one page holding one card'''
        _only_page = {card_iri: osfmap_json}
        return self.multicard_rendering(card_pages=iter((_only_page,)))

    def multicard_rendering(self, card_pages: Iterator[dict[str, JsonObject]]) -> ProtoRendering:
        '''render pages of cards as a streamable csv document

        the header is computed right away; the rows are generated lazily
        as the returned rendering's content stream is consumed
        '''
        # `response_focus` may or may not carry search params; fall back to None
        _search_params = getattr(self.response_focus, 'search_params', None)
        _table = TabularDoc(card_pages, trove_params=_search_params)
        _csv_lines = csv_stream(self.CSV_DIALECT, _table.header(), _table.rows())
        return StreamableRendering(  # type: ignore[return-value]
            mediatype=self.MEDIATYPE,
            content_stream=_csv_lines,
        )
54

55

56
def csv_stream(
    csv_dialect: type[csv.Dialect],
    header: list[CsvValue],
    rows: Iterator[list[CsvValue]],
) -> Iterator[str]:
    '''yield csv text one line at a time: first the header, then each row

    the csv writer is given an `_Echo` in place of a file, so each `writerow`
    call hands back the formatted line instead of writing it anywhere
    '''
    _format_line = csv.writer(_Echo(), dialect=csv_dialect).writerow
    for _cells in itertools.chain((header,), rows):
        yield _format_line(_cells)
1✔
65

66

67
@dataclasses.dataclass
1✔
68
class TabularDoc:
1✔
69
    card_pages: Iterator[dict[str, JsonObject]]
1✔
70
    trove_params: BasicTroveParams | None = None
1✔
71
    _started: bool = False
1✔
72

73
    @functools.cached_property
1✔
74
    def column_jsonpaths(self) -> tuple[Jsonpath, ...]:
1✔
75
        _column_jsonpaths = (
1✔
76
            _osfmap_jsonpath(_path)
77
            for _path in self._column_paths()
78
        )
79
        return (_ID_JSONPATH, *_column_jsonpaths)
1✔
80

81
    @functools.cached_property
1✔
82
    def first_page(self) -> dict[str, JsonObject]:
1✔
83
        return next(self.card_pages, {})
1✔
84

85
    def _column_paths(self) -> Iterator[Propertypath]:
1✔
86
        _pathlists: list[Sequence[Propertypath]] = []
1✔
87
        if self.trove_params is not None:  # hacks
1✔
88
            if GLOB_PATHSTEP in self.trove_params.attrpaths_by_type:
×
89
                _pathlists.append(self.trove_params.attrpaths_by_type[GLOB_PATHSTEP])
×
90
            if isinstance(self.trove_params, ValuesearchParams):
×
91
                _expected_card_types = set(self.trove_params.valuesearch_type_iris())
×
92
            elif isinstance(self.trove_params, CardsearchParams):
×
93
                _expected_card_types = set(self.trove_params.cardsearch_type_iris())
×
94
            else:
95
                _expected_card_types = set()
×
96
            for _type_iri in sorted(_expected_card_types, key=len):
×
97
                try:
×
98
                    _pathlist = self.trove_params.attrpaths_by_type[_type_iri]
×
99
                except KeyError:
×
100
                    pass
×
101
                else:
102
                    _pathlists.append(_pathlist)
×
103
        if not _pathlists:
1✔
104
            _pathlists.append(osfmap.DEFAULT_TABULAR_SEARCH_COLUMN_PATHS)
1✔
105
        return self.iter_unique(itertools.chain.from_iterable(_pathlists))
1✔
106

107
    @staticmethod
1✔
108
    def iter_unique[T](iterable: Iterable[T]) -> Generator[T]:
1✔
109
        _seen = set()
1✔
110
        for _item in iterable:
1✔
111
            if _item not in _seen:
1✔
112
                _seen.add(_item)
1✔
113
                yield _item
1✔
114

115
    def _iter_card_pages(self) -> Generator[dict[str, JsonObject]]:
1✔
116
        assert not self._started
1✔
117
        self._started = True
1✔
118
        if self.first_page:
1✔
119
            yield self.first_page
1✔
120
            yield from self.card_pages
1✔
121

122
    def header(self) -> list[CsvValue]:
1✔
123
        return ['.'.join(_path) for _path in self.column_jsonpaths]
1✔
124

125
    def rows(self) -> Generator[list[CsvValue]]:
1✔
126
        for _page in self._iter_card_pages():
1✔
127
            for _card_iri, _osfmap_json in _page.items():
1✔
128
                yield self._row_values(_osfmap_json)
1✔
129

130
    def _row_values(self, osfmap_json: JsonObject) -> list[CsvValue]:
1✔
131
        return [
1✔
132
            self._row_field_value(osfmap_json, _field_path)
133
            for _field_path in self.column_jsonpaths
134
        ]
135

136
    def _row_field_value(self, osfmap_json: JsonObject, field_path: Jsonpath) -> CsvValue:
1✔
137
        _rendered_values = [
1✔
138
            _render_tabularly(_obj)
139
            for _obj in _iter_values(osfmap_json, field_path)
140
        ]
141
        if len(_rendered_values) == 1:
1✔
142
            return _rendered_values[0]  # preserve type for single numbers
1✔
143
        # for multiple values, can only be a string
144
        return _MULTIVALUE_DELIMITER.join(map(str, _rendered_values))
1✔
145

146

147
def _osfmap_jsonpath(iri_path: Propertypath) -> Jsonpath:
    '''convert a property path into a jsonpath, compacting each step with the osfmap-json shorthand'''
    _compact = osfmap.osfmap_json_shorthand().compact_iri
    return tuple(map(_compact, iri_path))
153

154

155
def _has_value(osfmap_json: JsonObject, path: Jsonpath) -> bool:
    '''whether following the given jsonpath in the given json object reaches at least one value'''
    _nothing = object()  # private marker: cannot be one of the yielded values
    return next(_iter_values(osfmap_json, path), _nothing) is not _nothing
×
162

163

164
def _iter_values(osfmap_json: JsonObject, path: Jsonpath) -> Generator[JsonValue]:
1✔
165
    assert path
1✔
166
    (_step, *_rest) = path
1✔
167
    _val = osfmap_json.get(_step)
1✔
168
    if _rest:
1✔
169
        if isinstance(_val, dict):
×
170
            yield from _iter_values(_val, _rest)
×
171
        elif isinstance(_val, list):
×
172
            for _val_obj in _val:
×
NEW
173
                if isinstance(_val_obj, dict):
×
NEW
174
                    yield from _iter_values(_val_obj, _rest)
×
175
    else:
176
        if isinstance(_val, list):
1✔
177
            yield from _val
×
178
        elif _val is not None:
1✔
179
            yield _val
1✔
180

181

182
def _render_tabularly(json_val: JsonValue) -> CsvValue:
1✔
183
    if isinstance(json_val, (str, int, float)):
1✔
184
        return json_val
1✔
185
    if isinstance(json_val, dict):
×
186
        for _key in _VALUE_KEY_PREFERENCE:
×
187
            _val = json_val.get(_key)
×
188
            if isinstance(_val, list):
×
189
                return (
×
190
                    _render_tabularly(_val[0])
191
                    if _val
192
                    else None
193
                )
194
            if _val is not None:
×
NEW
195
                return _render_tabularly(_val)
×
196
    return None
×
197

198

199
class _Echo:
1✔
200
    '''a write-only file-like object, to convince `csv.csvwriter.writerow` to return strings
201

202
    from https://docs.djangoproject.com/en/5.1/howto/outputting-csv/#streaming-large-csv-files
203
    '''
204
    def write(self, line: str) -> str:
1✔
205
        return line
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc