• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NaturalHistoryMuseum / splitgill / #76

04 Aug 2024 07:46PM UTC coverage: 34.976% (-59.7%) from 94.693%
#76

push

coveralls-python

jrdh
feat: remove default config values

BREAKING CHANGE: remove default config values

3 of 19 new or added lines in 3 files covered. (15.79%)

613 existing lines in 13 files now uncovered.

369 of 1055 relevant lines covered (34.98%)

0.35 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.77
/splitgill/model.py
1
from dataclasses import dataclass, field, astuple
1✔
2
from itertools import chain
1✔
3
from typing import Dict, Iterable, NamedTuple, List, Optional, FrozenSet, Any
1✔
4
from uuid import uuid4
1✔
5

6
from bson import ObjectId
1✔
7
from pymongo.results import BulkWriteResult
1✔
8

9
from splitgill.diffing import patch, DiffOp
1✔
10

11

12
@dataclass
class Record:
    """
    A record that has not yet been handed over to Splitgill for management.
    """

    # the record's identifier
    id: str
    # the record's data; an empty dict marks the record as a delete request
    data: dict

    @property
    def is_delete(self) -> bool:
        """
        Indicates whether this record represents a delete request, i.e. whether its
        data is empty ({}).

        :return: True if this is a delete, False if not
        """
        return False if self.data else True

    @staticmethod
    def new(data: dict) -> "Record":
        """
        Creates a record holding the given data under a freshly generated UUID4 string
        ID.

        :param data: the record's data
        :return: a new Record
        """
        record_id = str(uuid4())
        return Record(id=record_id, data=data)

    @staticmethod
    def delete(record_id: str) -> "Record":
        """
        Creates a delete request for the given record ID, i.e. a record with empty
        data.

        :param record_id: the ID of the record to delete
        :return: a new Record with empty data
        """
        return Record(id=record_id, data={})
38

39

40
class VersionedData(NamedTuple):
    """
    A (version, data) pair describing a record's data at a specific version.

    Declared with the class-based NamedTuple syntax because the keyword-argument
    functional form (NamedTuple("VersionedData", version=..., data=...)) is deprecated
    from Python 3.13 and scheduled for removal in 3.15. Field names and order are
    unchanged.
    """

    # the version of the data, or None if there is no version
    version: Optional[int]
    # the record data at this version
    data: dict
41

42

43
@dataclass
class MongoRecord:
    """
    A record as retrieved from MongoDB.
    """

    _id: ObjectId
    id: str
    # None when the record's current data is uncommitted (see is_uncommitted)
    version: Optional[int]
    data: dict
    # keyed by version; the keys are strings rather than ints because MongoDB doesn't
    # allow non-string keys
    diffs: Dict[str, List[DiffOp]] = field(default_factory=dict)

    @property
    def is_deleted(self) -> bool:
        """
        Whether the record is deleted, which is the case when its current data is an
        empty dict.

        :return: True if this record has been deleted, False if not
        """
        return False if self.data else True

    @property
    def is_uncommitted(self) -> bool:
        """
        Whether the record is uncommitted, which is the case when its current version
        is None.

        :return: True if this record is uncommitted, False if not
        """
        return self.version is None

    @property
    def has_history(self) -> bool:
        """
        Whether the record has any history, which is the case when it has any diffs.

        :return: True if this record has previous versions, False if not
        """
        return True if self.diffs else False

    def get_versions(self, desc: bool = False) -> List[int]:
        """
        Returns the record's versions as a sorted list of ints. The versions are taken
        from the diff keys plus the current version, unless the current version is
        None, in which case it is left out.

        :param desc: if True the versions are sorted in descending order, otherwise
                     they are sorted in ascending order (the default)
        :return: the record's versions
        """
        found = [int(key) for key in self.diffs]
        if self.version is not None:
            found.append(self.version)
        found.sort(reverse=desc)
        return found

    def iter(self) -> Iterable[VersionedData]:
        """
        Yields this record's data at each of its versions as VersionedData (version,
        data) named tuples. The current version and data come first (the version will
        be None if the record is uncommitted), followed by each older version in
        descending order. Each older version's data is produced by patching the
        previously yielded data with that version's diff.

        :return: VersionedData (version, data) named tuples in descending version
                 order
        """
        yield VersionedData(self.version, self.data)
        previous = self.data
        # the diff keys are strings, so sort them numerically as ints and convert back
        # to a string only for the lookup
        for number in sorted((int(key) for key in self.diffs), reverse=True):
            previous = patch(previous, self.diffs[str(number)])
            yield VersionedData(number, previous)
112

113

114
# use frozen as these objects have no reason to be mutable; equality and hashing are
# defined explicitly on the class and only consider lat_field
115
@dataclass(frozen=True)
class GeoFieldHint:
    """
    Class holding the names of the fields in a record which describe its
    latitude/longitude location and an optional uncertainty radius.

    Equality and hashing are based solely on lat_field, so two hints with the same
    latitude field are treated as the same hint regardless of their other fields.
    """

    lat_field: str
    lon_field: str
    radius_field: Optional[str] = None

    def __eq__(self, other: Any) -> bool:
        """
        Compares this hint to another object using only lat_field.

        :param other: the object to compare with
        :return: True if other is a GeoFieldHint with the same lat_field, False if it
                 is a GeoFieldHint with a different lat_field, and NotImplemented for
                 any other type
        """
        if isinstance(other, GeoFieldHint):
            return self.lat_field == other.lat_field
        # NotImplemented has to be returned, not raised: it isn't an exception, so
        # raising it produces a TypeError, whereas returning it lets Python try the
        # reflected comparison and then fall back to its default behaviour
        return NotImplemented

    def __hash__(self) -> int:
        """
        Hashes the hint on lat_field only, matching __eq__.

        :return: the hash of lat_field
        """
        return hash(self.lat_field)
133

134

135
# set frozen=True to make the objects immutable and provide hashing (which we need for
136
# parser.parse_str's lru_cache)
137
@dataclass(frozen=True)
class ParsingOptions:
    """
    Holds options for parsing.

    The objects created using this class are immutable. You can instantiate them
    directly, but using the ParsingOptionBuilder is preferred.
    """

    # lowercase string values which should be parsed as True
    true_values: FrozenSet[str]
    # lowercase string values which should be parsed as False
    false_values: FrozenSet[str]
    # date format strings to test candidates against using datetime.strptime
    date_formats: FrozenSet[str]
    # GeoFieldHint objects which can be used to test if a record contains any geographic
    # coordinate data
    geo_hints: FrozenSet[GeoFieldHint]
    # the maximum length of keyword strings (both case-sensitive and case-insensitive).
    # Strings will be truncated to this length before indexing
    keyword_length: int
    # the format to use to convert a float to a string for indexing. The string will
    # have format() called on it with the float value passed as the only parameter,
    # therefore the format string should use 0 to reference it
    float_format: str

    def to_doc(self) -> dict:
        """
        Converts these options into a plain dict. The frozenset fields become lists
        (the order of their elements is not guaranteed as they come from sets) and each
        geo hint is converted to a tuple of its fields using astuple.

        :return: a dict with one key per option
        """
        return {
            "true_values": list(self.true_values),
            "false_values": list(self.false_values),
            "date_formats": list(self.date_formats),
            "geo_hints": [astuple(hint) for hint in self.geo_hints],
            "keyword_length": self.keyword_length,
            "float_format": self.float_format,
        }

    @classmethod
    def from_doc(cls, doc: dict) -> "ParsingOptions":
        """
        Creates an options object from a dict with the same keys as produced by to_doc.
        Each entry in the geo_hints list is unpacked as the positional arguments of a
        GeoFieldHint.

        :param doc: a dict containing the true_values, false_values, date_formats,
                    geo_hints, keyword_length, and float_format keys
        :return: a new instance of the class this method was called on
        """
        # build via cls rather than naming ParsingOptions directly so that calling
        # this on a subclass produces an instance of that subclass
        return cls(
            true_values=frozenset(doc["true_values"]),
            false_values=frozenset(doc["false_values"]),
            date_formats=frozenset(doc["date_formats"]),
            geo_hints=frozenset(GeoFieldHint(*params) for params in doc["geo_hints"]),
            keyword_length=doc["keyword_length"],
            float_format=doc["float_format"],
        )
183

184

185
@dataclass
class IngestResult:
    """
    A dataclass containing information about the new data ingested into MongoDB.
    """

    # the version the new data was added at (if the data was not committed or no new
    # data was added, then this will be None)
    version: Optional[int] = None
    # the number of insert operations performed
    inserted: int = 0
    # the number of update operations performed
    updated: int = 0
    # the number of delete operations performed
    deleted: int = 0

    @property
    def was_committed(self) -> bool:
        """
        Returns True if the data was committed, False if not. This is determined by
        whether a version is available.

        :return: True if the data was committed, False if not
        """
        return self.version is not None

    def update(self, bulk_result: "BulkWriteResult") -> None:
        """
        Update the counts with the counts in the bulk result object. The counts are
        added to the existing totals so this can be called once per bulk write.

        :param bulk_result: a BulkWriteResult object
        """
        self.inserted += bulk_result.inserted_count
        # modified_count is the number of documents actually updated; upserted_count
        # only counts documents inserted through an upsert, so using it here left the
        # updated total at 0 for ordinary update operations
        self.updated += bulk_result.modified_count
        self.deleted += bulk_result.deleted_count
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc