• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NaturalHistoryMuseum / splitgill / #85

07 Aug 2024 01:47PM UTC coverage: 35.3% (-59.3%) from 94.633%
#85

push

coveralls-python

jrdh
test: add an explicit test for datetime and date complete flow

Tests that when a datetime or date goes into splitgill (and therefore goes through the prepare_data function) it comes out correct when parsed.

377 of 1068 relevant lines covered (35.3%)

0.35 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.11
/splitgill/model.py
1
from dataclasses import dataclass, field, astuple
1✔
2
from itertools import chain
1✔
3
from typing import Dict, Iterable, NamedTuple, List, Optional, FrozenSet, Any
1✔
4
from uuid import uuid4
1✔
5

6
from bson import ObjectId
1✔
7
from pymongo.results import BulkWriteResult
1✔
8

9
from splitgill.diffing import patch, DiffOp
1✔
10

11

12
@dataclass
class Record:
    """
    A record which is not yet managed by Splitgill.

    Pairs a record ID with the data for that record.
    """

    id: str
    data: dict

    @property
    def is_delete(self) -> bool:
        """
        Whether this record represents a delete request. A record is a delete request
        when its data is empty ({}).

        :return: True if this is a delete, False if not
        """
        has_data = bool(self.data)
        return not has_data

    @staticmethod
    def new(data: dict) -> "Record":
        """
        Creates a record holding the given data, using a freshly generated UUID4 string
        as the record's ID.

        :param data: the record's data
        :return: a new Record object
        """
        record_id = str(uuid4())
        return Record(id=record_id, data=data)

    @staticmethod
    def delete(record_id: str) -> "Record":
        """
        Creates a delete request for the given record ID, i.e. a record with empty
        data.

        :param record_id: the ID of the record to delete
        :return: a new Record object with empty data
        """
        return Record(id=record_id, data={})
×
38

39

40
# the data of a record at a specific version (the version is None for uncommitted data).
# Declared with the field-list functional syntax because the keyword-argument form
# (NamedTuple("X", a=int)) is deprecated since Python 3.13
VersionedData = NamedTuple(
    "VersionedData", [("version", Optional[int]), ("data", dict)]
)
1✔
41

42

43
@dataclass
class MongoRecord:
    """
    A record as retrieved from MongoDB.
    """

    _id: ObjectId
    id: str
    version: Optional[int]
    data: dict
    # the keys are versions, held as strings rather than ints because MongoDB doesn't
    # allow non-string keys
    diffs: Dict[str, List[DiffOp]] = field(default_factory=dict)

    @property
    def is_deleted(self) -> bool:
        """
        Whether this record has been deleted, which is the case when its current data
        is an empty dict.

        :return: True if this record has been deleted, False if not
        """
        has_data = bool(self.data)
        return not has_data

    @property
    def is_uncommitted(self) -> bool:
        """
        Whether this record is uncommitted, which is the case when its current version
        is None.

        :return: True if this record is uncommitted, False if not
        """
        return self.version is None

    @property
    def has_history(self) -> bool:
        """
        Whether this record has any history, which is the case when it has any diffs.

        :return: True if this record has previous versions, False if not
        """
        return True if self.diffs else False

    def get_versions(self, desc=False) -> List[int]:
        """
        Lists the versions of this record. The versions come from the keys of the diffs
        plus the current version, unless the current version is None in which case it
        is left out.

        :param desc: if True, sort the versions in descending order, otherwise (the
                     default) sort them in ascending order
        :return: the record's versions
        """
        versions = [int(version) for version in self.diffs]
        if self.version is not None:
            versions.append(self.version)
        versions.sort(reverse=desc)
        return versions

    def iter(self) -> Iterable[VersionedData]:
        """
        Yields each version of this record along with the data at that version, as
        VersionedData named tuples. The current version and data are yielded first,
        followed by the versions in the diffs from the highest to the lowest. Each
        older version's data is the result of calling patch with the data yielded just
        before it and that older version's diff.

        :return: VersionedData (version, data) named tuples in descending version
                 order
        """
        yield VersionedData(self.version, self.data)
        current = self.data
        # the diffs are keyed by string versions, so convert to ints to get a numeric
        # sort and then back to a string for each lookup
        for version in sorted((int(key) for key in self.diffs), reverse=True):
            current = patch(current, self.diffs[str(version)])
            yield VersionedData(version, current)
×
112

113

114
# use frozen as these objects have no reason to be mutable (note that __eq__ and __hash__ are defined explicitly on the class rather than generated)
115
@dataclass(frozen=True)
class GeoFieldHint:
    """
    Holds the names of the fields in a record which describe its latitude/longitude
    location, plus an optional uncertainty radius field.

    Equality and hashing are based on the lat_field only, so two hints with the same
    lat_field are considered the same hint regardless of their other attributes.
    """

    lat_field: str
    lon_field: str
    radius_field: Optional[str] = None
    # the number of segments to use to create a circle around a point when a radius is
    # provided in the geo hint. Circles can't be directly represented in WKT nor
    # GeoJSON, so we have to build a polygon instead that looks like a circle using
    # triangles. This setting configures the number of segments to use to make the
    # circle, the higher this number the more accurate the polygon's representation of
    # the circle, but the more complex the shape. Defaults to 16 which produces 64 (+1
    # for the repeat start/end) coordinates in the resulting polygon. This should be
    # enough for the majority of uses.
    segments: int = 16

    def __eq__(self, other: Any) -> bool:
        """
        Compares this hint to another object using only the lat_field.

        :param other: the object to compare with
        :return: True if other is a GeoFieldHint with the same lat_field, False if it
                 is a GeoFieldHint with a different lat_field, and NotImplemented for
                 any other type
        """
        if isinstance(other, GeoFieldHint):
            return self.lat_field == other.lat_field
        # NotImplemented must be returned, not raised: it isn't an exception, so raising
        # it results in a TypeError. Returning it lets Python fall back to the other
        # operand's comparison and ultimately to "not equal"
        return NotImplemented

    def __hash__(self) -> int:
        """
        Hashes this hint using only the lat_field, consistent with __eq__.

        :return: the hash of the lat_field
        """
        return hash(self.lat_field)
×
142

143

144
# set frozen=True to make the objects immutable and provide hashing (which we need for
145
# parser.parse_str's lru_cache)
146
@dataclass(frozen=True)
class ParsingOptions:
    """
    Holds the options used for parsing.

    Objects of this class are immutable. They can be instantiated directly, but it's
    better to create them using the ParsingOptionBuilder.
    """

    # lowercase string values which should be parsed as True
    true_values: FrozenSet[str]
    # lowercase string values which should be parsed as False
    false_values: FrozenSet[str]
    # date format strings to test candidates against using datetime.strptime
    date_formats: FrozenSet[str]
    # GeoFieldHint objects which can be used to test if a record contains any geographic
    # coordinate data
    geo_hints: FrozenSet[GeoFieldHint]
    # the maximum length of keyword strings (both case-sensitive and case-insensitive).
    # Strings will be truncated to this length before indexing
    keyword_length: int
    # the format to use to convert a float to a string for indexing. The string will
    # have format() called on it with the float value passed as the only parameter,
    # therefore the format string should use 0 to reference it
    float_format: str

    def to_doc(self) -> dict:
        """
        Converts these options into a dict of plain values. The frozensets become
        lists, each geo hint becomes a tuple of its field values, and the keyword
        length and float format are included as they are.

        :return: a dict representing these options
        """
        doc = {
            name: list(getattr(self, name))
            for name in ("true_values", "false_values", "date_formats")
        }
        doc["geo_hints"] = [astuple(hint) for hint in self.geo_hints]
        doc["keyword_length"] = self.keyword_length
        doc["float_format"] = self.float_format
        return doc

    @classmethod
    def from_doc(cls, doc: dict) -> "ParsingOptions":
        """
        Creates a ParsingOptions object from a dict in the form produced by to_doc.

        :param doc: a dict with the true_values, false_values, date_formats, geo_hints,
                    keyword_length, and float_format keys
        :return: a ParsingOptions object
        """
        return ParsingOptions(
            true_values=frozenset(doc["true_values"]),
            false_values=frozenset(doc["false_values"]),
            date_formats=frozenset(doc["date_formats"]),
            # each stored hint is a sequence of GeoFieldHint's positional parameters
            geo_hints=frozenset(GeoFieldHint(*params) for params in doc["geo_hints"]),
            keyword_length=doc["keyword_length"],
            float_format=doc["float_format"],
        )
192

193

194
@dataclass
class IngestResult:
    """
    A dataclass containing information about the new data ingested into MongoDB.
    """

    # the version the new data was added at (if the data was not committed or no new
    # data was added, then this will be None)
    version: Optional[int] = None
    # the number of insert operations performed
    inserted: int = 0
    # the number of update operations performed
    updated: int = 0
    # the number of delete operations performed
    deleted: int = 0

    @property
    def was_committed(self) -> bool:
        """
        Returns True if the data was committed, False if not. This is determined by
        whether a version is available.

        :return: True if the data was committed, False if not
        """
        return self.version is not None

    def update(self, bulk_result: "BulkWriteResult") -> None:
        """
        Add the counts in the bulk result object to the counts held by this object.
        This can be called repeatedly to accumulate the counts from several bulk
        writes.

        :param bulk_result: a BulkWriteResult object
        """
        self.inserted += bulk_result.inserted_count
        # use modified_count for the updates as upserted_count only counts the documents
        # which were inserted through an upsert, not the documents changed by an update
        self.updated += bulk_result.modified_count
        self.deleted += bulk_result.deleted_count
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc