• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

NaturalHistoryMuseum / splitgill / #72

02 Aug 2024 09:12PM UTC coverage: 35.289% (-60.8%) from 96.047%
#72

push

coveralls-python

jrdh
build: swap docker-compose for docker compose

379 of 1074 relevant lines covered (35.29%)

0.35 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.77
/splitgill/model.py
1
from dataclasses import dataclass, field, astuple
1✔
2
from itertools import chain
1✔
3
from typing import Dict, Iterable, NamedTuple, List, Optional, FrozenSet, Any
1✔
4
from uuid import uuid4
1✔
5

6
from bson import ObjectId
1✔
7
from pymongo.results import BulkWriteResult
1✔
8

9
from splitgill.diffing import patch, DiffOp
1✔
10

11

12
@dataclass
class Record:
    """
    An incoming record which is not yet under Splitgill's management.

    A record is simply an ID paired with a dict of data. A record whose data is empty
    is treated as a request to delete the record with that ID.
    """

    # the record's ID
    id: str
    # the record's data, an empty dict signals a delete
    data: dict

    @property
    def is_delete(self) -> bool:
        """
        Indicates whether this record is a delete request, i.e. whether its data is
        empty ({}).

        :return: True if the record is a delete request, False otherwise
        """
        if self.data:
            return False
        return True

    @staticmethod
    def new(data: dict) -> "Record":
        """
        Creates a record holding the given data under a newly generated UUID4 ID.

        :param data: the record's data
        :return: a new Record object
        """
        record_id = str(uuid4())
        return Record(id=record_id, data=data)

    @staticmethod
    def delete(record_id: str) -> "Record":
        """
        Creates a delete request for the given record ID, i.e. a record with that ID
        and empty data.

        :param record_id: the ID of the record to delete
        :return: a new Record object with empty data
        """
        return Record(id=record_id, data={})
×
38

39

40
# declared with the class syntax because the keyword-argument functional form,
# NamedTuple("VersionedData", version=..., data=...), is deprecated as of Python 3.13
class VersionedData(NamedTuple):
    """
    A (version, data) pair describing a record's data at a specific version.
    """

    # the version of the data, this is Optional so may be None
    version: Optional[int]
    # the record data at the version
    data: dict
1✔
41

42

43
@dataclass
class MongoRecord:
    """
    A record as loaded from MongoDB.

    The current data is stored in full, while older versions are stored as diffs,
    keyed by version, which are applied to newer data to rebuild older data.
    """

    _id: ObjectId
    id: str
    # the version of the current data, None means the data is uncommitted
    version: Optional[int]
    # the current data, an empty dict means the record is deleted
    data: dict
    # the keys are versions and would naturally be ints, however, MongoDB only allows
    # string keys so they are stored as strings
    diffs: Dict[str, List[DiffOp]] = field(default_factory=dict)

    @property
    def is_deleted(self) -> bool:
        """
        Indicates whether the record is deleted, which is the case when its current
        data is an empty dict.

        :return: True if the record is deleted, False if not
        """
        return not bool(self.data)

    @property
    def is_uncommitted(self) -> bool:
        """
        Indicates whether the record is uncommitted, which is the case when its
        current version is None.

        :return: True if the record's current version is None, False if not
        """
        has_version = self.version is not None
        return not has_version

    @property
    def has_history(self) -> bool:
        """
        Indicates whether the record has any history, which is the case when it has at
        least one diff.

        :return: True if the record has any diffs, False if not
        """
        return True if self.diffs else False

    def get_versions(self, desc=False) -> List[int]:
        """
        Lists the versions of this record. The versions are sorted in ascending order
        unless desc is True, in which case they're sorted in descending order. The
        current version is only included if it is not None.

        :param desc: whether to sort the versions in descending order (default: False)
        :return: the record's versions as a list of ints
        """
        all_versions = [int(diff_version) for diff_version in self.diffs]
        if self.version is not None:
            all_versions.append(self.version)
        all_versions.sort(reverse=desc)
        return all_versions

    def iter(self) -> Iterable[VersionedData]:
        """
        Yields each version of this record along with the data at that version as
        VersionedData named tuples. The current version and data are yielded first
        (note that the current version may be None), then each older version is rebuilt
        by patching the data of the version after it with the older version's diff.

        :return: VersionedData (version, data) named tuples, latest first, then in
                 descending version order
        """
        yield VersionedData(self.version, self.data)
        # each diff is applied to the data of the version that follows it, so start
        # from the current data and work backwards
        newer_data = self.data
        # the diff keys are strings, sort them as ints to get the correct order
        older_versions = sorted((int(key) for key in self.diffs), reverse=True)
        for older_version in older_versions:
            older_data = patch(newer_data, self.diffs[str(older_version)])
            # the version is yielded as an int, not the string it is stored as
            yield VersionedData(older_version, older_data)
            newer_data = older_data
×
112

113

114
# use frozen as these objects have no reason to be mutable. Note that equality and
# hashing are defined explicitly on the class rather than generated by dataclass
115
@dataclass(frozen=True)
class GeoFieldHint:
    """
    Class holding the names of the fields in a record which describe its
    latitude/longitude location and an optional uncertainty radius.

    Equality and hashing are based solely on the latitude field name, so two hints
    with the same lat_field are considered equal (and collapse to a single entry in a
    set) regardless of their lon_field and radius_field values.
    """

    lat_field: str
    lon_field: str
    radius_field: Optional[str] = None

    def __eq__(self, other: Any) -> bool:
        """
        Compares this hint to another object using only the latitude field name.

        :param other: the object to compare with
        :return: True if other is a GeoFieldHint with the same lat_field, False if it
                 is a GeoFieldHint with a different lat_field, and NotImplemented for
                 any other type
        """
        if isinstance(other, GeoFieldHint):
            return self.lat_field == other.lat_field
        # NotImplemented is a sentinel value, not an exception, so it must be returned
        # rather than raised (raising it results in a TypeError). Returning it lets
        # Python try the reflected comparison and then fall back to identity.
        return NotImplemented

    def __hash__(self) -> int:
        """
        Hashes the hint using only the latitude field name, matching __eq__.

        :return: the hash of lat_field
        """
        return hash(self.lat_field)
×
133

134

135
# set frozen=True to make the objects immutable and provide hashing (which we need for
136
# parser.parse_str's lru_cache)
137
@dataclass(frozen=True)
class ParsingOptions:
    """
    The options used when parsing.

    Instances of this class are immutable. They can be created directly, but using the
    ParsingOptionBuilder is the better way to create them.
    """

    # the lowercase string values to parse as True
    true_values: FrozenSet[str]
    # the lowercase string values to parse as False
    false_values: FrozenSet[str]
    # the date format strings candidates are tested against with datetime.strptime
    date_formats: FrozenSet[str]
    # the GeoFieldHint objects used to test whether a record contains any geographic
    # coordinate data
    geo_hints: FrozenSet[GeoFieldHint]
    # the maximum keyword string length (this applies to both the case-sensitive and
    # case-insensitive keyword fields). Strings are truncated to this length before
    # they are indexed in either field. The default is the maximum Elasticsearch
    # allows.
    keyword_length: int = 2147483647
    # the format used to convert floats to strings for indexing. format() is called on
    # this string with the float value as its only parameter, hence the format string
    # should reference the value with 0. The default uses 15 significant digits which
    # roughly matches how floats are actually stored in Elasticsearch, and therefore
    # gives users a somewhat sensible idea of what the number actually is and how it
    # can be searched. With this format numbers will be represented in scientific
    # notation if it decides it needs to.
    float_format: str = "{0:.15g}"

    def to_doc(self) -> dict:
        """
        Converts these options into a dict. The frozensets are converted to lists and
        each geo hint is converted to a tuple of its field values.

        :return: a dict containing all the options
        """
        doc = {}
        doc["true_values"] = [*self.true_values]
        doc["false_values"] = [*self.false_values]
        doc["date_formats"] = [*self.date_formats]
        doc["geo_hints"] = [astuple(geo_hint) for geo_hint in self.geo_hints]
        doc["keyword_length"] = self.keyword_length
        doc["float_format"] = self.float_format
        return doc

    @classmethod
    def from_doc(cls, doc: dict) -> "ParsingOptions":
        """
        Creates a ParsingOptions object from a dict in the form produced by to_doc.
        All six option keys must be present in the dict.

        :param doc: a dict of options
        :return: a new ParsingOptions object
        """
        true_values = frozenset(doc["true_values"])
        false_values = frozenset(doc["false_values"])
        date_formats = frozenset(doc["date_formats"])
        # each geo hint is stored as a sequence of the GeoFieldHint's field values
        geo_hints = frozenset(GeoFieldHint(*params) for params in doc["geo_hints"])
        keyword_length = doc["keyword_length"]
        float_format = doc["float_format"]
        return ParsingOptions(
            true_values=true_values,
            false_values=false_values,
            date_formats=date_formats,
            geo_hints=geo_hints,
            keyword_length=keyword_length,
            float_format=float_format,
        )
189

190

191
@dataclass
class IngestResult:
    """
    Holds information about new data which has been ingested into MongoDB.
    """

    # the version the new data was added at. This is None if the data wasn't committed
    # or if no new data was added
    version: Optional[int] = None
    # how many insert operations were performed
    inserted: int = 0
    # how many update operations were performed
    updated: int = 0
    # how many delete operations were performed
    deleted: int = 0

    @property
    def was_committed(self) -> bool:
        """
        Indicates whether the data was committed, which is determined by whether there
        is a version available.

        :return: True if the data was committed, False if not
        """
        return not (self.version is None)

    def update(self, bulk_result: BulkWriteResult):
        """
        Adds the counts from the given bulk result object to this object's counts.

        :param bulk_result: a BulkWriteResult object
        """
        self.inserted = self.inserted + bulk_result.inserted_count
        # NOTE(review): the updated count is increased by upserted_count, not
        # modified_count or matched_count - confirm this is the intended source
        self.updated = self.updated + bulk_result.upserted_count
        self.deleted = self.deleted + bulk_result.deleted_count
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc