15811823151

Committed 22 Jun 2025 11:17PM UTC coverage: 96.126%. First build

Build # 15811823151

Build Type

Pull #16

github

Committed by

web-flow

Commit Message

Merge 40720b623 into 085991d99

Pull Request #16: refactor(humbledb): support for pymongo 4.x

Run Details

282 of 309 new or added lines in 10 files covered. (91.26%)

1191 of 1239 relevant lines covered (96.13%)

3.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.88

/humbledb/array.py

import itertools
import re

from pytool.lang import UNSET

import humbledb
from humbledb import _version
from humbledb.document import Document
from humbledb.errors import NoConnection


class Page(Document):
    """Backing document type for a single page of an :class:`Array`."""

    # Key used for the page's entry counter in queries and updates
    size = "s"
    """ Number of entries currently in this page. """
    # Key used for the page's list of entries in queries and updates
    entries = "e"
    """ Array of entries. """
    # Extra keyword arguments forwarded to insert/update calls; ``safe`` is
    # only requested when ``_version._lt("3.0.0")`` reports an older driver
    # (presumably pymongo -- confirm against humbledb._version)
    if _version._lt("3.0.0"):
        _opts = {"safe": True}
    else:
        _opts = {}


class ArrayMeta(type):
    """
    Metaclass for Arrays. This ensures that we have all the needed
    configuration options, as well as creating the :class:`Page` subclass that
    is specific to each Array subclass.

    """

    def __new__(mcs, name, bases, cls_dict):
        """
        Build an Array class and its dedicated :class:`Page` subclass.

        :param str name: Name of the class being created
        :param tuple bases: Base classes of the class being created
        :param dict cls_dict: Class namespace; ``config_database`` and
            ``config_collection`` are moved out of it onto the page class
        :raises TypeError: If either required config member is missing

        """
        # Skip the Array base class. ``__qualname__`` is only present in the
        # namespace for classes created by a ``class`` statement; when the
        # class is built dynamically it is absent, so fall back to ``name``
        # (the same default ``type`` itself applies) instead of raising
        # KeyError.
        if (
            name == "Array"
            and not bases
            and mcs is ArrayMeta
            and cls_dict.get("__qualname__", name) == "Array"
        ):
            return type.__new__(mcs, name, bases, cls_dict)
        # The dictionary for subclassing the Page document
        page_dict = {}
        # Check for required class members
        for member in "config_database", "config_collection":
            if member not in cls_dict:
                raise TypeError("{!r} missing required {!r}".format(name, member))
            # Move the config to the page
            page_dict[member] = cls_dict.pop(member)
        # Create our page subclass and assign to cls._page
        cls_dict["_page"] = type(name + "Page", (Page,), page_dict)
        # Return our new Array
        return type.__new__(mcs, name, bases, cls_dict)

    # Shortcut properties: each one forwards to the attribute of the same
    # name on the Array's page class.
    @property
    def size(cls):
        """Return ``size`` as exposed by the page class."""
        return cls._page.size

    @property
    def entries(cls):
        """Return ``entries`` as exposed by the page class."""
        return cls._page.entries

    @property
    def find(cls):
        """Return the page class's ``find`` attribute."""
        return cls._page.find

    @property
    def update(cls):
        """Return the page class's ``update`` attribute."""
        return cls._page.update

    @property
    def remove(cls):  # This needs a try/except for tests
        """
        Return the page class's ``remove`` attribute, or ``None`` when looking
        it up raises :class:`NoConnection`.

        """
        try:
            return cls._page.remove
        except NoConnection:
            # Collection not available yet; deliberately best-effort
            return None


class Array(metaclass=ArrayMeta):
    """
    HumbleDB Array object. This helps manage paginated array documents in
    MongoDB. This class is designed to be inherited from, and not instantiated
    directly.

    If you know the `page_count` for this array ahead of time, passing it in
    to the constructor will save an extra query on the first append for a given
    instance.

    :param str _id: Sets the array's shared id
    :param int page_count: Total number of pages that already exist (optional)

    """

    config_max_size = 100
    """ Soft limit on the maximum number of entries per page. """

    config_page_marker = "#"
    """ Combined with the array_id and page number to create the page _id. """

    config_padding = 0
    """ Number of bytes to pad new page creation with. """

    def __init__(self, _id, page_count=UNSET):
        self._array_id = _id
        self.page_count = page_count

    def page_id(self, page_number=None):
        """
        Return the document ID for `page_number`. If page number is not
        specified the :attr:`Array.page_count` is used.

        A falsy `page_number` (``None`` or ``0``) is treated as unspecified,
        and a falsy page count results in page number ``0``.

        :param int page_number: A page number (optional)
        :returns: The array prefix followed by the zero-padded page number

        """
        page_number = page_number or self.page_count or 0
        return "{}{:05d}".format(self._id, page_number)

    @property
    def _id(self):
        """Return the ``_id`` prefix shared by every page of this array."""
        return "{}{}".format(self._array_id, self.config_page_marker)

    @property
    def _id_regex(self):
        """
        Return a ``$regex`` query matching every page ``_id`` of this array.

        The prefix is escaped in full so that array ids containing regular
        expression metacharacters (not just ``.``) are matched literally.

        """
        return {"$regex": "^" + re.escape(self._id)}

    def new_page(self, page_number):
        """
        Creates a new page document.

        The page is inserted with ``config_padding`` characters of padding,
        which is removed again right after the insert. If the page already
        exists this is a no-op.

        :param int page_number: The page number to create

        """
        # Shortcut the page class
        Page = self._page
        # Create a new page instance
        page = Page()
        page._id = self.page_id(page_number)
        page.size = 0
        page.entries = []
        page["padding"] = "0" * self.config_padding
        # Insert the new page
        try:
            # We need to do this as safe, because otherwise it may not be
            # available to a subsequent call to append
            Page.insert(page, **Page._opts)
        except humbledb.errors.DuplicateKeyError:
            # A race condition already created this page, so we are done
            return
        # Remove the padding
        Page.update({"_id": page._id}, {"$unset": {"padding": 1}}, **Page._opts)

    def append(self, entry):
        """
        Append an entry to this array and return the page count.

        :param dict entry: New entry
        :returns: Total number of pages
        :raises RuntimeError: If the current page document does not exist

        """
        # If we haven't set a page count, we query for it. This is generally a
        # very fast query.
        if self.page_count is UNSET:
            self.page_count = self.pages()
        # See if we have to create our initial page
        if self.page_count < 1:
            self.page_count = 1
            self.new_page(self.page_count)
        # Shortcut page class
        Page = self._page
        query = {"_id": self.page_id()}
        modify = {"$inc": {Page.size: 1}, "$push": {Page.entries: entry}}
        fields = {Page.size: 1}
        # Append our entry to our page and get the page's size
        page = Page.find_and_modify(query, modify, new=True, fields=fields)
        if not page:
            raise RuntimeError("Append failed: page does not exist.")
        # If we need to, we create the next page
        if page.size >= self.config_max_size:
            self.page_count += 1
            self.new_page(self.page_count)
        # Return the page count
        return self.page_count

    def remove(self, spec):
        """
        Remove first element matching `spec` from each page in this array.

        Due to how this is handled, all ``null`` values will be removed from
        the array.

        :param dict spec: Dictionary matching items to be removed
        :returns: ``True`` if an element was removed, otherwise ``None``

        """
        Page = self._page
        # Since we can't reliably use dot-notation when the query is against an
        # embedded document, we need to use the $elemMatch operator instead
        if isinstance(spec, dict):
            query_spec = {"$elemMatch": spec}
        else:
            query_spec = spec
        # Update to set first instance matching ``spec`` on each page to
        # ``null`` (via $unset)
        query = {"_id": self._id_regex, Page.entries: query_spec}
        modify = {"$unset": {Page.entries + ".$": spec}, "$inc": {Page.size: -1}}
        result = Page.update(query, modify, multi=True)
        if not result or not result.get("updatedExisting", None):
            return
        # Update to remove all ``null`` entries from this array
        query = {"_id": self._id_regex, Page.entries: None}
        result = Page.update(query, {"$pull": {Page.entries: None}}, multi=True)
        # Check the result and return True if anything was modified
        if result and result.get("updatedExisting", None):
            return True

    def _all(self):
        """Return a cursor for iterating over all the pages."""
        Page = self._page
        return Page.find({"_id": self._id_regex}).sort("_id")

    def all(self):
        """Return all entries in this array."""
        cursor = self._all()
        return list(itertools.chain.from_iterable(p.entries for p in cursor))

    def clear(self):
        """Remove all documents in this array."""
        self._page.remove({self._page._id: self._id_regex})
        self.page_count = 0

    def length(self):
        """Return the total number of items in this array."""
        # This is implemented rather than __len__ because it incurs a query,
        # and we don't want to query transparently
        Page = self._page
        # Only the size field is needed; the projection is passed by keyword
        # or positionally depending on the version check
        if _version._lt("3.0.0"):
            cursor = Page.find({"_id": self._id_regex}, fields={Page.size: 1, "_id": 0})
        else:
            cursor = Page.find({"_id": self._id_regex}, {Page.size: 1, "_id": 0})
        return sum(p.size for p in cursor)

    def pages(self):
        """Return the total number of pages in this array."""
        Page = self._page
        return Page.find({"_id": self._id_regex}).count()

    def __getitem__(self, index):
        """
        Return a page or pages for the given index or slice respectively.

        Indices are zero-based page positions. An integer returns that page's
        entries; a slice returns the entries of pages ``[start, stop)``
        flattened into a single list.

        :param index: Integer index or ``slice()`` object
        :raises TypeError: If `index` is neither an int nor a slice, or if the
            slice has a step
        :raises IndexError: If an index is negative, or an integer index does
            not correspond to an existing page

        """
        if not isinstance(index, (int, slice)):
            raise TypeError("Array indices must be integers, not %s" % type(index))
        Page = self._page  # Shorthand the Page class
        # If we have an integer index, it's a simple query for the page number
        if isinstance(index, int):
            if index < 0:
                raise IndexError("Array indices must be positive")
            # Page numbers are not zero indexed
            index += 1
            page = Page.find_one({"_id": self.page_id(index)})
            if not page:
                raise IndexError("Array index out of range")
            return page.entries
        # Otherwise we have a slice, and attempt to get the pages for
        # [start, stop)
        if index.step:
            raise TypeError("Arrays do not allow extended slices")
        if index.start and index.start < 0:
            raise IndexError("Array indices must be positive")
        if index.stop and index.stop < 0:
            raise IndexError("Array indices must be positive")
        # Page numbers are not zero indexed
        start = (index.start or 0) + 1
        # Only a missing stop means "unbounded"; an explicit stop of 0 must
        # yield an empty range rather than every page
        stop = (2**32 if index.stop is None else index.stop) + 1
        start = "{}{:05d}".format(self._id, start)
        stop = "{}{:05d}".format(self._id, stop)
        # Sort by _id, as _all() does, so entries come back in page order
        cursor = Page.find({"_id": {"$gte": start, "$lt": stop}}).sort("_id")
        return list(itertools.chain.from_iterable(p.entries for p in cursor))

1	import itertools	4✔
2
3	from pytool.lang import UNSET	4✔
4
5	import humbledb	4✔
6	from humbledb import _version	4✔
7	from humbledb.document import Document	4✔
8	from humbledb.errors import NoConnection	4✔
9
10
11	class Page(Document):	4✔
12	"""Document class used by :class:`Array`."""
13
14	size = "s" # Number of entries in this page	4✔
15	""" Number of entries currently in this page. """	4✔
16	entries = "e" # Array of entries	4✔
17	""" Array of entries. """	4✔
18	_opts = {"safe": True} if _version._lt("3.0.0") else {}	4✔
19
20
21	class ArrayMeta(type):	4✔
22	"""
23	Metaclass for Arrays. This ensures that we have all the needed
24	configuration options, as well as creating the :class:`Page` subclass that
25	is specific to each Array subclass.
26
27	"""
28
29	def __new__(mcs, name, bases, cls_dict):	4✔
30	# Skip the Array base class
31	if (	4✔
32	name == "Array"
33	and not len(bases)
34	and mcs is ArrayMeta
35	and cls_dict["__qualname__"] == "Array"
36	):
37	return type.__new__(mcs, name, bases, cls_dict)	4✔
38	# The dictionary for subclassing the Page document
39	page_dict = {}	4✔
40	# Check for required class members
41	for member in "config_database", "config_collection":	4✔
42	if member not in cls_dict:	4✔
43	raise TypeError("{!r} missing required {!r}".format(name, member))	4✔
44	# Move the config to the page
45	page_dict[member] = cls_dict.pop(member)	4✔
46	# Create our page subclass and assign to cls._page
47	cls_dict["_page"] = type(name + "Page", (Page,), page_dict)	4✔
48	# Return our new Array
49	return type.__new__(mcs, name, bases, cls_dict)	4✔
50
51	# Shortcut methods
52	@property	4✔
53	def size(cls):	4✔
54	return cls._page.size	4✔
55
56	@property	4✔
57	def entries(cls):	4✔
58	return cls._page.entries	4✔
59
60	@property	4✔
61	def find(cls):	4✔
62	return cls._page.find	4✔
63
64	@property	4✔
65	def update(cls):	4✔
NEW 66	return cls._page.update	×
67
68	@property	4✔
69	def remove(cls): # This needs a try/except for tests	4✔
NEW 70	try:	×
NEW 71	return cls._page.remove	×
NEW 72	except NoConnection:	×
NEW 73	pass # Collection not available yet	×
74
75
76	class Array(metaclass=ArrayMeta):	4✔
77	"""
78	HumbleDB Array object. This helps manage paginated array documents in
79	MongoDB. This class is designed to be inherited from, and not instantiated
80	directly.
81
82	If you know the `page_count` for this array ahead of time, passing it in
83	to the constructor will save an extra query on the first append for a given
84	instance.
85
86	:param str _id: Sets the array's shared id
87	:param int page_count: Total number of pages that already exist (optional)
88
89	"""
90
91	config_max_size = 100	4✔
92	""" Soft limit on the maximum number of entries per page. """	4✔
93
94	config_page_marker = "#"	4✔
95	""" Combined with the array_id and page number to create the page _id. """	4✔
96
97	config_padding = 0	4✔
98	""" Number of bytes to pad new page creation with. """	4✔
99
100	def __init__(self, _id, page_count=UNSET):	4✔
101	self._array_id = _id	4✔
102	self.page_count = page_count	4✔
103
104	def page_id(self, page_number=None):	4✔
105	"""
106	Return the document ID for `page_number`. If page number is not
107	specified the :attr:`Array.page_count` is used.
108
109	:param int page_number: A page number (optional)
110
111	"""
112	page_number = page_number or self.page_count or 0	4✔
113	return "{}{:05d}".format(self._id, page_number)	4✔
114
115	@property	4✔
116	def _id(self):	4✔
117	return "{}{}".format(self._array_id, self.config_page_marker)	4✔
118
119	@property	4✔
120	def _id_regex(self):	4✔
121	_id = self._id.replace(".", "\.")	4✔
122	return {"$regex": "^" + _id}	4✔
123
124	def new_page(self, page_number):	4✔
125	"""
126	Creates a new page document.
127
128	:param int page_number: The page number to create
129
130	"""
131	# Shortcut the page class
132	Page = self._page	4✔
133	# Create a new page instance
134	page = Page()	4✔
135	page._id = self.page_id(page_number)	4✔
136	page.size = 0	4✔
137	page.entries = []	4✔
138	page["padding"] = "0" * self.config_padding	4✔
139	# Insert the new page
140	try:	4✔
141	# We need to do this as safe, because otherwise it may not be
142	# available to a subsequent call to append
143	Page.insert(page, **Page._opts)	4✔
144	except humbledb.errors.DuplicateKeyError:	4✔
145	# A race condition already created this page, so we are done
146	return	4✔
147	# Remove the padding
148	Page.update({"_id": page._id}, {"$unset": {"padding": 1}}, **Page._opts)	4✔
149
150	def append(self, entry):	4✔
151	"""
152	Append an entry to this array and return the page count.
153
154	:param dict entry: New entry
155	:returns: Total number of pages
156
157	"""
158	# If we haven't set a page count, we query for it. This is generally a
159	# very fast query.
160	if self.page_count is UNSET:	4✔
161	self.page_count = self.pages()	4✔
162	# See if we have to create our initial page
163	if self.page_count < 1:	4✔
164	self.page_count = 1	4✔
165	self.new_page(self.page_count)	4✔
166	# Shortcut page class
167	Page = self._page	4✔
168	query = {"_id": self.page_id()}	4✔
169	modify = {"$inc": {Page.size: 1}, "$push": {Page.entries: entry}}	4✔
170	fields = {Page.size: 1}	4✔
171	# Append our entry to our page and get the page's size
172	page = Page.find_and_modify(query, modify, new=True, fields=fields)	4✔
173	if not page:	4✔
174	raise RuntimeError("Append failed: page does not exist.")	4✔
175	# If we need to, we create the next page
176	if page.size >= self.config_max_size:	4✔
177	self.page_count += 1	4✔
178	self.new_page(self.page_count)	4✔
179	# Return the page count
180	return self.page_count	4✔
181
182	def remove(self, spec):	4✔
183	"""
184	Remove first element matching `spec` from each page in this array.
185
186	Due to how this is handled, all ``null`` values will be removed from
187	the array.
188
189	:param dict spec: Dictionary matching items to be removed
190	:returns: ``True`` if an element was removed
191
192	"""
193	Page = self._page	4✔
194	# Since we can't reliably use dot-notation when the query is against an
195	# embedded document, we need to use the $elemMatch operator instead
196	if isinstance(spec, dict):	4✔
197	query_spec = {"$elemMatch": spec}	4✔
198	else:
199	query_spec = spec	4✔
200	# Update to set first instance matching ``spec`` on each page to
201	# ``null`` (via $unset)
202	query = {"_id": self._id_regex, Page.entries: query_spec}	4✔
203	modify = {"$unset": {Page.entries + ".$": spec}, "$inc": {Page.size: -1}}	4✔
204	result = Page.update(query, modify, multi=True)	4✔
205	if not result or not result.get("updatedExisting", None):	4✔
206	return	4✔
207	# Update to remove all ``null`` entries from this array
208	query = {"_id": self._id_regex, Page.entries: None}	4✔
209	result = Page.update(query, {"$pull": {Page.entries: None}}, multi=True)	4✔
210	# Check the result and return True if anything was modified
211	if result and result.get("updatedExisting", None):	4✔
212	return True	4✔
213
214	def _all(self):	4✔
215	"""Return a cursor for iterating over all the pages."""
216	Page = self._page	4✔
217	return Page.find({"_id": self._id_regex}).sort("_id")	4✔
218
219	def all(self):	4✔
220	"""Return all entries in this array."""
221	cursor = self._all()	4✔
222	return list(itertools.chain.from_iterable(p.entries for p in cursor))	4✔
223
224	def clear(self):	4✔
225	"""Remove all documents in this array."""
226	self._page.remove({self._page._id: self._id_regex})	4✔
227	self.page_count = 0	4✔
228
229	def length(self):	4✔
230	"""Return the total number of items in this array."""
231	# This is implemented rather than __len__ because it incurs a query,
232	# and we don't want to query transparently
233	Page = self._page	4✔
234	if _version._lt("3.0.0"):	4✔
NEW 235	cursor = Page.find({"_id": self._id_regex}, fields={Page.size: 1, "_id": 0})	×
236	else:
237	cursor = Page.find({"_id": self._id_regex}, {Page.size: 1, "_id": 0})	4✔
238	return sum(p.size for p in cursor)	4✔
239
240	def pages(self):	4✔
241	"""Return the total number of pages in this array."""
242	Page = self._page	4✔
243	return Page.find({"_id": self._id_regex}).count()	4✔
244
245	def __getitem__(self, index):	4✔
246	"""
247	Return a page or pages for the given index or slice respectively.
248
249	:param index: Integer index or ``slice()`` object
250
251	"""
252	if not isinstance(index, (int, slice)):	4✔
253	raise TypeError("Array indices must be integers, not %s" % type(index))	4✔
254	Page = self._page # Shorthand the Page class	4✔
255	# If we have an integer index, it's a simple query for the page number
256	if isinstance(index, int):	4✔
257	if index < 0:	4✔
258	raise IndexError("Array indices must be positive")	×
259	# Page numbers are not zero indexed
260	index += 1	4✔
261	page = Page.find_one({"_id": self.page_id(index)})	4✔
262	if not page:	4✔
263	raise IndexError("Array index out of range")	4✔
264	return page.entries	4✔
265	# If we have a slice, we attempt to get the pages for [start, stop)
266	if isinstance(index, slice):	4✔
267	if index.step:	4✔
268	raise TypeError("Arrays do not allow extended slices")	4✔
269	if index.start and index.start < 0:	4✔
270	raise IndexError("Array indices must be positive")	×
271	if index.stop and index.stop < 0:	4✔
272	raise IndexError("Array indices must be positive")	×
273	# Page numbers are not zero indexed
274	start = (index.start or 0) + 1	4✔
275	stop = (index.stop or 2**32) + 1	4✔
276	start = "{}{:05d}".format(self._id, start)	4✔
277	stop = "{}{:05d}".format(self._id, stop)	4✔
278	cursor = Page.find({"_id": {"$gte": start, "$lt": stop}})	4✔
279	return list(itertools.chain.from_iterable(p.entries for p in cursor))	4✔
280	# This comment will never be reached

shakefu / humbledb / 15811823151

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous