• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zarr-developers / zarr-python / 2000

pending completion
2000

Pull #621

travis-pro

web-flow
Merge c184da121 into 610db340b
Pull Request #621: Start stop for iterator

37 of 37 new or added lines in 2 files covered. (100.0%)

9762 of 9992 relevant lines covered (97.7%)

2.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.79
/zarr/hierarchy.py
1
from collections.abc import MutableMapping
3✔
2
from itertools import islice
3✔
3

4
import numpy as np
3✔
5

6
from zarr.attrs import Attributes
3✔
7
from zarr.core import Array
3✔
8
from zarr.creation import (array, create, empty, empty_like, full, full_like,
3✔
9
                           normalize_store_arg, ones, ones_like, zeros,
10
                           zeros_like)
11
from zarr.errors import (
3✔
12
    ContainsArrayError,
13
    ContainsGroupError,
14
    GroupNotFoundError,
15
    ReadOnlyError,
16
)
17
from zarr.meta import decode_group_metadata
3✔
18
from zarr.storage import (MemoryStore, attrs_key, contains_array,
3✔
19
                          contains_group, group_meta_key, init_group, listdir,
20
                          rename, rmdir)
21
from zarr.util import (InfoReporter, TreeViewer, is_valid_python_name, nolock,
3✔
22
                       normalize_shape, normalize_storage_path)
23

24

25
class Group(MutableMapping):
3✔
26
    """Instantiate a group from an initialized store.
27

28
    Parameters
29
    ----------
30
    store : MutableMapping
31
        Group store, already initialized.
32
        If the Group is used in a context manager, and the store has a ``close`` method,
33
        it will be called on exit.
34
    path : string, optional
35
        Group path.
36
    read_only : bool, optional
37
        True if group should be protected against modification.
38
    chunk_store : MutableMapping, optional
39
        Separate storage for chunks. If not provided, `store` will be used
40
        for storage of both chunks and metadata.
41
    cache_attrs : bool, optional
42
        If True (default), user attributes will be cached for attribute read
43
        operations. If False, user attributes are reloaded from the store prior
44
        to all attribute read operations.
45
    synchronizer : object, optional
46
        Array synchronizer.
47

48
    Attributes
49
    ----------
50
    store
51
    path
52
    name
53
    read_only
54
    chunk_store
55
    synchronizer
56
    attrs
57
    info
58

59
    Methods
60
    -------
61
    __len__
62
    __iter__
63
    __contains__
64
    __getitem__
65
    __enter__
66
    __exit__
67
    group_keys
68
    groups
69
    array_keys
70
    arrays
71
    visit
72
    visitkeys
73
    visitvalues
74
    visititems
75
    tree
76
    create_group
77
    require_group
78
    create_groups
79
    require_groups
80
    create_dataset
81
    require_dataset
82
    create
83
    empty
84
    zeros
85
    ones
86
    full
87
    array
88
    empty_like
89
    zeros_like
90
    ones_like
91
    full_like
92
    info
93
    move
94

95
    """
96

97
    def __init__(self, store, path=None, read_only=False, chunk_store=None,
                 cache_attrs=True, synchronizer=None):
        """Bind the group to an already-initialized store.

        Raises
        ------
        ContainsArrayError
            If an array is found at `path`.
        GroupNotFoundError
            If the store holds no group metadata under `path`.

        """
        self._store = store
        self._chunk_store = chunk_store
        self._path = normalize_storage_path(path)
        # keys of members are formed by prepending this prefix
        self._key_prefix = self._path + '/' if self._path else ''
        self._read_only = read_only
        self._synchronizer = synchronizer

        # refuse to open a group where an array lives
        if contains_array(store, path=self._path):
            raise ContainsArrayError(path)

        # load and decode the group metadata document
        metadata_key = self._key_prefix + group_meta_key
        try:
            raw_meta = store[metadata_key]
        except KeyError:
            raise GroupNotFoundError(path)
        self._meta = decode_group_metadata(raw_meta)

        # user attributes live under their own key next to the metadata
        attributes_key = self._key_prefix + attrs_key
        self._attrs = Attributes(store, key=attributes_key, read_only=read_only,
                                 cache=cache_attrs, synchronizer=synchronizer)

        # diagnostic reporter
        self._info = InfoReporter(self)
3✔
130

131
    @property
3✔
132
    def store(self):
1✔
133
        """A MutableMapping providing the underlying storage for the group."""
134
        return self._store
3✔
135

136
    @property
3✔
137
    def path(self):
1✔
138
        """Storage path."""
139
        return self._path
3✔
140

141
    @property
3✔
142
    def name(self):
1✔
143
        """Group name following h5py convention."""
144
        if self._path:
3✔
145
            # follow h5py convention: add leading slash
146
            name = self._path
3✔
147
            if name[0] != '/':
3✔
148
                name = '/' + name
3✔
149
            return name
3✔
150
        return '/'
3✔
151

152
    @property
3✔
153
    def basename(self):
1✔
154
        """Final component of name."""
155
        return self.name.split('/')[-1]
3✔
156

157
    @property
3✔
158
    def read_only(self):
1✔
159
        """A boolean, True if modification operations are not permitted."""
160
        return self._read_only
3✔
161

162
    @property
3✔
163
    def chunk_store(self):
1✔
164
        """A MutableMapping providing the underlying storage for array chunks."""
165
        if self._chunk_store is None:
3✔
166
            return self._store
3✔
167
        else:
168
            return self._chunk_store
3✔
169

170
    @property
3✔
171
    def synchronizer(self):
1✔
172
        """Object used to synchronize write access to groups and arrays."""
173
        return self._synchronizer
3✔
174

175
    @property
3✔
176
    def attrs(self):
1✔
177
        """A MutableMapping containing user-defined attributes. Note that
178
        attribute values must be JSON serializable."""
179
        return self._attrs
3✔
180

181
    @property
3✔
182
    def info(self):
1✔
183
        """Return diagnostic information about the group."""
184
        return self._info
3✔
185

186
    def __eq__(self, other):
3✔
187
        return (
3✔
188
            isinstance(other, Group) and
189
            self._store == other.store and
190
            self._read_only == other.read_only and
191
            self._path == other.path
192
            # N.B., no need to compare attributes, should be covered by
193
            # store comparison
194
        )
195

196
    def __iter__(self):
        """Iterate over the names of group members, in sorted order.

        Only store entries that are arrays or groups are yielded.

        Examples
        --------
        >>> import zarr
        >>> g1 = zarr.group()
        >>> g2 = g1.create_group('foo')
        >>> g3 = g1.create_group('bar')
        >>> d1 = g1.create_dataset('baz', shape=100, chunks=10)
        >>> d2 = g1.create_dataset('quux', shape=200, chunks=20)
        >>> for name in g1:
        ...     print(name)
        bar
        baz
        foo
        quux

        """
        for child in sorted(listdir(self._store, self._path)):
            child_path = self._key_prefix + child
            is_member = (contains_array(self._store, child_path) or
                         contains_group(self._store, child_path))
            if not is_member:
                # skip store entries that are neither arrays nor groups
                continue
            yield child
3✔
220

221
    def __len__(self):
3✔
222
        """Number of members."""
223
        return sum(1 for _ in self)
3✔
224

225
    def __repr__(self):
3✔
226
        t = type(self)
×
227
        r = '<{}.{}'.format(t.__module__, t.__name__)
×
228
        if self.name:
×
229
            r += ' %r' % self.name
×
230
        if self._read_only:
×
231
            r += ' read-only'
×
232
        r += '>'
×
233
        return r
×
234

235
    def __enter__(self):
3✔
236
        """Return the Group for use as a context manager."""
237
        return self
3✔
238

239
    def __exit__(self, exc_type, exc_val, exc_tb):
3✔
240
        """If the underlying Store has a ``close`` method, call it."""
241
        try:
3✔
242
            self.store.close()
3✔
243
        except AttributeError:
3✔
244
            pass
3✔
245

246
    def info_items(self):
        """Build the list of ``(label, value)`` pairs describing this group.

        The report covers name, type, read-only flag, synchronizer and store
        types, and the counts and names of member arrays and groups.
        """

        def qualified_type(obj):
            cls = type(obj)
            return '{}.{}'.format(cls.__module__, cls.__name__)

        report = []

        # basic info
        if self.name is not None:
            report.append(('Name', self.name))
        report.append(('Type', qualified_type(self)))
        report.append(('Read-only', str(self.read_only)))

        # synchronizer
        if self._synchronizer is not None:
            report.append(('Synchronizer type', qualified_type(self._synchronizer)))

        # storage info
        report.append(('Store type', qualified_type(self._store)))
        if self._chunk_store is not None:
            report.append(('Chunk store type', qualified_type(self._chunk_store)))

        # members
        report.append(('No. members', len(self)))
        array_names = sorted(self.array_keys())
        group_names = sorted(self.group_keys())
        report.append(('No. arrays', len(array_names)))
        report.append(('No. groups', len(group_names)))
        if array_names:
            report.append(('Arrays', ', '.join(array_names)))
        if group_names:
            report.append(('Groups', ', '.join(group_names)))

        return report
3✔
282

283
    def __getstate__(self):
3✔
284
        return (self._store, self._path, self._read_only, self._chunk_store,
3✔
285
                self.attrs.cache, self._synchronizer)
286

287
    def __setstate__(self, state):
3✔
288
        self.__init__(*state)
3✔
289

290
    def _item_path(self, item):
        """Translate a member name into a normalized storage path.

        A string starting with ``'/'`` is taken as absolute; anything else is
        resolved relative to this group's path.
        """
        is_absolute = isinstance(item, str) and item and item[0] == '/'
        normalized = normalize_storage_path(item)
        if is_absolute or not self._path:
            return normalized
        return self._key_prefix + normalized
3✔
296

297
    def __contains__(self, item):
        """Return True if `item` names an array or group reachable from here.

        Examples
        --------
        >>> import zarr
        >>> g1 = zarr.group()
        >>> g2 = g1.create_group('foo')
        >>> d1 = g1.create_dataset('bar', shape=100, chunks=10)
        >>> 'foo' in g1
        True
        >>> 'bar' in g1
        True
        >>> 'baz' in g1
        False

        """
        target = self._item_path(item)
        if contains_array(self._store, target):
            return True
        return contains_group(self._store, target)
317

318
    def __getitem__(self, item):
        """Look up a member array or group by name or path.

        Parameters
        ----------
        item : string
            Member name or path.

        Raises
        ------
        KeyError
            If neither an array nor a group exists at the resolved path.

        Examples
        --------
        >>> import zarr
        >>> g1 = zarr.group()
        >>> d1 = g1.create_dataset('foo/bar/baz', shape=100, chunks=10)
        >>> g1['foo']
        <zarr.hierarchy.Group '/foo'>
        >>> g1['foo/bar']
        <zarr.hierarchy.Group '/foo/bar'>
        >>> g1['foo/bar/baz']
        <zarr.core.Array '/foo/bar/baz' (100,) float64>

        """
        target = self._item_path(item)

        # arrays take precedence over groups
        if contains_array(self._store, target):
            return Array(self._store, read_only=self._read_only, path=target,
                         chunk_store=self._chunk_store,
                         synchronizer=self._synchronizer, cache_attrs=self.attrs.cache)

        if contains_group(self._store, target):
            return Group(self._store, read_only=self._read_only, path=target,
                         chunk_store=self._chunk_store, cache_attrs=self.attrs.cache,
                         synchronizer=self._synchronizer)

        raise KeyError(item)
3✔
350

351
    def __setitem__(self, item, value):
3✔
352
        self.array(item, value, overwrite=True)
3✔
353

354
    def __delitem__(self, item):
3✔
355
        return self._write_op(self._delitem_nosync, item)
3✔
356

357
    def _delitem_nosync(self, item):
        """Remove the array or group at `item`; raise KeyError if there is none.

        No locking or read-only check is done here.
        """
        target = self._item_path(item)
        exists = (contains_array(self._store, target) or
                  contains_group(self._store, target))
        if not exists:
            raise KeyError(item)
        rmdir(self._store, target)
3✔
364

365
    def __getattr__(self, item):
3✔
366
        # allow access to group members via dot notation
367
        try:
3✔
368
            return self.__getitem__(item)
3✔
369
        except KeyError:
3✔
370
            raise AttributeError
3✔
371

372
    def __dir__(self):
        """List regular attributes plus member names that are valid identifiers.

        Lets tab completion offer members reachable via dot notation.
        """
        # noinspection PyUnresolvedReferences
        inherited = super().__dir__()
        combined = set(inherited + list(self))
        return [candidate for candidate in sorted(combined)
                if is_valid_python_name(candidate)]
3✔
378

379
    def _ipython_key_completions_(self):
3✔
380
        return sorted(self)
3✔
381

382
    def group_keys(self):
        """Iterate over the names of members that are groups, in sorted order.

        Examples
        --------
        >>> import zarr
        >>> g1 = zarr.group()
        >>> g2 = g1.create_group('foo')
        >>> g3 = g1.create_group('bar')
        >>> d1 = g1.create_dataset('baz', shape=100, chunks=10)
        >>> d2 = g1.create_dataset('quux', shape=200, chunks=20)
        >>> sorted(g1.group_keys())
        ['bar', 'foo']

        """
        for child in sorted(listdir(self._store, self._path)):
            if not contains_group(self._store, self._key_prefix + child):
                continue
            yield child
3✔
401

402
    def groups(self):
        """Iterate over ``(name, Group)`` pairs for members that are groups.

        Each sub-group is opened with this group's read-only flag, chunk
        store, attribute caching setting and synchronizer.

        Examples
        --------
        >>> import zarr
        >>> g1 = zarr.group()
        >>> g2 = g1.create_group('foo')
        >>> g3 = g1.create_group('bar')
        >>> d1 = g1.create_dataset('baz', shape=100, chunks=10)
        >>> d2 = g1.create_dataset('quux', shape=200, chunks=20)
        >>> for n, v in g1.groups():
        ...     print(n, type(v))
        bar <class 'zarr.hierarchy.Group'>
        foo <class 'zarr.hierarchy.Group'>

        """
        for child in sorted(listdir(self._store, self._path)):
            child_path = self._key_prefix + child
            if not contains_group(self._store, child_path):
                continue
            subgroup = Group(self._store, path=child_path,
                             read_only=self._read_only,
                             chunk_store=self._chunk_store,
                             cache_attrs=self.attrs.cache,
                             synchronizer=self._synchronizer)
            yield child, subgroup
426

427
    def array_keys(self, recurse=False):
3✔
428
        """Return an iterator over member names for arrays only.
429

430
        Parameters
431
        ----------
432
        recurse : recurse, optional
433
            Option to return member names for all arrays, even from groups
434
            below the current one. If False, only member names for arrays in
435
            the current group will be returned. Default value is False.
436

437
        Examples
438
        --------
439
        >>> import zarr
440
        >>> g1 = zarr.group()
441
        >>> g2 = g1.create_group('foo')
442
        >>> g3 = g1.create_group('bar')
443
        >>> d1 = g1.create_dataset('baz', shape=100, chunks=10)
444
        >>> d2 = g1.create_dataset('quux', shape=200, chunks=20)
445
        >>> sorted(g1.array_keys())
446
        ['baz', 'quux']
447

448
        """
449
        return self._array_iter(keys_only=True,
3✔
450
                                method='array_keys',
451
                                recurse=recurse)
452

453
    def arrays(self, recurse=False):
3✔
454
        """Return an iterator over (name, value) pairs for arrays only.
455

456
        Parameters
457
        ----------
458
        recurse : recurse, optional
459
            Option to return (name, value) pairs for all arrays, even from groups
460
            below the current one. If False, only (name, value) pairs for arrays in
461
            the current group will be returned. Default value is False.
462

463
        Examples
464
        --------
465
        >>> import zarr
466
        >>> g1 = zarr.group()
467
        >>> g2 = g1.create_group('foo')
468
        >>> g3 = g1.create_group('bar')
469
        >>> d1 = g1.create_dataset('baz', shape=100, chunks=10)
470
        >>> d2 = g1.create_dataset('quux', shape=200, chunks=20)
471
        >>> for n, v in g1.arrays():
472
        ...     print(n, type(v))
473
        baz <class 'zarr.core.Array'>
474
        quux <class 'zarr.core.Array'>
475

476
        """
477
        return self._array_iter(keys_only=False,
3✔
478
                                method='arrays',
479
                                recurse=recurse)
480

481
    def _array_iter(self, keys_only, method, recurse):
        """Shared generator behind :meth:`array_keys` and :meth:`arrays`.

        Parameters
        ----------
        keys_only : bool
            Yield just the name if True, else ``(name, array)`` pairs.
        method : str
            Name of the public method to call on sub-groups when recursing.
        recurse : bool
            Whether to descend into sub-groups.

        """
        for child in sorted(listdir(self._store, self._path)):
            child_path = self._key_prefix + child
            if contains_array(self._store, child_path):
                if keys_only:
                    yield child
                else:
                    yield child, self[child]
            elif recurse and contains_group(self._store, child_path):
                subgroup = self[child]
                # items from sub-groups are yielded exactly as the sub-group
                # produces them
                yield from getattr(subgroup, method)(recurse=recurse)
3✔
490

491
    def visitvalues(self, func):
3✔
492
        """Run ``func`` on each object.
493

494
        Note: If ``func`` returns ``None`` (or doesn't return),
495
              iteration continues. However, if ``func`` returns
496
              anything else, it ceases and returns that value.
497

498
        Examples
499
        --------
500
        >>> import zarr
501
        >>> g1 = zarr.group()
502
        >>> g2 = g1.create_group('foo')
503
        >>> g3 = g1.create_group('bar')
504
        >>> g4 = g3.create_group('baz')
505
        >>> g5 = g3.create_group('quux')
506
        >>> def print_visitor(obj):
507
        ...     print(obj)
508
        >>> g1.visitvalues(print_visitor)
509
        <zarr.hierarchy.Group '/bar'>
510
        <zarr.hierarchy.Group '/bar/baz'>
511
        <zarr.hierarchy.Group '/bar/quux'>
512
        <zarr.hierarchy.Group '/foo'>
513
        >>> g3.visitvalues(print_visitor)
514
        <zarr.hierarchy.Group '/bar/baz'>
515
        <zarr.hierarchy.Group '/bar/quux'>
516

517
        """
518

519
        def _visit(obj):
3✔
520
            yield obj
3✔
521
            keys = sorted(getattr(obj, "keys", lambda: [])())
3✔
522
            for k in keys:
3✔
523
                for v in _visit(obj[k]):
3✔
524
                    yield v
3✔
525

526
        for each_obj in islice(_visit(self), 1, None):
3✔
527
            value = func(each_obj)
3✔
528
            if value is not None:
3✔
529
                return value
3✔
530

531
    def visit(self, func):
3✔
532
        """Run ``func`` on each object's path.
533

534
        Note: If ``func`` returns ``None`` (or doesn't return),
535
              iteration continues. However, if ``func`` returns
536
              anything else, it ceases and returns that value.
537

538
        Examples
539
        --------
540
        >>> import zarr
541
        >>> g1 = zarr.group()
542
        >>> g2 = g1.create_group('foo')
543
        >>> g3 = g1.create_group('bar')
544
        >>> g4 = g3.create_group('baz')
545
        >>> g5 = g3.create_group('quux')
546
        >>> def print_visitor(name):
547
        ...     print(name)
548
        >>> g1.visit(print_visitor)
549
        bar
550
        bar/baz
551
        bar/quux
552
        foo
553
        >>> g3.visit(print_visitor)
554
        baz
555
        quux
556

557
        """
558

559
        base_len = len(self.name)
3✔
560
        return self.visitvalues(lambda o: func(o.name[base_len:].lstrip("/")))
3✔
561

562
    def visitkeys(self, func):
3✔
563
        """An alias for :py:meth:`~Group.visit`.
564
        """
565

566
        return self.visit(func)
3✔
567

568
    def visititems(self, func):
3✔
569
        """Run ``func`` on each object's path and the object itself.
570

571
        Note: If ``func`` returns ``None`` (or doesn't return),
572
              iteration continues. However, if ``func`` returns
573
              anything else, it ceases and returns that value.
574

575
        Examples
576
        --------
577
        >>> import zarr
578
        >>> g1 = zarr.group()
579
        >>> g2 = g1.create_group('foo')
580
        >>> g3 = g1.create_group('bar')
581
        >>> g4 = g3.create_group('baz')
582
        >>> g5 = g3.create_group('quux')
583
        >>> def print_visitor(name, obj):
584
        ...     print((name, obj))
585
        >>> g1.visititems(print_visitor)
586
        ('bar', <zarr.hierarchy.Group '/bar'>)
587
        ('bar/baz', <zarr.hierarchy.Group '/bar/baz'>)
588
        ('bar/quux', <zarr.hierarchy.Group '/bar/quux'>)
589
        ('foo', <zarr.hierarchy.Group '/foo'>)
590
        >>> g3.visititems(print_visitor)
591
        ('baz', <zarr.hierarchy.Group '/bar/baz'>)
592
        ('quux', <zarr.hierarchy.Group '/bar/quux'>)
593

594
        """
595

596
        base_len = len(self.name)
3✔
597
        return self.visitvalues(lambda o: func(o.name[base_len:].lstrip("/"), o))
3✔
598

599
    def tree(self, expand=False, level=None):
        """Return a ``print``-able view of the hierarchy rooted at this group.

        Parameters
        ----------
        expand : bool, optional
            Only relevant for HTML representation. If True, tree will be fully expanded.
        level : int, optional
            Maximum depth to descend into hierarchy.

        Examples
        --------
        >>> import zarr
        >>> g1 = zarr.group()
        >>> g2 = g1.create_group('foo')
        >>> g3 = g1.create_group('bar')
        >>> g4 = g3.create_group('baz')
        >>> g5 = g3.create_group('quux')
        >>> d1 = g5.create_dataset('baz', shape=100, chunks=10)
        >>> g1.tree()
        /
         ├── bar
         │   ├── baz
         │   └── quux
         │       └── baz (100,) float64
         └── foo
        >>> g1.tree(level=2)
        /
         ├── bar
         │   ├── baz
         │   └── quux
         └── foo
        >>> g3.tree()
        bar
         ├── baz
         └── quux
             └── baz (100,) float64

        Notes
        -----
        Please note that this is an experimental feature. The behaviour of this
        function is still evolving and the default output and/or parameters may change
        in future versions.

        """

        viewer = TreeViewer(self, expand=expand, level=level)
        return viewer
3✔
646

647
    def _write_op(self, f, *args, **kwargs):
        """Run the write operation ``f(*args, **kwargs)`` under the group lock.

        Raises ReadOnlyError if the group is read-only. When a synchronizer is
        set, the call is made while holding the lock it provides for the group
        metadata key; otherwise the ``nolock`` placeholder is used.
        """

        # guard condition
        if self._read_only:
            raise ReadOnlyError()

        # with a synchronizer, synchronize on the root group
        if self._synchronizer is not None:
            guard = self._synchronizer[group_meta_key]
        else:
            guard = nolock

        with guard:
            return f(*args, **kwargs)
3✔
662

663
    def create_group(self, name, overwrite=False):
3✔
664
        """Create a sub-group.
665

666
        Parameters
667
        ----------
668
        name : string
669
            Group name.
670
        overwrite : bool, optional
671
            If True, overwrite any existing array with the given name.
672

673
        Returns
674
        -------
675
        g : zarr.hierarchy.Group
676

677
        Examples
678
        --------
679
        >>> import zarr
680
        >>> g1 = zarr.group()
681
        >>> g2 = g1.create_group('foo')
682
        >>> g3 = g1.create_group('bar')
683
        >>> g4 = g1.create_group('baz/quux')
684

685
        """
686

687
        return self._write_op(self._create_group_nosync, name, overwrite=overwrite)
3✔
688

689
    def _create_group_nosync(self, name, overwrite=False):
        """Initialize a group at `name` in the store and return it.

        No locking or read-only check is done here.
        """
        target = self._item_path(name)

        # create terminal group
        init_group(self._store, path=target, chunk_store=self._chunk_store,
                   overwrite=overwrite)

        new_group = Group(self._store, path=target, read_only=self._read_only,
                          chunk_store=self._chunk_store,
                          cache_attrs=self.attrs.cache,
                          synchronizer=self._synchronizer)
        return new_group
699

700
    def create_groups(self, *names, **kwargs):
3✔
701
        """Convenience method to create multiple groups in a single call."""
702
        return tuple(self.create_group(name, **kwargs) for name in names)
3✔
703

704
    def require_group(self, name, overwrite=False):
3✔
705
        """Obtain a sub-group, creating one if it doesn't exist.
706

707
        Parameters
708
        ----------
709
        name : string
710
            Group name.
711
        overwrite : bool, optional
712
            Overwrite any existing array with given `name` if present.
713

714
        Returns
715
        -------
716
        g : zarr.hierarchy.Group
717

718
        Examples
719
        --------
720
        >>> import zarr
721
        >>> g1 = zarr.group()
722
        >>> g2 = g1.require_group('foo')
723
        >>> g3 = g1.require_group('foo')
724
        >>> g2 == g3
725
        True
726

727
        """
728

729
        return self._write_op(self._require_group_nosync, name,
3✔
730
                              overwrite=overwrite)
731

732
    def _require_group_nosync(self, name, overwrite=False):
        """Return the group at `name`, initializing it when it is missing.

        No locking or read-only check is done here.
        """
        target = self._item_path(name)

        # create terminal group if necessary
        already_there = contains_group(self._store, target)
        if not already_there:
            init_group(store=self._store, path=target,
                       chunk_store=self._chunk_store, overwrite=overwrite)

        required = Group(self._store, path=target, read_only=self._read_only,
                         chunk_store=self._chunk_store,
                         cache_attrs=self.attrs.cache,
                         synchronizer=self._synchronizer)
        return required
743

744
    def require_groups(self, *names):
3✔
745
        """Convenience method to require multiple groups in a single call."""
746
        return tuple(self.require_group(name) for name in names)
3✔
747

748
    # noinspection PyIncorrectDocstring
749
    def create_dataset(self, name, **kwargs):
3✔
750
        """Create an array.
751

752
        Arrays are known as "datasets" in HDF5 terminology. For compatibility
753
        with h5py, Zarr groups also implement the require_dataset() method.
754

755
        Parameters
756
        ----------
757
        name : string
758
            Array name.
759
        data : array_like, optional
760
            Initial data.
761
        shape : int or tuple of ints
762
            Array shape.
763
        chunks : int or tuple of ints, optional
764
            Chunk shape. If not provided, will be guessed from `shape` and
765
            `dtype`.
766
        dtype : string or dtype, optional
767
            NumPy dtype.
768
        compressor : Codec, optional
769
            Primary compressor.
770
        fill_value : object
771
            Default value to use for uninitialized portions of the array.
772
        order : {'C', 'F'}, optional
773
            Memory layout to be used within each chunk.
774
        synchronizer : zarr.sync.ArraySynchronizer, optional
775
            Array synchronizer.
776
        filters : sequence of Codecs, optional
777
            Sequence of filters to use to encode chunk data prior to
778
            compression.
779
        overwrite : bool, optional
780
            If True, replace any existing array or group with the given name.
781
        cache_metadata : bool, optional
782
            If True, array configuration metadata will be cached for the
783
            lifetime of the object. If False, array metadata will be reloaded
784
            prior to all data access and modification operations (may incur
785
            overhead depending on storage and data access pattern).
786

787
        Returns
788
        -------
789
        a : zarr.core.Array
790

791
        Examples
792
        --------
793
        >>> import zarr
794
        >>> g1 = zarr.group()
795
        >>> d1 = g1.create_dataset('foo', shape=(10000, 10000),
796
        ...                        chunks=(1000, 1000))
797
        >>> d1
798
        <zarr.core.Array '/foo' (10000, 10000) float64>
799
        >>> d2 = g1.create_dataset('bar/baz/qux', shape=(100, 100, 100),
800
        ...                        chunks=(100, 10, 10))
801
        >>> d2
802
        <zarr.core.Array '/bar/baz/qux' (100, 100, 100) float64>
803

804
        """
805

806
        return self._write_op(self._create_dataset_nosync, name, **kwargs)
3✔
807

808
    def _create_dataset_nosync(self, name, data=None, **kwargs):
        """Create the array at `name`, from `data` if given, else empty.

        The group's synchronizer and attribute caching setting are used unless
        the caller supplied its own. No locking or read-only check is done
        here.
        """

        target = self._item_path(name)

        # fall back to the group's own settings
        kwargs.setdefault('synchronizer', self._synchronizer)
        kwargs.setdefault('cache_attrs', self.attrs.cache)

        # no initial data: create from the shape and other keyword arguments
        if data is None:
            return create(store=self._store, path=target,
                          chunk_store=self._chunk_store, **kwargs)

        # initial data provided
        return array(data, store=self._store, path=target,
                     chunk_store=self._chunk_store, **kwargs)
3✔
826

827
    def require_dataset(self, name, shape, dtype=None, exact=False, **kwargs):
3✔
828
        """Obtain an array, creating if it doesn't exist.
829

830
        Arrays are known as "datasets" in HDF5 terminology. For compatibility
831
        with h5py, Zarr groups also implement the create_dataset() method.
832

833
        Other `kwargs` are as per :func:`zarr.hierarchy.Group.create_dataset`.
834

835
        Parameters
836
        ----------
837
        name : string
838
            Array name.
839
        shape : int or tuple of ints
840
            Array shape.
841
        dtype : string or dtype, optional
842
            NumPy dtype.
843
        exact : bool, optional
844
            If True, require `dtype` to match exactly. If false, require
845
            `dtype` can be cast from array dtype.
846

847
        """
848

849
        return self._write_op(self._require_dataset_nosync, name, shape=shape,
3✔
850
                              dtype=dtype, exact=exact, **kwargs)
851

852
    def _require_dataset_nosync(self, name, shape, dtype=None, exact=False,
                                **kwargs):
        """Return the array at `name`, creating it when it is missing.

        An existing array is checked against the requested `shape` and
        `dtype`. No locking or read-only check is done here.

        Raises
        ------
        TypeError
            If an array already exists and its shape differs, or its dtype
            fails the exact or castable check selected by `exact`.

        """

        target = self._item_path(name)

        # nothing there yet: create the array
        if not contains_array(self._store, target):
            return self._create_dataset_nosync(name, shape=shape, dtype=dtype,
                                               **kwargs)

        # array already exists at path, validate that it is the right shape and type
        synchronizer = kwargs.get('synchronizer', self._synchronizer)
        cache_metadata = kwargs.get('cache_metadata', True)
        cache_attrs = kwargs.get('cache_attrs', self.attrs.cache)
        existing = Array(self._store, path=target, read_only=self._read_only,
                         chunk_store=self._chunk_store, synchronizer=synchronizer,
                         cache_metadata=cache_metadata, cache_attrs=cache_attrs)

        wanted_shape = normalize_shape(shape)
        if wanted_shape != existing.shape:
            raise TypeError('shape do not match existing array; expected {}, got {}'
                            .format(existing.shape, wanted_shape))

        wanted_dtype = np.dtype(dtype)
        if exact:
            if wanted_dtype != existing.dtype:
                raise TypeError('dtypes do not match exactly; expected {}, got {}'
                                .format(existing.dtype, wanted_dtype))
        elif not np.can_cast(wanted_dtype, existing.dtype):
            raise TypeError('dtypes ({}, {}) cannot be safely cast'
                            .format(wanted_dtype, existing.dtype))

        return existing
885

886
    def create(self, name, **kwargs):
        """Create an array named `name` in this group.

        Keyword arguments are as per :func:`zarr.creation.create`."""
        worker = self._create_nosync
        return self._write_op(worker, name, **kwargs)
3✔
890

891
    def _create_nosync(self, name, **kwargs):
        """Worker for :meth:`create`, dispatched through ``_write_op``.

        The group's synchronizer and attribute-cache setting are applied
        unless the caller passes ``synchronizer`` / ``cache_attrs``.
        """
        location = self._item_path(name)
        # Group-level defaults; explicit caller arguments take precedence.
        options = dict(synchronizer=self._synchronizer,
                       cache_attrs=self.attrs.cache)
        options.update(kwargs)
        return create(store=self._store, path=location,
                      chunk_store=self._chunk_store, **options)
897

898
    def empty(self, name, **kwargs):
        """Create an array named `name` in this group.

        Keyword arguments are as per :func:`zarr.creation.empty`."""
        worker = self._empty_nosync
        return self._write_op(worker, name, **kwargs)
3✔
902

903
    def _empty_nosync(self, name, **kwargs):
        """Worker for :meth:`empty`, dispatched through ``_write_op``.

        The group's synchronizer and attribute-cache setting are applied
        unless the caller passes ``synchronizer`` / ``cache_attrs``.
        """
        location = self._item_path(name)
        # Group-level defaults; explicit caller arguments take precedence.
        options = dict(synchronizer=self._synchronizer,
                       cache_attrs=self.attrs.cache)
        options.update(kwargs)
        return empty(store=self._store, path=location,
                     chunk_store=self._chunk_store, **options)
909

910
    def zeros(self, name, **kwargs):
        """Create an array named `name` in this group.

        Keyword arguments are as per :func:`zarr.creation.zeros`."""
        worker = self._zeros_nosync
        return self._write_op(worker, name, **kwargs)
3✔
914

915
    def _zeros_nosync(self, name, **kwargs):
        """Worker for :meth:`zeros`, dispatched through ``_write_op``.

        The group's synchronizer and attribute-cache setting are applied
        unless the caller passes ``synchronizer`` / ``cache_attrs``.
        """
        location = self._item_path(name)
        # Group-level defaults; explicit caller arguments take precedence.
        options = dict(synchronizer=self._synchronizer,
                       cache_attrs=self.attrs.cache)
        options.update(kwargs)
        return zeros(store=self._store, path=location,
                     chunk_store=self._chunk_store, **options)
921

922
    def ones(self, name, **kwargs):
        """Create an array named `name` in this group.

        Keyword arguments are as per :func:`zarr.creation.ones`."""
        worker = self._ones_nosync
        return self._write_op(worker, name, **kwargs)
3✔
926

927
    def _ones_nosync(self, name, **kwargs):
        """Worker for :meth:`ones`, dispatched through ``_write_op``.

        The group's synchronizer and attribute-cache setting are applied
        unless the caller passes ``synchronizer`` / ``cache_attrs``.
        """
        location = self._item_path(name)
        # Group-level defaults; explicit caller arguments take precedence.
        options = dict(synchronizer=self._synchronizer,
                       cache_attrs=self.attrs.cache)
        options.update(kwargs)
        return ones(store=self._store, path=location,
                    chunk_store=self._chunk_store, **options)
3✔
932

933
    def full(self, name, fill_value, **kwargs):
        """Create an array named `name` in this group.

        Keyword arguments are as per :func:`zarr.creation.full`."""
        worker = self._full_nosync
        return self._write_op(worker, name, fill_value, **kwargs)
3✔
937

938
    def _full_nosync(self, name, fill_value, **kwargs):
        """Worker for :meth:`full`, dispatched through ``_write_op``.

        The group's synchronizer and attribute-cache setting are applied
        unless the caller passes ``synchronizer`` / ``cache_attrs``.
        """
        location = self._item_path(name)
        # Group-level defaults; explicit caller arguments take precedence.
        options = dict(synchronizer=self._synchronizer,
                       cache_attrs=self.attrs.cache)
        options.update(kwargs)
        return full(store=self._store, path=location,
                    chunk_store=self._chunk_store, fill_value=fill_value,
                    **options)
944

945
    def array(self, name, data, **kwargs):
        """Create an array named `name` in this group from `data`.

        Keyword arguments are as per :func:`zarr.creation.array`."""
        worker = self._array_nosync
        return self._write_op(worker, name, data, **kwargs)
3✔
949

950
    def _array_nosync(self, name, data, **kwargs):
        """Worker for :meth:`array`, dispatched through ``_write_op``.

        The group's synchronizer and attribute-cache setting are applied
        unless the caller passes ``synchronizer`` / ``cache_attrs``.
        """
        location = self._item_path(name)
        # Group-level defaults; explicit caller arguments take precedence.
        options = dict(synchronizer=self._synchronizer,
                       cache_attrs=self.attrs.cache)
        options.update(kwargs)
        return array(data, store=self._store, path=location,
                     chunk_store=self._chunk_store, **options)
956

957
    def empty_like(self, name, data, **kwargs):
        """Create an array named `name` in this group, modelled on `data`.

        Keyword arguments are as per :func:`zarr.creation.empty_like`."""
        worker = self._empty_like_nosync
        return self._write_op(worker, name, data, **kwargs)
3✔
961

962
    def _empty_like_nosync(self, name, data, **kwargs):
        """Worker for :meth:`empty_like`, dispatched through ``_write_op``.

        The group's synchronizer and attribute-cache setting are applied
        unless the caller passes ``synchronizer`` / ``cache_attrs``.
        """
        location = self._item_path(name)
        # Group-level defaults; explicit caller arguments take precedence.
        options = dict(synchronizer=self._synchronizer,
                       cache_attrs=self.attrs.cache)
        options.update(kwargs)
        return empty_like(data, store=self._store, path=location,
                          chunk_store=self._chunk_store, **options)
968

969
    def zeros_like(self, name, data, **kwargs):
        """Create an array named `name` in this group, modelled on `data`.

        Keyword arguments are as per :func:`zarr.creation.zeros_like`."""
        worker = self._zeros_like_nosync
        return self._write_op(worker, name, data, **kwargs)
3✔
973

974
    def _zeros_like_nosync(self, name, data, **kwargs):
        """Worker for :meth:`zeros_like`, dispatched through ``_write_op``.

        The group's synchronizer and attribute-cache setting are applied
        unless the caller passes ``synchronizer`` / ``cache_attrs``.
        """
        location = self._item_path(name)
        # Group-level defaults; explicit caller arguments take precedence.
        options = dict(synchronizer=self._synchronizer,
                       cache_attrs=self.attrs.cache)
        options.update(kwargs)
        return zeros_like(data, store=self._store, path=location,
                          chunk_store=self._chunk_store, **options)
980

981
    def ones_like(self, name, data, **kwargs):
        """Create an array named `name` in this group, modelled on `data`.

        Keyword arguments are as per :func:`zarr.creation.ones_like`."""
        worker = self._ones_like_nosync
        return self._write_op(worker, name, data, **kwargs)
3✔
985

986
    def _ones_like_nosync(self, name, data, **kwargs):
        """Worker for :meth:`ones_like`, dispatched through ``_write_op``.

        The group's synchronizer and attribute-cache setting are applied
        unless the caller passes ``synchronizer`` / ``cache_attrs``.
        """
        location = self._item_path(name)
        # Group-level defaults; explicit caller arguments take precedence.
        options = dict(synchronizer=self._synchronizer,
                       cache_attrs=self.attrs.cache)
        options.update(kwargs)
        return ones_like(data, store=self._store, path=location,
                         chunk_store=self._chunk_store, **options)
992

993
    def full_like(self, name, data, **kwargs):
        """Create an array named `name` in this group, modelled on `data`.

        Keyword arguments are as per :func:`zarr.creation.full_like`."""
        worker = self._full_like_nosync
        return self._write_op(worker, name, data, **kwargs)
3✔
997

998
    def _full_like_nosync(self, name, data, **kwargs):
        """Worker for :meth:`full_like`, dispatched through ``_write_op``.

        The group's synchronizer and attribute-cache setting are applied
        unless the caller passes ``synchronizer`` / ``cache_attrs``.
        """
        location = self._item_path(name)
        # Group-level defaults; explicit caller arguments take precedence.
        options = dict(synchronizer=self._synchronizer,
                       cache_attrs=self.attrs.cache)
        options.update(kwargs)
        return full_like(data, store=self._store, path=location,
                         chunk_store=self._chunk_store, **options)
1004

1005
    def _move_nosync(self, path, new_path):
        """Rename `path` to `new_path` in the store, and in the chunk store
        too when a separate one is configured."""
        # The main store is always renamed first; the chunk store follows
        # only if it is set.
        targets = [self._store]
        if self._chunk_store is not None:
            targets.append(self._chunk_store)
        for target in targets:
            rename(target, path, new_path)
3✔
1009

1010
    def move(self, source, dest):
        """Move an array or group to a new location within the hierarchy.

        Both paths are interpreted relative to this group.

        Parameters
        ----------
        source : string
            Name or path to a Zarr object to move.
        dest : string
            New name or path of the Zarr object.

        Raises
        ------
        ValueError
            If nothing exists at `source`, or if an array or group already
            exists at `dest`.
        """

        source = self._item_path(source)
        dest = self._item_path(dest)

        # The source must be an existing array or group.
        if (not contains_array(self._store, source) and
                not contains_group(self._store, source)):
            raise ValueError('The source, "%s", does not exist.' % source)

        # The destination must not be occupied.
        if contains_array(self._store, dest) or contains_group(self._store, dest):
            raise ValueError('The dest, "%s", already exists.' % dest)

        # Make sure the parent group of `dest` exists before moving.
        parent, separator, _ = dest.rpartition("/")
        if separator:
            self.require_group("/" + parent)

        self._write_op(self._move_nosync, source, dest)
3✔
1036

1037

1038
def _normalize_store_arg(store, clobber=False, storage_options=None):
    """Normalize a polymorphic `store` argument for group functions.

    Delegates to ``normalize_store_arg`` with ``MemoryStore`` as the
    default store.
    """
    options = dict(clobber=clobber, default=MemoryStore,
                   storage_options=storage_options)
    return normalize_store_arg(store, **options)
1041

1042

1043
def group(store=None, overwrite=False, chunk_store=None,
          cache_attrs=True, synchronizer=None, path=None):
    """Create a group.

    Parameters
    ----------
    store : MutableMapping or string, optional
        Store or path to directory in file system.
    overwrite : bool, optional
        If True, delete any pre-existing data in `store` at `path` before
        creating the group.
    chunk_store : MutableMapping, optional
        Separate storage for chunks. If not provided, `store` will be used
        for storage of both chunks and metadata.
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store prior
        to all attribute read operations.
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path within store.

    Returns
    -------
    g : zarr.hierarchy.Group

    Examples
    --------
    Create a group in memory::

        >>> import zarr
        >>> g = zarr.group()
        >>> g
        <zarr.hierarchy.Group '/'>

    Create a group with a different store::

        >>> store = zarr.DirectoryStore('data/example.zarr')
        >>> g = zarr.group(store=store, overwrite=True)
        >>> g
        <zarr.hierarchy.Group '/'>

    """

    # handle polymorphic store arg
    store = _normalize_store_arg(store)
    path = normalize_storage_path(path)

    # require group: the existence check must look at `path` itself, so that
    # an existing group elsewhere in the store (e.g. at the root) does not
    # prevent the group at `path` from being initialized
    if overwrite or not contains_group(store, path=path):
        init_group(store, overwrite=overwrite, chunk_store=chunk_store,
                   path=path)

    return Group(store, read_only=False, chunk_store=chunk_store,
                 cache_attrs=cache_attrs, synchronizer=synchronizer, path=path)
1099

1100

1101
def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=None,
               chunk_store=None, storage_options=None):
    """Open a group using file-mode-like semantics.

    Parameters
    ----------
    store : MutableMapping or string, optional
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode: 'r' means read only (must exist); 'r+' means
        read/write (must exist); 'a' means read/write (create if doesn't
        exist); 'w' means create (overwrite if exists); 'w-' means create
        (fail if exists). 'x' is accepted as an alias for 'w-'.
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store prior
        to all attribute read operations.
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path within store.
    chunk_store : MutableMapping or string, optional
        Store or path to directory in file system or name of zip file.
    storage_options : dict
        If using an fsspec URL to create the store, these will be passed to
        the backend implementation. Ignored otherwise.

    Returns
    -------
    g : zarr.hierarchy.Group

    Raises
    ------
    ContainsArrayError
        If an array exists at `path` and `mode` is anything but 'w'.
    GroupNotFoundError
        If `mode` is 'r' or 'r+' and no group exists at `path`.
    ContainsGroupError
        If `mode` is 'w-' (or 'x') and a group already exists at `path`.

    Examples
    --------
    >>> import zarr
    >>> root = zarr.open_group('data/example.zarr', mode='w')
    >>> foo = root.create_group('foo')
    >>> bar = root.create_group('bar')
    >>> root
    <zarr.hierarchy.Group '/'>
    >>> root2 = zarr.open_group('data/example.zarr', mode='a')
    >>> root2
    <zarr.hierarchy.Group '/'>
    >>> root == root2
    True

    """

    # handle polymorphic store arg(s); every mode except 'r' may write
    clobber = mode != 'r'
    store = _normalize_store_arg(store, clobber=clobber, storage_options=storage_options)
    if chunk_store is not None:
        chunk_store = _normalize_store_arg(chunk_store, clobber=clobber,
                                           storage_options=storage_options)
    path = normalize_storage_path(path)

    # ensure store is initialized

    if mode == 'w':
        # unconditional (re)creation
        init_group(store, overwrite=True, path=path, chunk_store=chunk_store)

    elif mode in ('r', 'r+', 'a', 'w-', 'x'):
        # none of these modes may open a path occupied by an array
        if contains_array(store, path=path):
            raise ContainsArrayError(path)

        group_exists = contains_group(store, path=path)

        if mode in ('r', 'r+'):
            # must already exist
            if not group_exists:
                raise GroupNotFoundError(path)
        elif mode == 'a':
            # create only when missing
            if not group_exists:
                init_group(store, path=path, chunk_store=chunk_store)
        else:
            # 'w-' / 'x': must not exist yet
            if group_exists:
                raise ContainsGroupError(path)
            init_group(store, path=path, chunk_store=chunk_store)

    # only mode 'r' yields a read-only group
    return Group(store, read_only=(mode == 'r'), cache_attrs=cache_attrs,
                 synchronizer=synchronizer, path=path, chunk_store=chunk_store)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2024 Coveralls, Inc