• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rmcar17 / cogent3 / 17852834425

19 Sep 2025 08:22AM UTC coverage: 90.681% (+0.009%) from 90.672%
17852834425

push

github

rmcar17
TST: Convert dict views to lists

28257 of 31161 relevant lines covered (90.68%)

5.44 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.17
/src/cogent3/util/dict_array.py
1
"""Wrapper for numpy arrays so that they can be indexed by name
2

3
>>> a = numpy.identity(3, int)
4
>>> b = DictArrayTemplate("abc", "ABC").wrap(a)
5
>>> b[0]
6
===========
7
A    B    C
8
-----------
9
1    0    0
10
-----------
11
>>> b["a"]
12
===========
13
A    B    C
14
-----------
15
1    0    0
16
-----------
17
>>> b.keys()
18
['a', 'b', 'c']
19
>>> b["a"].keys()
20
['A', 'B', 'C']
21
"""
22

23
import contextlib
6✔
24
import json
6✔
25
import os
6✔
26
from collections import defaultdict
6✔
27
from collections.abc import Hashable, Iterable, Iterator
6✔
28
from collections.abc import Sequence as PySeq
6✔
29
from itertools import combinations, product
6✔
30
from typing import TYPE_CHECKING, Any, Protocol, SupportsInt, TypeVar, cast
6✔
31

32
import numpy
6✔
33
import numpy.typing as npt
6✔
34
from typing_extensions import Self
6✔
35

36
from cogent3._version import __version__
6✔
37
from cogent3.util.deserialise import get_class, register_deserialiser
6✔
38
from cogent3.util.io import PathType, atomic_write
6✔
39
from cogent3.util.misc import get_object_provenance
6✔
40

41
if TYPE_CHECKING:
42
    from cogent3.core.table import Table
43

44

45
# Alias for a numpy array whose dtype is a numeric scalar type.
NumpyArray = npt.NDArray[numpy.number]
6✔
46

47
# Alias for a sequence of str (PySeq is collections.abc.Sequence).
PySeqStr = PySeq[str]
6✔
48

49

50
class SupportsLTHashable(Hashable, Protocol):
    """Structural type for objects that are hashable and define ``<``."""

    def __lt__(self, other: object) -> bool:
        """Protocol member only, no implementation is provided here."""
52

53

54
# Unconstrained generic type variable.
T = TypeVar("T")
6✔
55
# Type variable for label/key types: bound to SupportsLTHashable, i.e. the
# labels must be hashable and define `<`.
K = TypeVar("K", bound=SupportsLTHashable)
6✔
56

57

58
def convert_1D_dict(
6✔
59
    data: dict[K, list[float]], row_order: PySeq[K] | None = None
60
) -> tuple[list[list[float]], PySeq[K]]:
61
    """returns a 1D list and header as dict keys
62

63
    Parameters
64
    ----------
65
    data : dict
66
        a 1D dict
67
    row_order
68
        series with column headings. If not provided, the sorted top level dict
69
        keys are used.
70
    """
71
    if row_order is None:
6✔
72
        row_order = sorted(data)
6✔
73

74
    rows = [data[c] for c in row_order]
6✔
75
    return rows, row_order
6✔
76

77

78
def convert2Ddistance(
6✔
79
    dists: dict[tuple[K, K], float],
80
    header: PySeq[K] | None = None,
81
    row_order: PySeq[K] | None = None,
82
) -> tuple[list[list[float]], PySeq[K], PySeq[K]]:
83
    """returns a 2 dimensional list, header and row order
84

85
    Parameters
86
    ----------
87
    dists : dict
88
        a 1Ddict with {(a, b): dist, ..}
89
    header
90
        series with column headings. If not provided, the sorted top level dict
91
        keys are used.
92
    row_order
93
        a specified order to generate the rows
94

95
    Returns
96
    -------
97
    2D list, header and row_order. If a dist not present, it's set to 0, or
98
    the symmetric value e.g. (a, b) -> (b, a).
99
    """
100
    if header is None:
6✔
101
        names: set[K] = set()
6✔
102
        for pair in dists:
6✔
103
            names.update(set(pair))
6✔
104
        header = sorted(names)
6✔
105

106
    rows: list[list[float]] = []
6✔
107
    for i in range(len(header)):
6✔
108
        n1 = header[i]
6✔
109
        row: list[float] = []
6✔
110
        for j in range(len(header)):
6✔
111
            n2 = header[j]
6✔
112
            dist = dists.get((n1, n2), dists.get((n2, n1), 0))
6✔
113
            row.append(dist)
6✔
114
        rows.append(row)
6✔
115

116
    row_order = header[:]
6✔
117
    return rows, row_order, header
6✔
118

119

120
def convert2DDict(
6✔
121
    twoDdict: dict[K, dict[K, float]],
122
    header: PySeq[K] | None = None,
123
    row_order: PySeq[K] | None = None,
124
    make_symmetric: bool = False,
125
) -> tuple[list[list[float]], PySeq[K], PySeq[K]]:
126
    """returns a 2 dimensional list, header and row order
127

128
    Parameters
129
    ----------
130
    twoDdict : dict
131
        a 2 dimensional dict with top level keys corresponding to column
132
        headings, lower level keys correspond to row headings
133
    header
134
        series with column headings. If not provided, the sorted top level dict
135
        keys are used.
136
    row_order
137
        a specified order to generate the rows
138
    make_symmetric : bool
139
        if True, twoDdict[a][b] == twoDdict[b][a]
140
    """
141
    if not row_order:
6✔
142
        row_order = list(twoDdict.keys())
6✔
143
        row_order.sort()
6✔
144

145
    if not header:  # we assume columns consistent across dict
6✔
146
        header = list(twoDdict[row_order[0]].keys())
6✔
147
        header.sort()
6✔
148

149
    if make_symmetric:
6✔
150
        combined = sorted(set(header) | set(row_order))
6✔
151
        header = row_order = combined
6✔
152
        data: dict[K, dict[K, float]] = defaultdict(dict)
6✔
153

154
        for k1, k2 in combinations(combined, 2):
6✔
155
            if k1 in twoDdict:
6✔
156
                val = twoDdict[k1].get(k2, 0)
6✔
157
            elif k2 in twoDdict:
6✔
158
                val = twoDdict[k2].get(k1, 0)
×
159
            else:
160
                val = 0
6✔
161
            data[k1][k2] = data[k2][k1] = val
6✔
162
        for k in data:
6✔
163
            data[k][k] = 0
6✔
164
        twoDdict = data
6✔
165

166
    # make list of lists
167
    rows: list[list[float]] = []
6✔
168
    for row in row_order:
6✔
169
        elements: list[float] = []
6✔
170
        for column in header:
6✔
171
            elements.append(twoDdict[row][column])
6✔
172
        rows.append(elements)
6✔
173

174
    return rows, row_order, header
6✔
175

176

177
def convert_dict(
    data: dict[K, list[float]] | dict[K, dict[K, float]] | dict[tuple[K, K], float],
    header: PySeq[K] | None = None,
    row_order: PySeq[K] | None = None,
) -> tuple[list[list[float]], PySeq[K], PySeq[K] | None]:
    """returns a list, DictArrayTemplate args

    The kind of dict is detected from its first key / value:

    - keys that are 2-tuples are handled by convert2Ddistance
    - values that have a ``keys`` attribute are handled by convert2DDict
    - anything else is treated as a 1D dict by convert_1D_dict

    Parameters
    ----------
    data : dict
        a 1D or 2D dict
    header
        series with column headings. If not provided, the sorted top level dict
        keys are used.
    row_order
        a specified order to generate the rows. For a 1D dict this is the
        order of the keys; if it is not provided, header is used as the key
        order instead (the behaviour prior to row_order being supported).

    Returns
    -------
    the values, the row order and the header. For a 1D dict the header is
    returned as it was provided.
    """
    rows: list[list[float]] | list[float]
    first_key = next(iter(data))
    if isinstance(first_key, tuple) and len(first_key) == 2:
        data = cast("dict[tuple[K, K], float]", data)
        rows, row_order, header = convert2Ddistance(data, header, row_order)
    elif hasattr(data[first_key], "keys"):  # type: ignore[index]
        data = cast("dict[K, dict[K, float]]", data)
        rows, row_order, header = convert2DDict(data, header, row_order)
    else:
        data = cast("dict[K, list[float]]", data)
        # the dict keys are the rows, so an explicit row_order decides the
        # order of the values, otherwise they end up under the wrong labels
        key_order = header if row_order is None else row_order
        rows, row_order = convert_1D_dict(data, key_order)
    return rows, row_order, header
6✔
206

207

208
def convert_series(
6✔
209
    data: PySeq[float] | PySeq[PySeq[float]],
210
    row_order: PySeq[K] | int | None = None,
211
    header: PySeq[K] | int | None = None,
212
) -> tuple[PySeq[float] | PySeq[PySeq[float]], PySeq[K] | int, PySeq[K] | int | None]:
213
    """returns a list, header and row order
214

215
    Parameters
216
    ----------
217
    data : dict
218
        a 1D or 2D dict
219
    header
220
        series with column headings. If not provided, the sorted top level dict
221
        keys are used.
222
    row_order
223
        a specified order to generate the rows
224
    """
225
    first_element = data[0]
6✔
226
    nrows = len(data)
6✔
227
    try:
6✔
228
        ncols = len(first_element)  # type: ignore[arg-type]
6✔
229
    except TypeError:
6✔
230
        ncols = 1
6✔
231

232
    if header is not None:
6✔
233
        dim_h = header if isinstance(header, int) else len(header)
6✔
234
    else:
235
        dim_h = None
6✔
236

237
    if row_order is not None:
6✔
238
        dim_r = row_order if isinstance(row_order, int) else len(row_order)
6✔
239
    else:
240
        dim_r = None
6✔
241

242
    if nrows == 1 and ncols > 1:
6✔
243
        if dim_h is not None and dim_h != ncols:
6✔
244
            msg = (
×
245
                f"mismatch between number columns={dim_h} "
246
                f"and number of elements in data={ncols}"
247
            )
248
            raise ValueError(
×
249
                msg,
250
            )
251
        if dim_r is not None and dim_r != 1:
6✔
252
            msg = (
×
253
                f"mismatch between number rows={dim_r} "
254
                f"and number of rows in data={ncols}"
255
            )
256
            raise ValueError(
×
257
                msg,
258
            )
259

260
    if not header:
6✔
261
        header = None if ncols == 1 else ncols
6✔
262
    row_order = row_order if row_order else nrows
6✔
263

264
    return data, row_order, header
6✔
265

266

267
def convert_for_dictarray(
    data: "DictArray | PySeq[float] | PySeq[PySeq[float]] | dict[K, list[float]] | dict[K, dict[K, float]] | dict[tuple[K, K], float]",
    header: PySeq[K] | None = None,
    row_order: PySeq[K] | None = None,
) -> tuple[
    NumpyArray | PySeq[float] | PySeq[PySeq[float]], PySeq[K] | None, PySeq[K] | None
]:
    """returns the values, row order and header from data

    Parameters
    ----------
    data : iterable
        data series, dictarray, dict, etc..
    header
        series with column headings. If not provided, the sorted top level dict
        keys are used. Ignored if data is a DictArray.
    row_order
        a specified order to generate the rows. Ignored if data is a
        DictArray.

    Returns
    -------
    the values, the row labels (first dimension) and the column labels
    (second dimension, can be None).

    Notes
    -----
    For a DictArray a copy of its array is returned, with the names of its
    first dimension as the row labels and, if it has one, the names of its
    second dimension as the column labels. Names of any further dimensions
    are not returned.
    """
    result_data: NumpyArray | PySeq[float] | PySeq[PySeq[float]]
    if isinstance(data, DictArray):
        names = data.template.names
        # rows are the first dimension, a 1D DictArray has no column labels
        row_order = names[0]
        header = names[1] if len(names) > 1 else None
        result_data = data.array.copy()
    elif hasattr(data, "keys"):  # dictlike, it could be defaultdict
        data = cast(
            "dict[K, list[float]] | dict[K, dict[K, float]] | dict[tuple[K, K], float]",
            data,
        )
        result_data, row_order, header = convert_dict(data, header, row_order)
    else:
        data = cast("PySeq[float] | PySeq[PySeq[float]]", data)
        # by keyword: convert_series takes row_order before header
        result_data, row_order, header = convert_series(  # type: ignore[assignment]
            data, row_order=row_order, header=header
        )

    return result_data, row_order, header
6✔
302

303

304
class NumericKey(int):
    """an int subclass, giving integer DictArray keys a type of their own"""

    def __new__(cls, val: SupportsInt) -> "NumericKey":
        return super().__new__(cls, val)
6✔
309

310

311
# Recursive alias: a list of floats, or a list of such lists.
NestedFloatList = list[float] | list["NestedFloatList"]
6✔
312

313

314
class DictArrayTemplate:
    """The labels for each dimension of an array.

    ``names`` holds one list of labels per dimension and ``ordinals`` the
    matching label -> position dicts. ``wrap`` combines the template with an
    array to give a DictArray, ``unwrap`` converts data back to a plain
    numpy array.
    """

    def __init__(
        self, *dimensions: Iterable[str] | Iterable[int] | int | None
    ) -> None:  # Consider replacing Iterable[] with Generic specifier
        """
        Parameters
        ----------
        dimensions
            one value per dimension, either a series of labels or an int n,
            which stands for the labels 0 .. n-1. int labels within a series
            are converted to NumericKey. None values are skipped.
        """
        self.names: list[list[int] | list[NumericKey] | list[str]] = []
        self.ordinals: list[dict[int | NumericKey | str, int]] = []
        for names in dimensions:
            if names is None:
                continue
            if isinstance(names, int):
                names = list(range(names))
            else:
                names = cast(
                    "list[NumericKey] | list[str]",
                    [NumericKey(v) if isinstance(v, int) else v for v in names],
                )

            self.names.append(names)

            # label -> position within this dimension
            ordinals = cast(
                "dict[int | NumericKey | str, int]",
                {c: i for (i, c) in enumerate(names)},
            )

            self.ordinals.append(ordinals)
        self._shape = tuple(len(keys) for keys in self.names)

    def __eq__(self, other: object) -> bool:
        """templates are equal if they have the same names"""
        return self is other or (
            isinstance(other, DictArrayTemplate) and self.names == other.names
        )

    def _dict2list(
        self, value: NestedFloatList | dict[Any, NestedFloatList], depth: int = 0
    ) -> NestedFloatList:
        """returns the values of a (possibly nested) dict as nested lists

        The values of each level are listed in the order of the names of
        the corresponding dimension.
        """
        # Unpack (possibly nested) dictionary into correct order of elements
        if depth < len(self._shape):
            value = cast("dict[Any, NestedFloatList]", value)
            return [self._dict2list(value[key], depth + 1) for key in self.names[depth]]
        return cast("NestedFloatList", value)

    def unwrap(
        self,
        value: "DictArray | dict[Any, NestedFloatList] | Iterable[float] | NestedFloatList",
    ) -> NumpyArray:
        """Convert to a simple numpy array

        Parameters
        ----------
        value
            a DictArray with a template equal to this one, a (nested) dict
            keyed by the names of this template, or array like data.

        Raises
        ------
        ValueError if value is a DictArray with a different template. An
        AssertionError if the shape of the result does not match the shape
        of this template.
        """
        if isinstance(value, DictArray):
            if value.template == self:
                value = value.array
            else:
                # used to return None, which can't be right
                msg = (
                    f"template names {value.template.names} of DictArray "
                    f"do not match {self.names}"
                )
                raise ValueError(msg)
        elif isinstance(value, dict):
            value = self._dict2list(cast("dict[Any, NestedFloatList]", value))
        value = numpy.asarray(value)
        assert value.shape == self._shape, (value.shape, self._shape)
        return value

    def wrap(
        self,
        array: NumpyArray | PySeq[float] | PySeq[PySeq[float]],
        dtype: numpy.dtype[Any] | type[int] | None = None,
    ) -> "DictArray":
        """returns a DictArray of array with the labels of this template

        Parameters
        ----------
        array
            array like data. Data with a ``keys`` attribute (e.g. a dict) is
            first converted by convert_for_dictarray, using the names of
            this template as the row order and (for a 2D template) header.
        dtype
            passed to numpy.asarray

        Raises
        ------
        AssertionError if the length of a dimension of array does not match
        the number of names for that dimension.
        """
        if hasattr(array, "keys"):
            if len(self._shape) == 2:
                r, h = self.names[:2]
            else:
                r, h = self.names[0], None
            array, _, _ = convert_for_dictarray(
                cast(
                    "dict[Any, list[float]] | dict[Any, dict[Any, float]] | dict[tuple[Any, Any], float]",
                    array,
                ),
                h,
                r,
            )
        array = numpy.asarray(array, dtype=dtype)
        for dim, categories in enumerate(self.names):
            assert len(categories) == numpy.shape(array)[dim], (
                f"cats={categories}; dim={dim}"
            )
        return DictArray(array, self)

    def interpret_index(
        self,
        names: NumpyArray
        | tuple[
            int | slice | list[int | NumericKey | str] | NumpyArray | NumericKey | str,
            ...,
        ]
        | int
        | slice
        | list[int | NumericKey | str]
        | NumericKey
        | str,
    ) -> tuple[
        tuple[int | slice | list[int | NumericKey] | NumericKey, ...],
        Self | None,
    ]:
        """converts an index expressed with labels into a positional index

        Parameters
        ----------
        names
            the index for one dimension, or a tuple with an index per
            dimension. An index can be an int position, a label, a slice
            (whose start / stop can be labels) or a list / array of
            positions and labels.

        Returns
        -------
        the positional index as a tuple, and a template for the dimensions
        that remain after indexing (None if no dimension remains).

        Notes
        -----
        Any int (NumericKey is an int subclass) is treated as a position,
        not as a label.
        """
        if isinstance(names, numpy.ndarray) and "int" in names.dtype.name:
            # the numpy item() method casts to the nearest Python type
            names = tuple(v.item() for v in names)

        if not isinstance(names, tuple):
            names = (names,)

        index: list[int | slice | list[int | NumericKey] | NumericKey] = []
        remaining: list[list[NumericKey] | list[int] | list[str]] = []
        for ordinals, allnames, name in zip(
            self.ordinals,
            self.names,
            names,
            strict=False,
        ):
            if not isinstance(name, (int, slice, list, numpy.ndarray)):
                # a single label, this dimension does not remain
                name = ordinals[name]
            elif isinstance(name, slice):
                start = name.start
                stop = name.stop
                with contextlib.suppress(ValueError):
                    # either None, or it's an int index
                    start = allnames.index(start)
                with contextlib.suppress(ValueError):
                    # as above
                    stop = allnames.index(stop)
                name = slice(start, stop, name.step)
                remaining.append(allnames.__getitem__(name))
            elif isinstance(name, (list, numpy.ndarray)):
                name = [n if isinstance(n, int) else ordinals[n] for n in name]
                remaining.append(
                    cast(
                        "list[NumericKey] | list[int] | list[str]",
                        [allnames[cast("int", i)] for i in name],
                    )
                )

            index.append(
                cast(
                    "int | slice | list[int | NumericKey] | NumericKey",
                    name,
                )
            )
        # dimensions that were not indexed are kept whole
        remaining.extend(self.names[len(index) :])
        klass = type(self)(*remaining) if remaining else None
        return (tuple(index), klass)
6✔
458

459

460
class DictArray:
    """Wraps a numpy array so that it can be indexed with strings. Behaves
    like nested dictionaries (only ordered).

    Notes
    -----
    Used for things like substitution matrices and bin probabilities.

    Indexing can be done via conventional integer based operations, using
    keys, lists of int/keys.

    Behaviour differs from numpy array indexing when you provide lists of
    indices. Such indexing is applied sequentially, e.g. darr[[0, 2], [1, 2]]
    will return the intersection of rows [0, 2] with columns [1, 2]. In numpy,
    the result would instead be the elements at [0, 1], [2, 2].
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """allow alternate ways of creating for time being

        Parameters
        ----------
        args
            - one value: data handled by convert_for_dictarray, which also
              provides the row and column keys
            - two values: an array and a DictArrayTemplate, both are stored
              as provided
            - more values: the data followed by the names of each dimension
        kwargs
            ``dtype`` is applied to the array, except when a template is
            provided. ``typecode`` is used if ``dtype`` is not provided
            (more than two values only).
        """
        if len(args) == 1:
            vals, row_keys, col_keys = convert_for_dictarray(args[0])
            dtype = kwargs.get("dtype")
            self.array: npt.NDArray[numpy.number] = numpy.asarray(vals, dtype=dtype)
            self.template = DictArrayTemplate(row_keys, col_keys)
        elif len(args) == 2:
            if not isinstance(args[1], DictArrayTemplate):
                raise NotImplementedError
            self.array = args[0]
            self.template = args[1]
        else:
            # presumably "typecode" is the legacy name of "dtype", so it is
            # only used when "dtype" was not provided
            dtype = kwargs.pop("dtype", None)
            typecode = kwargs.pop("typecode", None)
            if dtype is None:
                dtype = typecode
            create_new = DictArrayTemplate(*args[1:]).wrap(args[0], dtype=dtype)
            self.__dict__ = create_new.__dict__
        self.shape = self.array.shape

    @classmethod
    def from_array_names(
        cls,
        array: npt.NDArray[numpy.number],
        *names: PySeqStr,
    ) -> "DictArray":
        """creates instance directly from a numpy array and series of names

        Parameters
        ----------
        array
            any data type
        names
            must match the array dimensions

        Raises
        ------
        ValueError if the number of names, or the length of any of them,
        does not match the array.
        """

        if len(names) != array.ndim or any(
            len(labels) != array.shape[dim] for dim, labels in enumerate(names)
        ):
            msg = "names must match array dimensions"
            raise ValueError(msg)

        template = DictArrayTemplate(*names)
        return DictArray(array, template)

    def to_array(self) -> NumpyArray:
        """returns the wrapped numpy array (not a copy)"""
        return self.array

    def __add__(self, other: Self) -> "DictArray":
        """element-wise sum of two instances with identical dimension names"""
        if not isinstance(other, type(self)):
            msg = f"Incompatible types: {type(self)} and {type(other)}"
            raise TypeError(msg)

        if other.template.names != self.template.names:
            msg = f"unequal dimension names {self.template.names} != {other.template.names}"
            raise ValueError(
                msg,
            )

        return self.template.wrap(self.array + other.array)

    def __array__(
        self,
        dtype: numpy.dtype[Any] | None = None,
        copy: bool = False,
    ) -> NumpyArray:
        """numpy array protocol, copy is accepted but not used"""
        array = self.array
        if dtype is not None:
            array = array.astype(dtype)
        return array

    def to_dict(
        self, flatten: bool = False
    ) -> dict[int | NumericKey | str | tuple[int | NumericKey | str, ...], Any]:
        """returns data as a dict

        Parameters
        ----------
        flatten : bool
            returns a 1D dictionary, whose keys are tuples with one label
            per dimension. Otherwise the dict is nested, with one level per
            dimension.

        Notes
        -----
        A 1 dimensional array always gives a 1D dict with the labels (not
        tuples) as keys.
        """
        names = self.template.names
        shape = self.shape
        result: dict[
            int | NumericKey | str | tuple[int | NumericKey | str, ...], Any
        ] = {}
        if len(names) == 1:
            result = {
                names[0][i]: v.item() if hasattr(v, "item") else v
                for i, v in enumerate(self.array)
            }
        elif flatten:
            for indices in product(*[range(n) for n in shape]):
                value = self.array[indices]
                value = value.item() if hasattr(value, "item") else value
                coord = tuple(n[i] for n, i in zip(names, indices, strict=False))
                result[coord] = value
        else:
            for indices in product(*[range(n) for n in shape]):
                value = self.array[indices]
                value = value.item() if hasattr(value, "item") else value
                coord = tuple(n[i] for n, i in zip(names, indices, strict=False))
                # walk down one level per leading label, creating the nested
                # dicts as required, so any number of dimensions is nested
                current: dict[Any, Any] = result
                for label in coord[:-1]:
                    current = current.setdefault(label, {})
                current[coord[-1]] = value

        return result

    def to_rich_dict(self) -> dict[str, Any]:
        """returns a dict with the type of the template, array, names and version"""
        data = self.array.tolist()
        return {
            "type": get_object_provenance(self.template),
            "array": data,
            "names": self.template.names,
            "version": __version__,
        }

    def to_json(self) -> str:
        """returns the rich dict as a json formatted string"""
        return json.dumps(self.to_rich_dict())

    def __getitem__(
        self,
        names: NumpyArray
        | tuple[
            int | slice | list[int | NumericKey | str] | NumpyArray | NumericKey | str,
            ...,
        ]
        | int
        | slice
        | list[int | NumericKey | str]
        | NumericKey
        | str,
    ) -> "NumpyArray | DictArray":
        """returns the selected element(s)

        The result is a new instance if any dimension remains after
        indexing, otherwise it is whatever indexing the array returned.
        """
        index, remaining = self.template.interpret_index(names)
        if list in {type(v) for v in index}:
            # lists of indices are applied one dimension at a time, see the
            # class docstring
            result = self.array
            for dim, indices in enumerate(index):
                if isinstance(indices, slice):
                    actual_indices = (
                        (indices,)
                        if dim == 0
                        else (slice(None, None),) * dim + (indices,)
                    )
                    result = result[tuple(actual_indices)]
                    continue

                if isinstance(indices, int):
                    # as a list, so take() keeps this dimension
                    indices = [indices]

                result = result.take(indices, axis=dim)

        else:
            result = self.array[index]

        if remaining is None:
            return result
        return self.__class__(result.reshape(remaining._shape), remaining)

    def __iter__(self) -> Iterator["float | DictArray"]:
        """iterates over the first dimension

        Elements are wrapped with the labels of the other dimensions, if
        there are any.
        """
        _, remaining = self.template.interpret_index(0)
        for elt in self.array:
            if remaining is None:
                yield elt
            else:
                yield remaining.wrap(elt)

    def __len__(self) -> int:
        return len(self.template.names[0])

    def keys(self) -> list[int] | list[str] | list[NumericKey]:
        """returns a copy of the names of the first dimension"""
        return self.template.names[0][:]

    def items(self) -> list[tuple[int | str | NumericKey, Any]]:
        """returns (name, self[name]) for the names of the first dimension"""
        return [(n, self[n]) for n in self.keys()]

    def __repr__(self) -> str:
        if self.array.ndim > 2:
            return f"{self.array.ndim} dimensional {type(self).__name__}"

        t = self.to_table()
        t.set_repr_policy(show_shape=False)
        return str(t)

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def __eq__(self, other: object) -> bool:
        """compares to another instance, a numpy array or a dict

        Instances are equal if their templates and all array elements are
        equal. A dict is compared to the result of to_dict().
        """
        if self is other:
            return True
        if isinstance(other, DictArray):
            return bool(
                self.template == other.template
                and numpy.all(
                    self.array == other.array,
                )
            )
        if isinstance(other, type(self.array)):
            # NOTE(review): this is the element-wise numpy comparison, so
            # the result is an array and not a bool
            return self.array == other
        if isinstance(other, dict):
            return self.to_dict() == other
        return False

    def to_normalized(
        self, by_row: bool = False, by_column: bool = False
    ) -> "DictArray":
        """returns a DictArray as frequencies

        Parameters
        ----------
        by_row
            rows sum to 1
        by_column
            columns sum to 1

        Notes
        -----
        If neither is True, the values are divided by the sum of all
        elements. by_row and by_column cannot both be True.
        """
        assert not (by_row and by_column)
        # TODO need to check there are two dimension!
        if by_row:
            axis = 1
        elif by_column:
            axis = 0
        else:
            axis = None

        result = self.array / self.array.sum(axis=axis, keepdims=True)
        return self.template.wrap(result)

    def row_sum(self) -> "DictArray":
        """returns DictArray summed across rows"""
        axis = 1 if len(self.shape) == 2 else 0
        result = self.array.sum(axis=axis)
        template = DictArrayTemplate(self.template.names[0])
        return template.wrap(result)

    def col_sum(self) -> "DictArray":
        """returns DictArray summed across columns"""
        result = self.array.sum(axis=0)
        template = DictArrayTemplate(self.template.names[1])
        return template.wrap(result)

    def _repr_html_(self) -> str:
        if self.array.ndim > 2:
            return f"{self.array.ndim} dimensional {type(self).__name__}"

        t = self.to_table()
        t.set_repr_policy(show_shape=False)
        return t._repr_html_()

    def to_string(self, format_name: str = "tsv", sep: str | None = None) -> str:
        """Return the data as a formatted string.

        Parameters
        ----------
        format_name
            possible formats are 'csv', or 'tsv' (default).
        sep
            A string separator for delineating columns, e.g. ',' or
            '\t'. Overrides format.

        Notes
        -----
        The first line is a header, dim-1 .. dim-n followed by value. Each
        following line has the labels of one element followed by its value.

        Raises
        ------
        ValueError if format_name is not supported
        """
        if format_name.lower() not in ("tsv", "csv"):
            msg = f"'{format_name}' not supported"
            raise ValueError(msg)

        sep = sep or {"tsv": "\t", "csv": ","}[format_name.lower()]

        data = cast(
            "dict[tuple[int | NumericKey | str, ...], Any]",
            self.to_dict(flatten=True),
        )
        rows = [[f"dim-{i + 1}" for i in range(self.array.ndim)] + ["value"]]
        for key, value in data.items():
            # the keys from a 1D array are the labels themselves, not tuples
            coord = key if isinstance(key, tuple) else (key,)
            rows.append([str(x) for x in (*coord, value)])
        return "\n".join([sep.join(row) for row in rows])

    def to_table(self) -> "Table":
        """return Table instance

        Notes
        -----
        Raises ValueError if number of dimensions > 2
        """
        ndim = self.array.ndim
        if ndim > 2:
            msg = f"cannot make 2D table from {ndim}D array"
            raise ValueError(msg)

        # imported here, at module level it is only imported for type checking
        from cogent3.core.table import Table

        header = self.template.names[0] if ndim == 1 else self.template.names[1]
        index = "" if ndim == 2 else None
        if ndim == 1:
            data = {c: [v] for c, v in zip(header, self.array, strict=False)}
        else:
            data = {c: self.array[:, i].tolist() for i, c in enumerate(header)}
            # the row names become the column named "", used as the index
            data[""] = self.template.names[0]

        return Table(
            header=cast("list[str]", header),
            data=cast("list[list[Any]]", data),  # FIXME: Incorrect cast
            index_name=index,
        )

    def write(
        self, path: PathType, format_name: str = "tsv", sep: str | None = None
    ) -> None:
        """writes a flattened version to path

        Parameters
        ----------
        path
            where the data is written
        format_name
            possible formats are 'csv', or 'tsv' (default).
        sep
            used to separate fields. If not provided, it is determined by
            format_name.
        """
        data = self.to_string(format_name=format_name, sep=sep)
        with atomic_write(path, mode="wt") as outfile:
            outfile.write(data)
6✔
800

801

802
@register_deserialiser(
    get_object_provenance(DictArrayTemplate),
)
def deserialise_dict_array(data: dict[str, Any]) -> DictArray:
    """deserialising DictArray, Table instances

    Parameters
    ----------
    data
        a dict with the keys "type" (resolved to a class by get_class),
        "names" (the names of each dimension) and "array". A "version" key
        is ignored.

    Notes
    -----
    data is not modified, so it can be deserialised again.
    """
    klass = get_class(data["type"])
    template = klass(*data["names"])
    return template.wrap(data["array"])
6✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc