• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jnothman / UpSetPlot / 7344254321

28 Dec 2023 03:58AM UTC coverage: 98.586% (+15.0%) from 83.549%
7344254321

push

github

web-flow
Format with black/ruff (#240)

844 of 848 new or added lines in 8 files covered. (99.53%)

4 existing lines in 3 files now uncovered.

1534 of 1556 relevant lines covered (98.59%)

0.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

100.0
/upsetplot/tests/test_data.py
1
from collections import OrderedDict
2✔
2
import pytest
2✔
3
import pandas as pd
2✔
4
import numpy as np
2✔
5
from distutils.version import LooseVersion
2✔
6
from pandas.testing import assert_series_equal, assert_frame_equal, assert_index_equal
2✔
7
from upsetplot import from_memberships, from_contents, from_indicators, generate_data
2✔
8

9

10
@pytest.mark.parametrize("typ", [set, list, tuple, iter])
def test_from_memberships_no_data(typ):
    """Exercise ``from_memberships`` when no ``data`` argument is given.

    Invalid inputs must raise. Valid memberships wrapped in ``typ`` (a set,
    list, tuple or one-shot iterator) must yield a series of ones whose index
    is a boolean MultiIndex over the categories, ordered by category name.
    """

    def series_of_ones(rows):
        # Each row is [in_hello, in_world, 1].
        frame = pd.DataFrame(rows, columns=["hello", "world", "ones"])
        return frame.set_index(["hello", "world"])["ones"]

    # (memberships, exception type, message pattern or None for "any message")
    rejected_inputs = [
        ([], ValueError, "at least one category"),
        ([[], []], ValueError, "at least one category"),
        ([[1]], ValueError, "strings"),
        ([[1, "str"]], ValueError, "strings"),
        ([1], TypeError, None),
    ]
    for bad_memberships, exc_type, message in rejected_inputs:
        with pytest.raises(exc_type, match=message):
            from_memberships(bad_memberships)

    # Every combination of the two categories, including the empty one.
    every_combination = [
        typ([]),
        typ(["hello"]),
        typ(["world"]),
        typ(["hello", "world"]),
    ]
    out = from_memberships(every_combination)
    exp = series_of_ones(
        [[False, False, 1], [True, False, 1], [False, True, 1], [True, True, 1]]
    )
    assert isinstance(exp.index, pd.MultiIndex)
    assert_series_equal(exp, out)

    # Index levels are sorted by name, whichever category is seen first.
    out = from_memberships([typ(["hello"]), typ(["world"])])
    exp = series_of_ones([[True, False, 1], [False, True, 1]])
    assert_series_equal(exp, out)

    out = from_memberships([typ(["world"]), typ(["hello"])])
    exp = series_of_ones([[False, True, 1], [True, False, 1]])
    assert_series_equal(exp, out)
2✔
49

50

51
@pytest.mark.parametrize(
    "data,ndim",
    [
        ([1, 2, 3, 4], 1),
        (np.array([1, 2, 3, 4]), 1),
        (pd.Series([1, 2, 3, 4], name="foo"), 1),
        ([[1, "a"], [2, "b"], [3, "c"], [4, "d"]], 2),
        (
            pd.DataFrame(
                [[1, "a"], [2, "b"], [3, "c"], [4, "d"]],
                columns=["foo", "bar"],
                index=["q", "r", "s", "t"],
            ),
            2,
        ),
    ],
)
def test_from_memberships_with_data(data, ndim):
    """Check ``from_memberships`` when ``data`` is supplied alongside it.

    ``data`` is a list, array, Series, nested list or DataFrame with four
    rows; ``ndim`` says whether a Series (1) or a DataFrame (otherwise) is
    expected back.
    """
    memberships = [[], ["hello"], ["world"], ["hello", "world"]]
    result = from_memberships(memberships, data=data)

    # The caller's object must never be handed back as-is ...
    assert result is not data
    # ... but for numeric/boolean pandas input the underlying buffer is
    # expected to be shared rather than deep-copied.
    if (
        hasattr(data, "loc")
        and np.asarray(data).dtype.kind in "ifb"
        and LooseVersion(pd.__version__) > LooseVersion("0.35")
    ):
        assert result.values.base is np.asarray(data).base

    expected_type = pd.Series if ndim == 1 else pd.DataFrame
    assert isinstance(result, expected_type)

    # Values must round-trip unchanged once the index is ignored.
    result_frame = pd.DataFrame(result).reset_index(drop=True)
    data_frame = pd.DataFrame(data).reset_index(drop=True)
    assert_frame_equal(result_frame, data_frame)

    # The index must match what the same memberships give without data.
    index_only = from_memberships(memberships=memberships)
    assert_index_equal(result.index, index_only.index)

    # One membership fewer than there are data rows is rejected.
    with pytest.raises(ValueError, match="length"):
        from_memberships(memberships[:-1], data=data)
2✔
89

90

91
@pytest.mark.parametrize(
    "data", [None, {"attr1": [3, 4, 5, 6, 7, 8], "attr2": list("qrstuv")}]
)
@pytest.mark.parametrize("typ", [set, list, tuple, iter])
@pytest.mark.parametrize("id_column", ["id", "blah"])
def test_from_contents_vs_memberships(data, typ, id_column):
    """Check that ``from_contents`` agrees with ``from_memberships``.

    The same category assignment is expressed once as category -> ids
    (``from_contents``) and once as per-row category sets
    (``from_memberships``). Apart from the extra ``id_column``, the two
    results must be equal.
    """
    contents = OrderedDict()
    contents["cat1"] = typ(["aa", "bb", "cc"])
    contents["cat2"] = typ(["cc", "dd"])
    contents["cat3"] = typ(["ee"])

    # "ff" is in the data index but belongs to none of the categories.
    data_df = pd.DataFrame(data, index=["aa", "bb", "cc", "dd", "ee", "ff"])
    from_contents_out = from_contents(contents, data=data_df, id_column=id_column)

    # The same assignment, one set of categories per row of data_df.
    row_memberships = [{"cat1"}, {"cat1"}, {"cat1", "cat2"}, {"cat2"}, {"cat3"}, []]
    from_memberships_out = from_memberships(
        memberships=row_memberships, data=data_df
    )

    # The id column must list every id in the order of the data index.
    actual_ids = from_contents_out[id_column].reset_index(drop=True)
    expected_ids = pd.Series(["aa", "bb", "cc", "dd", "ee", "ff"], name=id_column)
    assert_series_equal(actual_ids, expected_ids)

    # With the id column dropped, both constructions must coincide. The
    # column-type check is skipped when no columns remain to compare.
    remainder = from_contents_out.drop([id_column], axis=1)
    has_columns = remainder.shape[1] > 0
    assert_frame_equal(
        remainder,
        from_memberships_out,
        check_column_type=has_columns,
    )
122

123

124
def test_from_contents(typ=set, id_column="id"):
    """Check ``from_contents`` against a baseline built from empty data.

    Covers three variations: no ``data`` at all, categories supplied in a
    different order, and an additional empty category.

    NOTE(review): ``typ`` is accepted but never used in the body.
    """
    contents = OrderedDict()
    contents["cat1"] = {"aa", "bb", "cc"}
    contents["cat2"] = {"cc", "dd"}
    contents["cat3"] = {"ee"}

    empty_data = pd.DataFrame(index=["aa", "bb", "cc", "dd", "ee"])
    baseline = from_contents(contents, data=empty_data, id_column=id_column)

    # Leaving data out gives the baseline once rows are sorted by id.
    without_data = from_contents(contents, id_column=id_column)
    assert_frame_equal(without_data.sort_values(id_column), baseline)

    # Categories passed in reverse order: only the index level order differs.
    reversed_contents = {name: contents[name] for name in ("cat3", "cat2", "cat1")}
    reordered = from_contents(
        reversed_contents,
        data=empty_data,
        id_column=id_column,
    )
    assert_frame_equal(reordered.reorder_levels(["cat1", "cat2", "cat3"]), baseline)

    # An extra category with no members adds an index level that is False
    # everywhere.
    with_empty_category = from_contents(
        dict(contents, cat4=[]),
        data=empty_data,
        id_column=id_column,
    )
    index_frame = with_empty_category.index.to_frame()
    assert not index_frame["cat4"].any()  # cat4 should be all-false
    assert len(with_empty_category.index.names) == 4
    # Removing the cat4 level must leave exactly the baseline.
    with_empty_category.index = index_frame.set_index(["cat1", "cat2", "cat3"]).index
    assert_frame_equal(with_empty_category, baseline)
2✔
157

158

159
@pytest.mark.parametrize("id_column", ["id", "blah"])
def test_from_contents_invalid(id_column):
    """Check that ``from_contents`` rejects inconsistent arguments.

    Every case must raise ``ValueError`` with a message matching the given
    pattern.
    """
    contents = OrderedDict()
    contents["cat1"] = {"aa", "bb", "cc"}
    contents["cat2"] = {"cc", "dd"}
    contents["cat3"] = {"ee"}

    # A data column shares its name with a category.
    clashing_data = pd.DataFrame({"cat1": [1, 2, 3, 4, 5]})
    with pytest.raises(ValueError, match="columns overlap"):
        from_contents(contents, data=clashing_data, id_column=id_column)

    # The same id is listed twice within one category.
    repeated_ids = {"cat1": ["aa", "bb"], "cat2": ["dd", "dd"]}
    with pytest.raises(ValueError, match="duplicate ids"):
        from_contents(repeated_ids, id_column=id_column)

    # A category is named like the id column.
    category_named_id = {
        id_column: {"aa", "bb", "cc"},
        "cat2": {"cc", "dd"},
    }
    with pytest.raises(ValueError, match="cannot be named"):
        from_contents(category_named_id, id_column=id_column)

    # A data column is named like the id column.
    data_with_id_column = pd.DataFrame(
        {id_column: [1, 2, 3, 4, 5]}, index=["aa", "bb", "cc", "dd", "ee"]
    )
    with pytest.raises(ValueError, match="cannot contain"):
        from_contents(
            contents,
            data=data_with_id_column,
            id_column=id_column,
        )

    # The data index (default integer index) does not contain the id "aa".
    unrelated_data = pd.DataFrame([[1]])
    with pytest.raises(ValueError, match="identifiers in contents"):
        from_contents({"cat1": ["aa"]}, data=unrelated_data, id_column=id_column)
2✔
190

191

192
@pytest.mark.parametrize(
    "indicators,data,exc_type,match",
    [
        # Column names or a callable given, but no data to apply them to.
        (["a", "b"], None, ValueError, "data must be provided"),
        (lambda df: [True, False, True], None, ValueError, "data must be provided"),
        # A named column that is absent from data.
        (["a", "unknown_col"], {"a": [1, 2, 3]}, KeyError, "unknown_col"),
        # Indicators passed as a tuple.
        (("a",), {"a": [1, 2, 3]}, ValueError, "tuple"),
        # Indicator values that are integers rather than booleans.
        ({"cat1": [0, 1, 1]}, {"a": [1, 2, 3]}, ValueError, "must all be boolean"),
        # Indicator frame indexed by labels, data without those labels.
        (
            pd.DataFrame({"cat1": [True, False, True]}, index=["a", "b", "c"]),
            {"A": [1, 2, 3]},
            ValueError,
            "all its values must be present",
        ),
    ],
)
def test_from_indicators_invalid(indicators, data, exc_type, match):
    """Check that invalid ``from_indicators`` arguments raise as expected.

    Each case supplies the ``indicators`` and ``data`` arguments together
    with the exception type and message pattern the call must produce.
    """
    expected_failure = pytest.raises(exc_type, match=match)
    with expected_failure:
        from_indicators(indicators=indicators, data=data)
2✔
211

212

213
@pytest.mark.parametrize(
    "indicators",
    [
        pd.DataFrame({"cat1": [False, True, False]}),
        pd.DataFrame({"cat1": [False, True, False]}, dtype="O"),
        {"cat1": [False, True, False]},
        lambda data: {"cat1": {pd.DataFrame(data).index.values[1]: True}},
    ],
)
@pytest.mark.parametrize(
    "data",
    [
        pd.DataFrame({"val1": [3, 4, 5]}),
        pd.DataFrame({"val1": [3, 4, 5]}, index=["a", "b", "c"]),
        {"val1": [3, 4, 5]},
    ],
)
def test_from_indicators_equivalence(indicators, data):
    """Check that ``from_indicators`` matches ``from_memberships``.

    Every ``indicators`` variant marks only the second of three rows as a
    member of ``cat1``, so the result must equal the one built from the
    memberships ``[[], ["cat1"], []]`` over the same ``data``.
    """
    via_indicators = from_indicators(indicators, data)
    via_memberships = from_memberships([[], ["cat1"], []], data)
    assert_frame_equal(via_indicators, via_memberships)
234

235

236
def test_generate_data_warning():
    """Check that calling ``generate_data`` emits a ``DeprecationWarning``."""
    expect_deprecation = pytest.warns(DeprecationWarning)
    with expect_deprecation:
        generate_data()
2✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc