• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jnothman / UpSetPlot / 7342943552

28 Dec 2023 12:13AM UTC coverage: 83.549% (-14.0%) from 97.551%
7342943552

push

github

web-flow
Fix warning due to styling dtypes, and fix column dtype test failure (#238)


Fixes #225

6 of 6 new or added lines in 2 files covered. (100.0%)

312 existing lines in 7 files now uncovered.

1681 of 2012 relevant lines covered (83.55%)

1.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.77
/upsetplot/tests/test_data.py
1
from collections import OrderedDict
2✔
2
import pytest
2✔
3
import pandas as pd
2✔
4
import numpy as np
2✔
5
from distutils.version import LooseVersion
2✔
6
from pandas.testing import (assert_series_equal, assert_frame_equal,
2✔
UNCOV
7
                                 assert_index_equal)
×
8
from upsetplot import (from_memberships, from_contents, from_indicators,
2✔
UNCOV
9
                       generate_data)
×
10

11

12
@pytest.mark.parametrize('typ', [set, list, tuple, iter])
2✔
13
def test_from_memberships_no_data(typ):
1✔
14
    with pytest.raises(ValueError, match='at least one category'):
2✔
15
        from_memberships([])
2✔
16
    with pytest.raises(ValueError, match='at least one category'):
2✔
17
        from_memberships([[], []])
2✔
18
    with pytest.raises(ValueError, match='strings'):
2✔
19
        from_memberships([[1]])
2✔
20
    with pytest.raises(ValueError, match='strings'):
2✔
21
        from_memberships([[1, 'str']])
2✔
22
    with pytest.raises(TypeError):
2✔
23
        from_memberships([1])
2✔
24

25
    out = from_memberships([typ([]),
2✔
26
                            typ(['hello']),
2✔
27
                            typ(['world']),
2✔
28
                            typ(['hello', 'world']),
2✔
UNCOV
29
                            ])
×
30
    exp = pd.DataFrame([[False, False, 1],
2✔
31
                        [True, False, 1],
2✔
32
                        [False, True, 1],
2✔
33
                        [True, True, 1]],
2✔
34
                       columns=['hello', 'world', 'ones']
2✔
35
                       ).set_index(['hello', 'world'])['ones']
2✔
36
    assert isinstance(exp.index, pd.MultiIndex)
2✔
37
    assert_series_equal(exp, out)
2✔
38

39
    # test sorting by name
40
    out = from_memberships([typ(['hello']),
2✔
41
                            typ(['world'])])
2✔
42
    exp = pd.DataFrame([[True, False, 1],
2✔
43
                        [False, True, 1]],
2✔
44
                       columns=['hello', 'world', 'ones']
2✔
45
                       ).set_index(['hello', 'world'])['ones']
2✔
46
    assert_series_equal(exp, out)
2✔
47
    out = from_memberships([typ(['world']),
2✔
48
                            typ(['hello'])])
2✔
49
    exp = pd.DataFrame([[False, True, 1],
2✔
50
                        [True, False, 1]],
2✔
51
                       columns=['hello', 'world', 'ones']
2✔
52
                       ).set_index(['hello', 'world'])['ones']
2✔
53
    assert_series_equal(exp, out)
2✔
54

55

56
@pytest.mark.parametrize('data,ndim', [
2✔
57
    ([1, 2, 3, 4], 1),
2✔
58
    (np.array([1, 2, 3, 4]), 1),
2✔
59
    (pd.Series([1, 2, 3, 4], name='foo'), 1),
2✔
60
    ([[1, 'a'], [2, 'b'], [3, 'c'], [4, 'd']], 2),
2✔
61
    (pd.DataFrame([[1, 'a'], [2, 'b'], [3, 'c'], [4, 'd']],
2✔
62
                  columns=['foo', 'bar'],
2✔
63
                  index=['q', 'r', 's', 't']), 2),
2✔
64
])
65
def test_from_memberships_with_data(data, ndim):
1✔
66
    memberships = [[],
2✔
67
                   ['hello'],
2✔
68
                   ['world'],
2✔
69
                   ['hello', 'world']]
2✔
70
    out = from_memberships(memberships, data=data)
2✔
71
    assert out is not data  # make sure frame is copied
2✔
72
    if hasattr(data, 'loc') and np.asarray(data).dtype.kind in 'ifb':
2✔
73
        # but not deepcopied when possible
74
        if LooseVersion(pd.__version__) > LooseVersion('0.35'):
2✔
75
            assert out.values.base is np.asarray(data).base
2✔
76
    if ndim == 1:
2✔
77
        assert isinstance(out, pd.Series)
2✔
UNCOV
78
    else:
×
79
        assert isinstance(out, pd.DataFrame)
2✔
80
    assert_frame_equal(pd.DataFrame(out).reset_index(drop=True),
2✔
81
                       pd.DataFrame(data).reset_index(drop=True))
2✔
82
    no_data = from_memberships(memberships=memberships)
2✔
83
    assert_index_equal(out.index, no_data.index)
2✔
84

85
    with pytest.raises(ValueError, match='length'):
2✔
86
        from_memberships(memberships[:-1], data=data)
2✔
87

88

89
@pytest.mark.parametrize('data', [None,
2✔
90
                                  {'attr1': [3, 4, 5, 6, 7, 8],
2✔
91
                                   'attr2': list('qrstuv')}])
2✔
92
@pytest.mark.parametrize('typ', [set, list, tuple, iter])
2✔
93
@pytest.mark.parametrize('id_column', ['id', 'blah'])
2✔
94
def test_from_contents_vs_memberships(data, typ, id_column):
1✔
95
    contents = OrderedDict([('cat1', typ(['aa', 'bb', 'cc'])),
2✔
96
                            ('cat2', typ(['cc', 'dd'])),
2✔
97
                            ('cat3', typ(['ee']))])
2✔
98
    # Note that ff is not present in contents
99
    data_df = pd.DataFrame(data,
2✔
100
                           index=['aa', 'bb', 'cc', 'dd', 'ee', 'ff'])
2✔
101
    baseline = from_contents(contents, data=data_df,
2✔
102
                             id_column=id_column)
2✔
103
    # compare from_contents to from_memberships
104
    expected = from_memberships(memberships=[{'cat1'},
2✔
105
                                             {'cat1'},
2✔
106
                                             {'cat1', 'cat2'},
2✔
107
                                             {'cat2'},
2✔
108
                                             {'cat3'},
2✔
109
                                             []],
2✔
110
                                data=data_df)
2✔
111
    assert_series_equal(baseline[id_column].reset_index(drop=True),
2✔
112
                        pd.Series(['aa', 'bb', 'cc', 'dd', 'ee', 'ff'],
2✔
113
                                  name=id_column))
2✔
114
    baseline_without_id = baseline.drop([id_column], axis=1)
2✔
115
    assert_frame_equal(baseline_without_id, expected, check_column_type=baseline_without_id.shape[1] > 0)
2✔
116

117

118
def test_from_contents(typ=set, id_column='id'):
2✔
119
    contents = OrderedDict([('cat1', {'aa', 'bb', 'cc'}),
2✔
120
                            ('cat2', {'cc', 'dd'}),
2✔
121
                            ('cat3', {'ee'})])
2✔
122
    empty_data = pd.DataFrame(index=['aa', 'bb', 'cc', 'dd', 'ee'])
2✔
123
    baseline = from_contents(contents, data=empty_data,
2✔
124
                             id_column=id_column)
2✔
125
    # data=None
126
    out = from_contents(contents, id_column=id_column)
2✔
127
    assert_frame_equal(out.sort_values(id_column), baseline)
2✔
128

129
    # unordered contents dict
130
    out = from_contents({'cat3': contents['cat3'],
2✔
131
                         'cat2': contents['cat2'],
2✔
132
                         'cat1': contents['cat1']},
2✔
133
                        data=empty_data, id_column=id_column)
2✔
134
    assert_frame_equal(out.reorder_levels(['cat1', 'cat2', 'cat3']),
2✔
135
                       baseline)
2✔
136

137
    # empty category
138
    out = from_contents({'cat1': contents['cat1'],
2✔
139
                         'cat2': contents['cat2'],
2✔
140
                         'cat3': contents['cat3'],
2✔
141
                         'cat4': []},
2✔
142
                        data=empty_data,
2✔
143
                        id_column=id_column)
2✔
144
    assert not out.index.to_frame()['cat4'].any()  # cat4 should be all-false
2✔
145
    assert len(out.index.names) == 4
2✔
146
    out.index = out.index.to_frame().set_index(['cat1', 'cat2', 'cat3']).index
2✔
147
    assert_frame_equal(out, baseline)
2✔
148

149

150
@pytest.mark.parametrize('id_column', ['id', 'blah'])
2✔
151
def test_from_contents_invalid(id_column):
1✔
152
    contents = OrderedDict([('cat1', {'aa', 'bb', 'cc'}),
2✔
153
                            ('cat2', {'cc', 'dd'}),
2✔
154
                            ('cat3', {'ee'})])
2✔
155
    with pytest.raises(ValueError, match='columns overlap'):
2✔
156
        from_contents(contents,
2✔
157
                      data=pd.DataFrame({'cat1': [1, 2, 3, 4, 5]}),
2✔
158
                      id_column=id_column)
2✔
159
    with pytest.raises(ValueError, match='duplicate ids'):
2✔
160
        from_contents({'cat1': ['aa', 'bb'],
2✔
161
                       'cat2': ['dd', 'dd']}, id_column=id_column)
2✔
162
    # category named id
163
    with pytest.raises(ValueError, match='cannot be named'):
2✔
164
        from_contents({id_column: {'aa', 'bb', 'cc'},
2✔
165
                       'cat2': {'cc', 'dd'},
2✔
166
                       }, id_column=id_column)
2✔
167
    # data column named id
168
    with pytest.raises(ValueError, match='cannot contain'):
2✔
169
        from_contents(contents,
2✔
170
                      data=pd.DataFrame({id_column: [1, 2, 3, 4, 5]},
2✔
171
                                        index=['aa', 'bb', 'cc', 'dd', 'ee']),
2✔
172
                      id_column=id_column)
2✔
173
    with pytest.raises(ValueError, match='identifiers in contents'):
2✔
174
        from_contents({'cat1': ['aa']},
2✔
175
                      data=pd.DataFrame([[1]]),
2✔
176
                      id_column=id_column)
2✔
177

178

179
@pytest.mark.parametrize('indicators,data,exc_type,match', [
2✔
180
    (["a", "b"], None, ValueError, "data must be provided"),
2✔
181
    (lambda df: [True, False, True], None, ValueError,
2✔
182
     "data must be provided"),
2✔
183
    (["a", "unknown_col"], {"a": [1, 2, 3]}, KeyError, "unknown_col"),
2✔
184
    (("a",), {"a": [1, 2, 3]}, ValueError, "tuple"),
2✔
185
    ({"cat1": [0, 1, 1]}, {"a": [1, 2, 3]}, ValueError, "must all be boolean"),
2✔
186
    (pd.DataFrame({"cat1": [True, False, True]}, index=["a", "b", "c"]),
2✔
187
     {"A": [1, 2, 3]},
2✔
188
     ValueError, "all its values must be present"),
2✔
189
])
190
def test_from_indicators_invalid(indicators, data, exc_type, match):
1✔
191
    with pytest.raises(exc_type, match=match):
2✔
192
        from_indicators(indicators=indicators, data=data)
2✔
193

194

195
@pytest.mark.parametrize('indicators', [
2✔
196
    pd.DataFrame({"cat1": [False, True, False]}),
2✔
197
    pd.DataFrame({"cat1": [False, True, False]}, dtype="O"),
2✔
198
    {"cat1": [False, True, False]},
2✔
199
    lambda data: {"cat1": {pd.DataFrame(data).index.values[1]: True}},
2✔
200
])
201
@pytest.mark.parametrize('data', [
2✔
202
    pd.DataFrame({"val1": [3, 4, 5]}),
2✔
203
    pd.DataFrame({"val1": [3, 4, 5]}, index=["a", "b", "c"]),
2✔
204
    {"val1": [3, 4, 5]},
2✔
205
])
206
def test_from_indicators_equivalence(indicators, data):
1✔
207
    assert_frame_equal(from_indicators(indicators, data),
2✔
208
                       from_memberships([[], ["cat1"], []], data))
2✔
209

210

211
def test_generate_data_warning():
2✔
212
    with pytest.warns(DeprecationWarning):
2✔
213
        generate_data()
2✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc