jnothman/UpSetPlot | Build 7342943552 | upsetplot/tests/test_data.py | Coveralls

1

from collections import OrderedDict

2✔

2

import pytest

2✔

3

import pandas as pd

2✔

4

import numpy as np

2✔

5

from distutils.version import LooseVersion

2✔

6

from pandas.testing import (assert_series_equal, assert_frame_equal,

2✔

8

from upsetplot import (from_memberships, from_contents, from_indicators,

2✔

12

@pytest.mark.parametrize('typ', [set, list, tuple, iter])

2✔

13

def test_from_memberships_no_data(typ):

1✔

14

    with pytest.raises(ValueError, match='at least one category'):

2✔

15

        from_memberships([])

2✔

16

    with pytest.raises(ValueError, match='at least one category'):

2✔

17

        from_memberships([[], []])

2✔

18

    with pytest.raises(ValueError, match='strings'):

2✔

19

        from_memberships([[1]])

2✔

20

    with pytest.raises(ValueError, match='strings'):

2✔

21

        from_memberships([[1, 'str']])

2✔

22

    with pytest.raises(TypeError):

2✔

23

        from_memberships([1])

2✔

25

    out = from_memberships([typ([]),

2✔

26

                            typ(['hello']),

2✔

27

                            typ(['world']),

2✔

28

                            typ(['hello', 'world']),

2✔

30

    exp = pd.DataFrame([[False, False, 1],

2✔

31

                        [True, False, 1],

2✔

32

                        [False, True, 1],

2✔

33

                        [True, True, 1]],

2✔

34

                       columns=['hello', 'world', 'ones']

2✔

35

                       ).set_index(['hello', 'world'])['ones']

2✔

36

    assert isinstance(exp.index, pd.MultiIndex)

2✔

37

    assert_series_equal(exp, out)

2✔

40

    out = from_memberships([typ(['hello']),

2✔

41

                            typ(['world'])])

2✔

42

    exp = pd.DataFrame([[True, False, 1],

2✔

43

                        [False, True, 1]],

2✔

44

                       columns=['hello', 'world', 'ones']

2✔

45

                       ).set_index(['hello', 'world'])['ones']

2✔

46

    assert_series_equal(exp, out)

2✔

47

    out = from_memberships([typ(['world']),

2✔

48

                            typ(['hello'])])

2✔

49

    exp = pd.DataFrame([[False, True, 1],

2✔

50

                        [True, False, 1]],

2✔

51

                       columns=['hello', 'world', 'ones']

2✔

52

                       ).set_index(['hello', 'world'])['ones']

2✔

53

    assert_series_equal(exp, out)

2✔

56

@pytest.mark.parametrize('data,ndim', [

2✔

57

    ([1, 2, 3, 4], 1),

2✔

58

    (np.array([1, 2, 3, 4]), 1),

2✔

59

    (pd.Series([1, 2, 3, 4], name='foo'), 1),

2✔

60

    ([[1, 'a'], [2, 'b'], [3, 'c'], [4, 'd']], 2),

2✔

61

    (pd.DataFrame([[1, 'a'], [2, 'b'], [3, 'c'], [4, 'd']],

2✔

62

                  columns=['foo', 'bar'],

2✔

63

                  index=['q', 'r', 's', 't']), 2),

2✔

65

def test_from_memberships_with_data(data, ndim):

1✔

66

    memberships = [[],

2✔

67

                   ['hello'],

2✔

68

                   ['world'],

2✔

69

                   ['hello', 'world']]

2✔

70

    out = from_memberships(memberships, data=data)

2✔

71

    assert out is not data  # make sure frame is copied

2✔

72

    if hasattr(data, 'loc') and np.asarray(data).dtype.kind in 'ifb':

2✔

74

        if LooseVersion(pd.__version__) > LooseVersion('0.35'):

2✔

75

            assert out.values.base is np.asarray(data).base

2✔

76

    if ndim == 1:

2✔

77

        assert isinstance(out, pd.Series)

2✔

79

        assert isinstance(out, pd.DataFrame)

2✔

80

    assert_frame_equal(pd.DataFrame(out).reset_index(drop=True),

2✔

81

                       pd.DataFrame(data).reset_index(drop=True))

2✔

82

    no_data = from_memberships(memberships=memberships)

2✔

83

    assert_index_equal(out.index, no_data.index)

2✔

85

    with pytest.raises(ValueError, match='length'):

2✔

86

        from_memberships(memberships[:-1], data=data)

2✔

89

@pytest.mark.parametrize('data', [None,

2✔

90

                                  {'attr1': [3, 4, 5, 6, 7, 8],

2✔

91

                                   'attr2': list('qrstuv')}])

2✔

92

@pytest.mark.parametrize('typ', [set, list, tuple, iter])

2✔

93

@pytest.mark.parametrize('id_column', ['id', 'blah'])

2✔

94

def test_from_contents_vs_memberships(data, typ, id_column):

1✔

95

    contents = OrderedDict([('cat1', typ(['aa', 'bb', 'cc'])),

2✔

96

                            ('cat2', typ(['cc', 'dd'])),

2✔

97

                            ('cat3', typ(['ee']))])

2✔

99

    data_df = pd.DataFrame(data,

2✔

100

                           index=['aa', 'bb', 'cc', 'dd', 'ee', 'ff'])

2✔

101

    baseline = from_contents(contents, data=data_df,

2✔

102

                             id_column=id_column)

2✔

104

    expected = from_memberships(memberships=[{'cat1'},

2✔

105

                                             {'cat1'},

2✔

106

                                             {'cat1', 'cat2'},

2✔

107

                                             {'cat2'},

2✔

108

                                             {'cat3'},

2✔

109

                                             []],

2✔

110

                                data=data_df)

2✔

111

    assert_series_equal(baseline[id_column].reset_index(drop=True),

2✔

112

                        pd.Series(['aa', 'bb', 'cc', 'dd', 'ee', 'ff'],

2✔

113

                                  name=id_column))

2✔

114

    baseline_without_id = baseline.drop([id_column], axis=1)

2✔

115

    assert_frame_equal(baseline_without_id, expected, check_column_type=baseline_without_id.shape[1] > 0)

2✔

118

def test_from_contents(typ=set, id_column='id'):

2✔

119

    contents = OrderedDict([('cat1', {'aa', 'bb', 'cc'}),

2✔

120

                            ('cat2', {'cc', 'dd'}),

2✔

121

                            ('cat3', {'ee'})])

2✔

122

    empty_data = pd.DataFrame(index=['aa', 'bb', 'cc', 'dd', 'ee'])

2✔

123

    baseline = from_contents(contents, data=empty_data,

2✔

124

                             id_column=id_column)

2✔

126

    out = from_contents(contents, id_column=id_column)

2✔

127

    assert_frame_equal(out.sort_values(id_column), baseline)

2✔

130

    out = from_contents({'cat3': contents['cat3'],

2✔

131

                         'cat2': contents['cat2'],

2✔

132

                         'cat1': contents['cat1']},

2✔

133

                        data=empty_data, id_column=id_column)

2✔

134

    assert_frame_equal(out.reorder_levels(['cat1', 'cat2', 'cat3']),

2✔

135

                       baseline)

2✔

138

    out = from_contents({'cat1': contents['cat1'],

2✔

139

                         'cat2': contents['cat2'],

2✔

140

                         'cat3': contents['cat3'],

2✔

141

                         'cat4': []},

2✔

142

                        data=empty_data,

2✔

143

                        id_column=id_column)

2✔

144

    assert not out.index.to_frame()['cat4'].any()  # cat4 should be all-false

2✔

145

    assert len(out.index.names) == 4

2✔

146

    out.index = out.index.to_frame().set_index(['cat1', 'cat2', 'cat3']).index

2✔

147

    assert_frame_equal(out, baseline)

2✔

150

@pytest.mark.parametrize('id_column', ['id', 'blah'])

2✔

151

def test_from_contents_invalid(id_column):

1✔

152

    contents = OrderedDict([('cat1', {'aa', 'bb', 'cc'}),

2✔

153

                            ('cat2', {'cc', 'dd'}),

2✔

154

                            ('cat3', {'ee'})])

2✔

155

    with pytest.raises(ValueError, match='columns overlap'):

2✔

156

        from_contents(contents,

2✔

157

                      data=pd.DataFrame({'cat1': [1, 2, 3, 4, 5]}),

2✔

158

                      id_column=id_column)

2✔

159

    with pytest.raises(ValueError, match='duplicate ids'):

2✔

160

        from_contents({'cat1': ['aa', 'bb'],

2✔

161

                       'cat2': ['dd', 'dd']}, id_column=id_column)

2✔

163

    with pytest.raises(ValueError, match='cannot be named'):

2✔

164

        from_contents({id_column: {'aa', 'bb', 'cc'},

2✔

165

                       'cat2': {'cc', 'dd'},

2✔

166

                       }, id_column=id_column)

2✔

168

    with pytest.raises(ValueError, match='cannot contain'):

2✔

169

        from_contents(contents,

2✔

170

                      data=pd.DataFrame({id_column: [1, 2, 3, 4, 5]},

2✔

171

                                        index=['aa', 'bb', 'cc', 'dd', 'ee']),

2✔

172

                      id_column=id_column)

2✔

173

    with pytest.raises(ValueError, match='identifiers in contents'):

2✔

174

        from_contents({'cat1': ['aa']},

2✔

175

                      data=pd.DataFrame([[1]]),

2✔

176

                      id_column=id_column)

2✔

179

@pytest.mark.parametrize('indicators,data,exc_type,match', [

2✔

180

    (["a", "b"], None, ValueError, "data must be provided"),

2✔

181

    (lambda df: [True, False, True], None, ValueError,

2✔

182

     "data must be provided"),

2✔

183

    (["a", "unknown_col"], {"a": [1, 2, 3]}, KeyError, "unknown_col"),

2✔

184

    (("a",), {"a": [1, 2, 3]}, ValueError, "tuple"),

2✔

185

    ({"cat1": [0, 1, 1]}, {"a": [1, 2, 3]}, ValueError, "must all be boolean"),

2✔

186

    (pd.DataFrame({"cat1": [True, False, True]}, index=["a", "b", "c"]),

2✔

187

     {"A": [1, 2, 3]},

2✔

188

     ValueError, "all its values must be present"),

2✔

190

def test_from_indicators_invalid(indicators, data, exc_type, match):

1✔

191

    with pytest.raises(exc_type, match=match):

2✔

192

        from_indicators(indicators=indicators, data=data)

2✔

195

@pytest.mark.parametrize('indicators', [

2✔

196

    pd.DataFrame({"cat1": [False, True, False]}),

2✔

197

    pd.DataFrame({"cat1": [False, True, False]}, dtype="O"),

2✔

198

    {"cat1": [False, True, False]},

2✔

199

    lambda data: {"cat1": {pd.DataFrame(data).index.values[1]: True}},

2✔

201

@pytest.mark.parametrize('data', [

2✔

202

    pd.DataFrame({"val1": [3, 4, 5]}),

2✔

203

    pd.DataFrame({"val1": [3, 4, 5]}, index=["a", "b", "c"]),

2✔

204

    {"val1": [3, 4, 5]},

2✔

206

def test_from_indicators_equivalence(indicators, data):

1✔

207

    assert_frame_equal(from_indicators(indicators, data),

2✔

208

                       from_memberships([[], ["cat1"], []], data))

2✔

211

def test_generate_data_warning():

2✔

212

    with pytest.warns(DeprecationWarning):

2✔

213

        generate_data()

2✔

jnothman / UpSetPlot / 7342943552

Source File
Press 'n' to go to next uncovered line, 'b' for previous

1	from collections import OrderedDict	2✔
2	import pytest	2✔
3	import pandas as pd	2✔
4	import numpy as np	2✔
5	from distutils.version import LooseVersion	2✔
6	from pandas.testing import (assert_series_equal, assert_frame_equal,	2✔
UNCOV 7	assert_index_equal)	×
8	from upsetplot import (from_memberships, from_contents, from_indicators,	2✔
UNCOV 9	generate_data)	×
10
11
12	@pytest.mark.parametrize('typ', [set, list, tuple, iter])	2✔
13	def test_from_memberships_no_data(typ):	1✔
14	with pytest.raises(ValueError, match='at least one category'):	2✔
15	from_memberships([])	2✔
16	with pytest.raises(ValueError, match='at least one category'):	2✔
17	from_memberships([[], []])	2✔
18	with pytest.raises(ValueError, match='strings'):	2✔
19	from_memberships([[1]])	2✔
20	with pytest.raises(ValueError, match='strings'):	2✔
21	from_memberships([[1, 'str']])	2✔
22	with pytest.raises(TypeError):	2✔
23	from_memberships([1])	2✔
24
25	out = from_memberships([typ([]),	2✔
26	typ(['hello']),	2✔
27	typ(['world']),	2✔
28	typ(['hello', 'world']),	2✔
UNCOV 29	])	×
30	exp = pd.DataFrame([[False, False, 1],	2✔
31	[True, False, 1],	2✔
32	[False, True, 1],	2✔
33	[True, True, 1]],	2✔
34	columns=['hello', 'world', 'ones']	2✔
35	).set_index(['hello', 'world'])['ones']	2✔
36	assert isinstance(exp.index, pd.MultiIndex)	2✔
37	assert_series_equal(exp, out)	2✔
38
39	# test sorting by name
40	out = from_memberships([typ(['hello']),	2✔
41	typ(['world'])])	2✔
42	exp = pd.DataFrame([[True, False, 1],	2✔
43	[False, True, 1]],	2✔
44	columns=['hello', 'world', 'ones']	2✔
45	).set_index(['hello', 'world'])['ones']	2✔
46	assert_series_equal(exp, out)	2✔
47	out = from_memberships([typ(['world']),	2✔
48	typ(['hello'])])	2✔
49	exp = pd.DataFrame([[False, True, 1],	2✔
50	[True, False, 1]],	2✔
51	columns=['hello', 'world', 'ones']	2✔
52	).set_index(['hello', 'world'])['ones']	2✔
53	assert_series_equal(exp, out)	2✔
54
55
56	@pytest.mark.parametrize('data,ndim', [	2✔
57	([1, 2, 3, 4], 1),	2✔
58	(np.array([1, 2, 3, 4]), 1),	2✔
59	(pd.Series([1, 2, 3, 4], name='foo'), 1),	2✔
60	([[1, 'a'], [2, 'b'], [3, 'c'], [4, 'd']], 2),	2✔
61	(pd.DataFrame([[1, 'a'], [2, 'b'], [3, 'c'], [4, 'd']],	2✔
62	columns=['foo', 'bar'],	2✔
63	index=['q', 'r', 's', 't']), 2),	2✔
64	])
65	def test_from_memberships_with_data(data, ndim):	1✔
66	memberships = [[],	2✔
67	['hello'],	2✔
68	['world'],	2✔
69	['hello', 'world']]	2✔
70	out = from_memberships(memberships, data=data)	2✔
71	assert out is not data # make sure frame is copied	2✔
72	if hasattr(data, 'loc') and np.asarray(data).dtype.kind in 'ifb':	2✔
73	# but not deepcopied when possible
74	if LooseVersion(pd.__version__) > LooseVersion('0.35'):	2✔
75	assert out.values.base is np.asarray(data).base	2✔
76	if ndim == 1:	2✔
77	assert isinstance(out, pd.Series)	2✔
UNCOV 78	else:	×
79	assert isinstance(out, pd.DataFrame)	2✔
80	assert_frame_equal(pd.DataFrame(out).reset_index(drop=True),	2✔
81	pd.DataFrame(data).reset_index(drop=True))	2✔
82	no_data = from_memberships(memberships=memberships)	2✔
83	assert_index_equal(out.index, no_data.index)	2✔
84
85	with pytest.raises(ValueError, match='length'):	2✔
86	from_memberships(memberships[:-1], data=data)	2✔
87
88
89	@pytest.mark.parametrize('data', [None,	2✔
90	{'attr1': [3, 4, 5, 6, 7, 8],	2✔
91	'attr2': list('qrstuv')}])	2✔
92	@pytest.mark.parametrize('typ', [set, list, tuple, iter])	2✔
93	@pytest.mark.parametrize('id_column', ['id', 'blah'])	2✔
94	def test_from_contents_vs_memberships(data, typ, id_column):	1✔
95	contents = OrderedDict([('cat1', typ(['aa', 'bb', 'cc'])),	2✔
96	('cat2', typ(['cc', 'dd'])),	2✔
97	('cat3', typ(['ee']))])	2✔
98	# Note that ff is not present in contents
99	data_df = pd.DataFrame(data,	2✔
100	index=['aa', 'bb', 'cc', 'dd', 'ee', 'ff'])	2✔
101	baseline = from_contents(contents, data=data_df,	2✔
102	id_column=id_column)	2✔
103	# compare from_contents to from_memberships
104	expected = from_memberships(memberships=[{'cat1'},	2✔
105	{'cat1'},	2✔
106	{'cat1', 'cat2'},	2✔
107	{'cat2'},	2✔
108	{'cat3'},	2✔
109	[]],	2✔
110	data=data_df)	2✔
111	assert_series_equal(baseline[id_column].reset_index(drop=True),	2✔
112	pd.Series(['aa', 'bb', 'cc', 'dd', 'ee', 'ff'],	2✔
113	name=id_column))	2✔
114	baseline_without_id = baseline.drop([id_column], axis=1)	2✔
115	assert_frame_equal(baseline_without_id, expected, check_column_type=baseline_without_id.shape[1] > 0)	2✔
116
117
118	def test_from_contents(typ=set, id_column='id'):	2✔
119	contents = OrderedDict([('cat1', {'aa', 'bb', 'cc'}),	2✔
120	('cat2', {'cc', 'dd'}),	2✔
121	('cat3', {'ee'})])	2✔
122	empty_data = pd.DataFrame(index=['aa', 'bb', 'cc', 'dd', 'ee'])	2✔
123	baseline = from_contents(contents, data=empty_data,	2✔
124	id_column=id_column)	2✔
125	# data=None
126	out = from_contents(contents, id_column=id_column)	2✔
127	assert_frame_equal(out.sort_values(id_column), baseline)	2✔
128
129	# unordered contents dict
130	out = from_contents({'cat3': contents['cat3'],	2✔
131	'cat2': contents['cat2'],	2✔
132	'cat1': contents['cat1']},	2✔
133	data=empty_data, id_column=id_column)	2✔
134	assert_frame_equal(out.reorder_levels(['cat1', 'cat2', 'cat3']),	2✔
135	baseline)	2✔
136
137	# empty category
138	out = from_contents({'cat1': contents['cat1'],	2✔
139	'cat2': contents['cat2'],	2✔
140	'cat3': contents['cat3'],	2✔
141	'cat4': []},	2✔
142	data=empty_data,	2✔
143	id_column=id_column)	2✔
144	assert not out.index.to_frame()['cat4'].any() # cat4 should be all-false	2✔
145	assert len(out.index.names) == 4	2✔
146	out.index = out.index.to_frame().set_index(['cat1', 'cat2', 'cat3']).index	2✔
147	assert_frame_equal(out, baseline)	2✔
148
149
150	@pytest.mark.parametrize('id_column', ['id', 'blah'])	2✔
151	def test_from_contents_invalid(id_column):	1✔
152	contents = OrderedDict([('cat1', {'aa', 'bb', 'cc'}),	2✔
153	('cat2', {'cc', 'dd'}),	2✔
154	('cat3', {'ee'})])	2✔
155	with pytest.raises(ValueError, match='columns overlap'):	2✔
156	from_contents(contents,	2✔
157	data=pd.DataFrame({'cat1': [1, 2, 3, 4, 5]}),	2✔
158	id_column=id_column)	2✔
159	with pytest.raises(ValueError, match='duplicate ids'):	2✔
160	from_contents({'cat1': ['aa', 'bb'],	2✔
161	'cat2': ['dd', 'dd']}, id_column=id_column)	2✔
162	# category named id
163	with pytest.raises(ValueError, match='cannot be named'):	2✔
164	from_contents({id_column: {'aa', 'bb', 'cc'},	2✔
165	'cat2': {'cc', 'dd'},	2✔
166	}, id_column=id_column)	2✔
167	# data already contains a column named id
168	with pytest.raises(ValueError, match='cannot contain'):	2✔
169	from_contents(contents,	2✔
170	data=pd.DataFrame({id_column: [1, 2, 3, 4, 5]},	2✔
171	index=['aa', 'bb', 'cc', 'dd', 'ee']),	2✔
172	id_column=id_column)	2✔
173	with pytest.raises(ValueError, match='identifiers in contents'):	2✔
174	from_contents({'cat1': ['aa']},	2✔
175	data=pd.DataFrame([[1]]),	2✔
176	id_column=id_column)	2✔
177
178
179	@pytest.mark.parametrize('indicators,data,exc_type,match', [	2✔
180	(["a", "b"], None, ValueError, "data must be provided"),	2✔
181	(lambda df: [True, False, True], None, ValueError,	2✔
182	"data must be provided"),	2✔
183	(["a", "unknown_col"], {"a": [1, 2, 3]}, KeyError, "unknown_col"),	2✔
184	(("a",), {"a": [1, 2, 3]}, ValueError, "tuple"),	2✔
185	({"cat1": [0, 1, 1]}, {"a": [1, 2, 3]}, ValueError, "must all be boolean"),	2✔
186	(pd.DataFrame({"cat1": [True, False, True]}, index=["a", "b", "c"]),	2✔
187	{"A": [1, 2, 3]},	2✔
188	ValueError, "all its values must be present"),	2✔
189	])
190	def test_from_indicators_invalid(indicators, data, exc_type, match):	1✔
191	with pytest.raises(exc_type, match=match):	2✔
192	from_indicators(indicators=indicators, data=data)	2✔
193
194
195	@pytest.mark.parametrize('indicators', [	2✔
196	pd.DataFrame({"cat1": [False, True, False]}),	2✔
197	pd.DataFrame({"cat1": [False, True, False]}, dtype="O"),	2✔
198	{"cat1": [False, True, False]},	2✔
199	lambda data: {"cat1": {pd.DataFrame(data).index.values[1]: True}},	2✔
200	])
201	@pytest.mark.parametrize('data', [	2✔
202	pd.DataFrame({"val1": [3, 4, 5]}),	2✔
203	pd.DataFrame({"val1": [3, 4, 5]}, index=["a", "b", "c"]),	2✔
204	{"val1": [3, 4, 5]},	2✔
205	])
206	def test_from_indicators_equivalence(indicators, data):	1✔
207	assert_frame_equal(from_indicators(indicators, data),	2✔
208	from_memberships([[], ["cat1"], []], data))	2✔
209
210
211	def test_generate_data_warning():	2✔
212	with pytest.warns(DeprecationWarning):	2✔
213	generate_data()	2✔

jnothman / UpSetPlot / 7342943552

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous