jnothman/UpSetPlot | Build 7344254321 | upsetplot/tests/test_data.py | Coveralls

1

from collections import OrderedDict

2✔

2

import pytest

2✔

3

import pandas as pd

2✔

4

import numpy as np

2✔

5

from distutils.version import LooseVersion

2✔

6

from pandas.testing import assert_series_equal, assert_frame_equal, assert_index_equal

2✔

7

from upsetplot import from_memberships, from_contents, from_indicators, generate_data

2✔

10

@pytest.mark.parametrize("typ", [set, list, tuple, iter])

2✔

UNCOV

11

def test_from_memberships_no_data(typ):

1✔

12

    with pytest.raises(ValueError, match="at least one category"):

2✔

13

        from_memberships([])

2✔

14

    with pytest.raises(ValueError, match="at least one category"):

2✔

15

        from_memberships([[], []])

2✔

16

    with pytest.raises(ValueError, match="strings"):

2✔

17

        from_memberships([[1]])

2✔

18

    with pytest.raises(ValueError, match="strings"):

2✔

19

        from_memberships([[1, "str"]])

2✔

20

    with pytest.raises(TypeError):

2✔

21

        from_memberships([1])

2✔

23

    out = from_memberships(

2✔

24

1✔

25

            typ([]),

2✔

26

            typ(["hello"]),

2✔

27

            typ(["world"]),

2✔

28

            typ(["hello", "world"]),

2✔

31

    exp = pd.DataFrame(

2✔

32

        [[False, False, 1], [True, False, 1], [False, True, 1], [True, True, 1]],

2✔

33

        columns=["hello", "world", "ones"],

2✔

34

    ).set_index(["hello", "world"])["ones"]

2✔

35

    assert isinstance(exp.index, pd.MultiIndex)

2✔

36

    assert_series_equal(exp, out)

2✔

39

    out = from_memberships([typ(["hello"]), typ(["world"])])

2✔

40

    exp = pd.DataFrame(

2✔

41

        [[True, False, 1], [False, True, 1]], columns=["hello", "world", "ones"]

2✔

42

    ).set_index(["hello", "world"])["ones"]

2✔

43

    assert_series_equal(exp, out)

2✔

44

    out = from_memberships([typ(["world"]), typ(["hello"])])

2✔

45

    exp = pd.DataFrame(

2✔

46

        [[False, True, 1], [True, False, 1]], columns=["hello", "world", "ones"]

2✔

47

    ).set_index(["hello", "world"])["ones"]

2✔

48

    assert_series_equal(exp, out)

2✔

51

@pytest.mark.parametrize(

2✔

52

    "data,ndim",

2✔

53

1✔

54

        ([1, 2, 3, 4], 1),

2✔

55

        (np.array([1, 2, 3, 4]), 1),

2✔

56

        (pd.Series([1, 2, 3, 4], name="foo"), 1),

2✔

57

        ([[1, "a"], [2, "b"], [3, "c"], [4, "d"]], 2),

2✔

58

1✔

59

            pd.DataFrame(

2✔

60

                [[1, "a"], [2, "b"], [3, "c"], [4, "d"]],

2✔

61

                columns=["foo", "bar"],

2✔

62

                index=["q", "r", "s", "t"],

2✔

64

2,

2✔

68

def test_from_memberships_with_data(data, ndim):

1✔

69

    memberships = [[], ["hello"], ["world"], ["hello", "world"]]

2✔

70

    out = from_memberships(memberships, data=data)

2✔

71

    assert out is not data  # make sure frame is copied

2✔

72

    if hasattr(data, "loc") and np.asarray(data).dtype.kind in "ifb":

2✔

74

        if LooseVersion(pd.__version__) > LooseVersion("0.35"):

2✔

75

            assert out.values.base is np.asarray(data).base

2✔

76

    if ndim == 1:

2✔

77

        assert isinstance(out, pd.Series)

2✔

79

        assert isinstance(out, pd.DataFrame)

2✔

80

    assert_frame_equal(

2✔

81

        pd.DataFrame(out).reset_index(drop=True),

2✔

82

        pd.DataFrame(data).reset_index(drop=True),

2✔

84

    no_data = from_memberships(memberships=memberships)

2✔

85

    assert_index_equal(out.index, no_data.index)

2✔

87

    with pytest.raises(ValueError, match="length"):

2✔

88

        from_memberships(memberships[:-1], data=data)

2✔

91

@pytest.mark.parametrize(

2✔

92

    "data", [None, {"attr1": [3, 4, 5, 6, 7, 8], "attr2": list("qrstuv")}]

2✔

94

@pytest.mark.parametrize("typ", [set, list, tuple, iter])

2✔

95

@pytest.mark.parametrize("id_column", ["id", "blah"])

2✔

96

def test_from_contents_vs_memberships(data, typ, id_column):

1✔

97

    contents = OrderedDict(

2✔

98

1✔

99

            ("cat1", typ(["aa", "bb", "cc"])),

2✔

100

            ("cat2", typ(["cc", "dd"])),

2✔

101

            ("cat3", typ(["ee"])),

2✔

105

    data_df = pd.DataFrame(data, index=["aa", "bb", "cc", "dd", "ee", "ff"])

2✔

106

    baseline = from_contents(contents, data=data_df, id_column=id_column)

2✔

108

    expected = from_memberships(

2✔

109

        memberships=[{"cat1"}, {"cat1"}, {"cat1", "cat2"}, {"cat2"}, {"cat3"}, []],

2✔

110

        data=data_df,

2✔

112

    assert_series_equal(

2✔

113

        baseline[id_column].reset_index(drop=True),

2✔

114

        pd.Series(["aa", "bb", "cc", "dd", "ee", "ff"], name=id_column),

2✔

116

    baseline_without_id = baseline.drop([id_column], axis=1)

2✔

117

    assert_frame_equal(

2✔

118

        baseline_without_id,

2✔

119

        expected,

2✔

120

        check_column_type=baseline_without_id.shape[1] > 0,

2✔

124

def test_from_contents(typ=set, id_column="id"):

2✔

125

    contents = OrderedDict(

2✔

126

        [("cat1", {"aa", "bb", "cc"}), ("cat2", {"cc", "dd"}), ("cat3", {"ee"})]

2✔

128

    empty_data = pd.DataFrame(index=["aa", "bb", "cc", "dd", "ee"])

2✔

129

    baseline = from_contents(contents, data=empty_data, id_column=id_column)

2✔

131

    out = from_contents(contents, id_column=id_column)

2✔

132

    assert_frame_equal(out.sort_values(id_column), baseline)

2✔

135

    out = from_contents(

2✔

136

        {"cat3": contents["cat3"], "cat2": contents["cat2"], "cat1": contents["cat1"]},

2✔

137

        data=empty_data,

2✔

138

        id_column=id_column,

2✔

140

    assert_frame_equal(out.reorder_levels(["cat1", "cat2", "cat3"]), baseline)

2✔

143

    out = from_contents(

2✔

144

1✔

145

            "cat1": contents["cat1"],

2✔

146

            "cat2": contents["cat2"],

2✔

147

            "cat3": contents["cat3"],

2✔

148

            "cat4": [],

2✔

150

        data=empty_data,

2✔

151

        id_column=id_column,

2✔

153

    assert not out.index.to_frame()["cat4"].any()  # cat4 should be all-false

2✔

154

    assert len(out.index.names) == 4

2✔

155

    out.index = out.index.to_frame().set_index(["cat1", "cat2", "cat3"]).index

2✔

156

    assert_frame_equal(out, baseline)

2✔

159

@pytest.mark.parametrize("id_column", ["id", "blah"])

2✔

160

def test_from_contents_invalid(id_column):

1✔

161

    contents = OrderedDict(

2✔

162

        [("cat1", {"aa", "bb", "cc"}), ("cat2", {"cc", "dd"}), ("cat3", {"ee"})]

2✔

164

    with pytest.raises(ValueError, match="columns overlap"):

2✔

165

        from_contents(

2✔

166

            contents, data=pd.DataFrame({"cat1": [1, 2, 3, 4, 5]}), id_column=id_column

2✔

168

    with pytest.raises(ValueError, match="duplicate ids"):

2✔

169

        from_contents({"cat1": ["aa", "bb"], "cat2": ["dd", "dd"]}, id_column=id_column)

2✔

171

    with pytest.raises(ValueError, match="cannot be named"):

2✔

172

        from_contents(

2✔

173

1✔

174

                id_column: {"aa", "bb", "cc"},

2✔

175

                "cat2": {"cc", "dd"},

2✔

177

            id_column=id_column,

2✔

180

    with pytest.raises(ValueError, match="cannot contain"):

2✔

181

        from_contents(

2✔

182

            contents,

2✔

183

            data=pd.DataFrame(

2✔

184

                {id_column: [1, 2, 3, 4, 5]}, index=["aa", "bb", "cc", "dd", "ee"]

2✔

186

            id_column=id_column,

2✔

188

    with pytest.raises(ValueError, match="identifiers in contents"):

2✔

189

        from_contents({"cat1": ["aa"]}, data=pd.DataFrame([[1]]), id_column=id_column)

2✔

192

@pytest.mark.parametrize(

2✔

193

    "indicators,data,exc_type,match",

2✔

194

1✔

195

        (["a", "b"], None, ValueError, "data must be provided"),

2✔

196

        (lambda df: [True, False, True], None, ValueError, "data must be provided"),

2✔

197

        (["a", "unknown_col"], {"a": [1, 2, 3]}, KeyError, "unknown_col"),

2✔

198

        (("a",), {"a": [1, 2, 3]}, ValueError, "tuple"),

2✔

199

        ({"cat1": [0, 1, 1]}, {"a": [1, 2, 3]}, ValueError, "must all be boolean"),

2✔

200

1✔

201

            pd.DataFrame({"cat1": [True, False, True]}, index=["a", "b", "c"]),

2✔

202

            {"A": [1, 2, 3]},

2✔

203

            ValueError,

2✔

204

            "all its values must be present",

2✔

208

def test_from_indicators_invalid(indicators, data, exc_type, match):

1✔

209

    with pytest.raises(exc_type, match=match):

2✔

210

        from_indicators(indicators=indicators, data=data)

2✔

213

@pytest.mark.parametrize(

2✔

214

    "indicators",

2✔

215

1✔

216

        pd.DataFrame({"cat1": [False, True, False]}),

2✔

217

        pd.DataFrame({"cat1": [False, True, False]}, dtype="O"),

2✔

218

        {"cat1": [False, True, False]},

2✔

219

        lambda data: {"cat1": {pd.DataFrame(data).index.values[1]: True}},

2✔

222

@pytest.mark.parametrize(

2✔

223

    "data",

2✔

224

1✔

225

        pd.DataFrame({"val1": [3, 4, 5]}),

2✔

226

        pd.DataFrame({"val1": [3, 4, 5]}, index=["a", "b", "c"]),

2✔

227

        {"val1": [3, 4, 5]},

2✔

230

def test_from_indicators_equivalence(indicators, data):

1✔

231

    assert_frame_equal(

2✔

232

        from_indicators(indicators, data), from_memberships([[], ["cat1"], []], data)

2✔

236

def test_generate_data_warning():

2✔

237

    with pytest.warns(DeprecationWarning):

2✔

238

        generate_data()

2✔

jnothman / UpSetPlot / 7344254321

Source File
Press 'n' to go to next uncovered line, 'b' for previous

1	from collections import OrderedDict	2✔
2	import pytest	2✔
3	import pandas as pd	2✔
4	import numpy as np	2✔
5	from distutils.version import LooseVersion	2✔
6	from pandas.testing import assert_series_equal, assert_frame_equal, assert_index_equal	2✔
7	from upsetplot import from_memberships, from_contents, from_indicators, generate_data	2✔
8
9
@pytest.mark.parametrize("typ", [set, list, tuple, iter])
def test_from_memberships_no_data(typ):
    """Exercise ``from_memberships`` without ``data`` for each container type.

    Rejected inputs are checked first; afterwards the returned series is
    compared with a hand-built series of ones indexed by category membership.
    """

    def expected_ones(rows):
        # Each row is [hello, world, ones]; the two boolean columns become
        # the index levels and the "ones" column is selected as a Series.
        frame = pd.DataFrame(rows, columns=["hello", "world", "ones"])
        return frame.set_index(["hello", "world"])["ones"]

    # With no category anywhere in the input a ValueError is expected,
    # both for zero samples and for samples with empty memberships.
    for no_categories in ([], [[], []]):
        with pytest.raises(ValueError, match="at least one category"):
            from_memberships(no_categories)

    # Integer category names are rejected, also when mixed with a string.
    for non_string in ([[1]], [[1, "str"]]):
        with pytest.raises(ValueError, match="strings"):
            from_memberships(non_string)

    # A bare integer in place of a membership raises TypeError.
    with pytest.raises(TypeError):
        from_memberships([1])

    memberships = [
        typ([]),
        typ(["hello"]),
        typ(["world"]),
        typ(["hello", "world"]),
    ]
    result = from_memberships(memberships)
    wanted = expected_ones(
        [[False, False, 1], [True, False, 1], [False, True, 1], [True, True, 1]]
    )
    assert isinstance(wanted.index, pd.MultiIndex)
    assert_series_equal(wanted, result)

    # Sorting by name: "hello" is the first index level in both input orders.
    result = from_memberships([typ(["hello"]), typ(["world"])])
    assert_series_equal(expected_ones([[True, False, 1], [False, True, 1]]), result)

    result = from_memberships([typ(["world"]), typ(["hello"])])
    assert_series_equal(expected_ones([[False, True, 1], [True, False, 1]]), result)
49
50
@pytest.mark.parametrize(
    "data,ndim",
    [
        ([1, 2, 3, 4], 1),
        (np.array([1, 2, 3, 4]), 1),
        (pd.Series([1, 2, 3, 4], name="foo"), 1),
        ([[1, "a"], [2, "b"], [3, "c"], [4, "d"]], 2),
        (
            pd.DataFrame(
                [[1, "a"], [2, "b"], [3, "c"], [4, "d"]],
                columns=["foo", "bar"],
                index=["q", "r", "s", "t"],
            ),
            2,
        ),
    ],
)
def test_from_memberships_with_data(data, ndim):
    """Check ``from_memberships`` when ``data`` accompanies the memberships.

    ``ndim`` states whether a Series (1) or a DataFrame (2) is expected back
    for the given ``data``.
    """
    memberships = [[], ["hello"], ["world"], ["hello", "world"]]
    result = from_memberships(memberships, data=data)

    # The returned object must be a new object, never the input itself.
    assert result is not data

    # For pandas inputs holding int/float/bool values the underlying buffer
    # is expected to be shared rather than deep-copied; the check is only
    # made when the pandas version compares greater than "0.35".
    is_numeric_pandas = hasattr(data, "loc") and np.asarray(data).dtype.kind in "ifb"
    if is_numeric_pandas and LooseVersion(pd.__version__) > LooseVersion("0.35"):
        assert result.values.base is np.asarray(data).base

    expected_type = pd.Series if ndim == 1 else pd.DataFrame
    assert isinstance(result, expected_type)

    # Values must match the input once both sides get a fresh default index.
    result_values = pd.DataFrame(result).reset_index(drop=True)
    input_values = pd.DataFrame(data).reset_index(drop=True)
    assert_frame_equal(result_values, input_values)

    # The membership index is the same with or without data.
    without_data = from_memberships(memberships=memberships)
    assert_index_equal(result.index, without_data.index)

    # One membership fewer than data rows is an error.
    shortened = memberships[:-1]
    with pytest.raises(ValueError, match="length"):
        from_memberships(shortened, data=data)
89
90
@pytest.mark.parametrize(
    "data", [None, {"attr1": [3, 4, 5, 6, 7, 8], "attr2": list("qrstuv")}]
)
@pytest.mark.parametrize("typ", [set, list, tuple, iter])
@pytest.mark.parametrize("id_column", ["id", "blah"])
def test_from_contents_vs_memberships(data, typ, id_column):
    """Compare ``from_contents`` with the equivalent ``from_memberships`` call."""
    # "ff" is an identifier in the data index but belongs to no category.
    identifiers = ["aa", "bb", "cc", "dd", "ee", "ff"]
    category_members = [
        ("cat1", ["aa", "bb", "cc"]),
        ("cat2", ["cc", "dd"]),
        ("cat3", ["ee"]),
    ]
    contents = OrderedDict(
        (category, typ(members)) for category, members in category_members
    )
    data_df = pd.DataFrame(data, index=identifiers)
    from_contents_result = from_contents(contents, data=data_df, id_column=id_column)

    # The same assignment written as one membership per identifier.
    per_sample_memberships = [
        {"cat1"},
        {"cat1"},
        {"cat1", "cat2"},
        {"cat2"},
        {"cat3"},
        [],
    ]
    expected = from_memberships(memberships=per_sample_memberships, data=data_df)

    # The id column must list every identifier, in order.
    id_values = from_contents_result[id_column].reset_index(drop=True)
    assert_series_equal(id_values, pd.Series(identifiers, name=id_column))

    # Apart from the id column, both constructions must agree. Column-type
    # checking is switched off when no columns remain after the drop.
    remaining = from_contents_result.drop([id_column], axis=1)
    has_columns = remaining.shape[1] > 0
    assert_frame_equal(remaining, expected, check_column_type=has_columns)
122
123
def test_from_contents(typ=set, id_column="id"):
    """Check ``from_contents`` against a baseline built with empty data.

    Three variations are compared with that baseline: omitting ``data``,
    passing the categories in reverse order, and adding an empty category.
    ``typ`` is accepted but not used in the body.
    """
    level_names = ["cat1", "cat2", "cat3"]
    contents = OrderedDict(
        [("cat1", {"aa", "bb", "cc"}), ("cat2", {"cc", "dd"}), ("cat3", {"ee"})]
    )
    empty_data = pd.DataFrame(index=["aa", "bb", "cc", "dd", "ee"])
    baseline = from_contents(contents, data=empty_data, id_column=id_column)

    # Without data the result matches the baseline once sorted by id.
    without_data = from_contents(contents, id_column=id_column)
    assert_frame_equal(without_data.sort_values(id_column), baseline)

    # Categories supplied in reverse order: only the level order differs.
    reversed_contents = {name: contents[name] for name in reversed(level_names)}
    reordered = from_contents(
        reversed_contents,
        data=empty_data,
        id_column=id_column,
    )
    assert_frame_equal(reordered.reorder_levels(level_names), baseline)

    # An extra, empty category adds a fourth index level that is all False.
    contents_with_empty = {name: contents[name] for name in level_names}
    contents_with_empty["cat4"] = []
    with_empty = from_contents(
        contents_with_empty,
        data=empty_data,
        id_column=id_column,
    )
    assert not with_empty.index.to_frame()["cat4"].any()
    assert len(with_empty.index.names) == 4

    # Dropping the "cat4" level must give back the baseline frame.
    index_frame = with_empty.index.to_frame()
    with_empty.index = index_frame.set_index(level_names).index
    assert_frame_equal(with_empty, baseline)
157
158
@pytest.mark.parametrize("id_column", ["id", "blah"])
def test_from_contents_invalid(id_column):
    """Check the ``ValueError`` messages ``from_contents`` gives for bad input."""
    contents = OrderedDict(
        [("cat1", {"aa", "bb", "cc"}), ("cat2", {"cc", "dd"}), ("cat3", {"ee"})]
    )

    # A data column shares its name with a category.
    overlapping_data = pd.DataFrame({"cat1": [1, 2, 3, 4, 5]})
    with pytest.raises(ValueError, match="columns overlap"):
        from_contents(contents, data=overlapping_data, id_column=id_column)

    # The same identifier appears twice within one category.
    repeated_ids = {"cat1": ["aa", "bb"], "cat2": ["dd", "dd"]}
    with pytest.raises(ValueError, match="duplicate ids"):
        from_contents(repeated_ids, id_column=id_column)

    # A category carries the same name as the id column.
    category_named_like_id = {
        id_column: {"aa", "bb", "cc"},
        "cat2": {"cc", "dd"},
    }
    with pytest.raises(ValueError, match="cannot be named"):
        from_contents(
            category_named_like_id,
            id_column=id_column,
        )

    # The data already has a column with the same name as the id column.
    data_with_id_column = pd.DataFrame(
        {id_column: [1, 2, 3, 4, 5]}, index=["aa", "bb", "cc", "dd", "ee"]
    )
    with pytest.raises(ValueError, match="cannot contain"):
        from_contents(
            contents,
            data=data_with_id_column,
            id_column=id_column,
        )

    # The data index (default integer index) does not contain "aa".
    unrelated_data = pd.DataFrame([[1]])
    with pytest.raises(ValueError, match="identifiers in contents"):
        from_contents({"cat1": ["aa"]}, data=unrelated_data, id_column=id_column)
190
191
@pytest.mark.parametrize(
    "indicators,data,exc_type,match",
    [
        # Column names or a callable given, but no data to apply them to.
        (["a", "b"], None, ValueError, "data must be provided"),
        (lambda df: [True, False, True], None, ValueError, "data must be provided"),
        # One of the named columns is absent from data.
        (["a", "unknown_col"], {"a": [1, 2, 3]}, KeyError, "unknown_col"),
        # Indicators passed as a tuple.
        (("a",), {"a": [1, 2, 3]}, ValueError, "tuple"),
        # Indicator values are integers rather than booleans.
        ({"cat1": [0, 1, 1]}, {"a": [1, 2, 3]}, ValueError, "must all be boolean"),
        # Indicator frame indexed by "a", "b", "c" next to data from a plain dict.
        (
            pd.DataFrame({"cat1": [True, False, True]}, index=["a", "b", "c"]),
            {"A": [1, 2, 3]},
            ValueError,
            "all its values must be present",
        ),
    ],
)
def test_from_indicators_invalid(indicators, data, exc_type, match):
    """Check that each invalid combination raises ``exc_type`` matching ``match``."""
    expected_failure = pytest.raises(exc_type, match=match)
    with expected_failure:
        from_indicators(indicators=indicators, data=data)
211
212
@pytest.mark.parametrize(
    "indicators",
    [
        pd.DataFrame({"cat1": [False, True, False]}),
        pd.DataFrame({"cat1": [False, True, False]}, dtype="O"),
        {"cat1": [False, True, False]},
        # Callable form: marks only the second index value of data as True.
        lambda data: {"cat1": {pd.DataFrame(data).index.values[1]: True}},
    ],
)
@pytest.mark.parametrize(
    "data",
    [
        pd.DataFrame({"val1": [3, 4, 5]}),
        pd.DataFrame({"val1": [3, 4, 5]}, index=["a", "b", "c"]),
        {"val1": [3, 4, 5]},
    ],
)
def test_from_indicators_equivalence(indicators, data):
    """Check ``from_indicators`` against the matching ``from_memberships`` call.

    Every ``indicators`` variant marks only the second sample as a member of
    "cat1", so the reference memberships are ``[[], ["cat1"], []]``.
    """
    actual = from_indicators(indicators, data)
    reference = from_memberships([[], ["cat1"], []], data)
    assert_frame_equal(actual, reference)
234
235
def test_generate_data_warning():
    """Check that calling ``generate_data`` emits a ``DeprecationWarning``."""
    deprecation_expected = pytest.warns(DeprecationWarning)
    with deprecation_expected:
        generate_data()

jnothman / UpSetPlot / 7344254321

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous