9391

pending completion

Build # 9391

Build Type

Pull #3638

travis-ci

Committed by

web-flow

Commit Message

Drop first dimension when computing determinant of the Jacobian of the transformation.

Pull Request #3638: Simple stick breaking (formerly #3620)

Run Details

23 of 23 new or added lines in 1 file covered. (100.0%)

52178 of 100270 relevant lines covered (52.04%)

2.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/pymc3/tests/test_glm.py

import numpy as np
from numpy.testing import assert_equal

from .helpers import SeededTest
import pymc3
from pymc3 import Model, Uniform, Normal, find_MAP, Slice, sample
from pymc3 import families, GLM, LinearComponent
import pandas as pd

# Generate data
def generate_data(intercept, slope, size=700):
    """Return a noiseless straight line sampled on an even grid over [-1, 1].

    Parameters
    ----------
    intercept : value added to every point of the line.
    slope : multiplier applied to the grid values.
    size : number of evenly spaced grid points (default 700).

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` is the grid from ``np.linspace`` and
        ``y`` equals ``intercept + x * slope``.
    """
    grid = np.linspace(-1, 1, size)
    return grid, intercept + slope * grid


class TestGLM(SeededTest):
    """Exercise ``GLM`` and ``LinearComponent`` on synthetic regression data."""

    @classmethod
    def setup_class(cls):
        """Build the linear, Bernoulli and binomial data sets shared by the tests."""
        super().setup_class()
        cls.intercept, cls.slope, cls.sd = 1, 3, .05

        # NOTE: the np.random draws below happen in a fixed sequence
        # (normal, binomial, randint, binomial); reordering them would
        # change the generated data.
        x_lin, y_lin = generate_data(cls.intercept, cls.slope, size=1000)
        cls.y_linear = y_lin + np.random.normal(size=1000, scale=cls.sd)
        cls.data_linear = pd.DataFrame({'x': x_lin, 'y': cls.y_linear})

        # Push the linear predictor through the inverse logit to get
        # success probabilities for the Bernoulli / binomial outcomes.
        x_logit, eta = generate_data(cls.intercept, cls.slope, size=3000)
        prob = 1 / (1 + np.exp(-eta))
        cls.data_logistic = {'x': x_logit, 'y': np.random.binomial(1, prob)}

        n_trials = np.random.randint(1, 20, size=prob.shape)
        successes = np.random.binomial(n_trials, prob)
        cls.data_logistic2 = {'x': x_logit, 'y': successes, 'n': n_trials}

    def test_linear_component(self):
        """Posterior means from a LinearComponent model round to the true values."""
        with Model() as model:
            linear = LinearComponent.from_formula('y ~ x', self.data_linear)
            noise_scale = Uniform('sigma', 0, 20)
            Normal('y_obs', mu=linear.y_est, sigma=noise_scale,
                   observed=self.y_linear)
            map_start = find_MAP(vars=[noise_scale])
            trace = sample(500, tune=0, step=Slice(model.vars), start=map_start,
                           progressbar=False, random_seed=self.random_seed)

            targets = (('Intercept', self.intercept),
                       ('x', self.slope),
                       ('sigma', self.sd))
            for varname, target in targets:
                # The error must round to zero at one decimal place.
                assert round(abs(np.mean(trace[varname]) - target), 1) == 0

    def test_glm(self):
        """Posterior means from a default GLM round to the true values."""
        with Model() as model:
            GLM.from_formula('y ~ x', self.data_linear)
            trace = sample(500, step=Slice(model.vars), tune=0,
                           progressbar=False, random_seed=self.random_seed)

            targets = (('Intercept', self.intercept),
                       ('x', self.slope),
                       ('sd', self.sd))
            for varname, target in targets:
                assert round(abs(np.mean(trace[varname]) - target), 1) == 0

    def test_glm_offset(self):
        """With an offset, the fitted intercept is shifted by that offset."""
        offset = 1.
        with Model() as model:
            GLM.from_formula('y ~ x', self.data_linear, offset=offset)
            trace = sample(500, step=Slice(model.vars), tune=0,
                           progressbar=False, random_seed=self.random_seed)

            residual = np.mean(trace['Intercept']) - self.intercept + offset
            assert round(abs(residual), 1) == 0

    def test_glm_link_func(self):
        """A Binomial family with an explicit logit link recovers the coefficients."""
        with Model() as model:
            logit_family = families.Binomial(link=families.logit)
            GLM.from_formula('y ~ x', self.data_logistic, family=logit_family)
            trace = sample(1000, step=Slice(model.vars), tune=0,
                           progressbar=False, random_seed=self.random_seed)

            targets = (('Intercept', self.intercept), ('x', self.slope))
            for varname, target in targets:
                assert round(abs(np.mean(trace[varname]) - target), 1) == 0

    def test_glm_link_func2(self):
        """A Binomial family given per-row trial counts recovers the coefficients."""
        with Model():
            trial_counts = self.data_logistic2['n']
            GLM.from_formula('y ~ x', self.data_logistic2,
                             family=families.Binomial(priors={'n': trial_counts}))
            trace = sample(1000, progressbar=False,
                           random_seed=self.random_seed)

            targets = (('Intercept', self.intercept), ('x', self.slope))
            for varname, target in targets:
                assert round(abs(np.mean(trace[varname]) - target), 1) == 0

    def test_more_than_one_glm_is_ok(self):
        """Two differently named GLMs can be built inside one model."""
        with Model():
            for glm_name in ('glm1', 'glm2'):
                GLM.from_formula('y ~ x', self.data_logistic,
                                 family=families.Binomial(link=families.logit),
                                 name=glm_name)

    def test_from_xy(self):
        """A GLM can be constructed directly from x and y arrays."""
        predictors = self.data_logistic['x']
        outcomes = self.data_logistic['y']
        with Model():
            GLM(predictors, outcomes,
                family=families.Binomial(link=families.logit),
                name='glm1')

    def test_boolean_y(self):
        """Boolean outcomes yield the same observations as their integer form."""
        int_frame = pd.DataFrame({'x': self.data_logistic['x'],
                                  'y': self.data_logistic['y']})
        bool_frame = pd.DataFrame({'x': self.data_logistic['x'],
                                   'y': list(map(bool, self.data_logistic['y']))})

        model = GLM.from_formula('y ~ x', int_frame)
        model_bool = GLM.from_formula('y ~ x', bool_frame)
        assert_equal(model.y.observations, model_bool.y.observations)

    def test_glm_formula_from_calling_scope(self):
        """Formula can extract variables from the calling scope."""
        # `z` is not a column of the frame; the formula names it, so it has
        # to exist as a local of this function. Do not rename or remove it.
        z = pd.Series([10, 20, 30])
        frame = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]})
        GLM.from_formula("y ~ x + z", frame,
                         family=pymc3.glm.families.Binomial())

    def test_linear_component_formula_from_calling_scope(self):
        """Formula can extract variables from the calling scope."""
        # `z` is not a column of the frame; the formula names it, so it has
        # to exist as a local of this function. Do not rename or remove it.
        z = pd.Series([10, 20, 30])
        frame = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]})
        LinearComponent.from_formula("y ~ x + z", frame)

1	import numpy as np	×
2	from numpy.testing import assert_equal	×
3
4	from .helpers import SeededTest	×
5	import pymc3	×
6	from pymc3 import Model, Uniform, Normal, find_MAP, Slice, sample	×
7	from pymc3 import families, GLM, LinearComponent	×
8	import pandas as pd	×
9
10	# Generate data
11	def generate_data(intercept, slope, size=700):	×
12	x = np.linspace(-1, 1, size)	×
13	y = intercept + x * slope	×
14	return x, y	×
15
16
17	class TestGLM(SeededTest):	×
18	@classmethod	×
19	def setup_class(cls):
20	super().setup_class()	×
21	cls.intercept = 1	×
22	cls.slope = 3	×
23	cls.sd = .05	×
24	x_linear, cls.y_linear = generate_data(cls.intercept, cls.slope, size=1000)	×
25	cls.y_linear += np.random.normal(size=1000, scale=cls.sd)	×
26	cls.data_linear = pd.DataFrame(dict(x=x_linear, y=cls.y_linear))	×
27
28	x_logistic, y_logistic = generate_data(cls.intercept, cls.slope, size=3000)	×
29	y_logistic = 1 / (1 + np.exp(-y_logistic))	×
30	bern_trials = np.random.binomial(1, y_logistic)	×
31	cls.data_logistic = dict(x=x_logistic, y=bern_trials)	×
32
33	n_trials = np.random.randint(1, 20, size=y_logistic.shape)	×
34	binom_trials = np.random.binomial(n_trials, y_logistic)	×
35	cls.data_logistic2 = dict(x=x_logistic, y=binom_trials, n=n_trials)	×
36
37	def test_linear_component(self):	×
38	with Model() as model:	×
39	lm = LinearComponent.from_formula('y ~ x', self.data_linear)	×
40	sigma = Uniform('sigma', 0, 20)	×
41	Normal('y_obs', mu=lm.y_est, sigma=sigma, observed=self.y_linear)	×
42	start = find_MAP(vars=[sigma])	×
43	step = Slice(model.vars)	×
44	trace = sample(500, tune=0, step=step, start=start,	×
45	progressbar=False, random_seed=self.random_seed)
46
47	assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0	×
48	assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0	×
49	assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0	×
50
51	def test_glm(self):	×
52	with Model() as model:	×
53	GLM.from_formula('y ~ x', self.data_linear)	×
54	step = Slice(model.vars)	×
55	trace = sample(500, step=step, tune=0, progressbar=False,	×
56	random_seed=self.random_seed)
57
58	assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0	×
59	assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0	×
60	assert round(abs(np.mean(trace['sd'])-self.sd), 1) == 0	×
61
62	def test_glm_offset(self):	×
63	offset = 1.	×
64	with Model() as model:	×
65	GLM.from_formula('y ~ x', self.data_linear, offset=offset)	×
66	step = Slice(model.vars)	×
67	trace = sample(500, step=step, tune=0, progressbar=False,	×
68	random_seed=self.random_seed)
69
70	assert round(abs(np.mean(trace['Intercept'])-self.intercept+offset), 1) == 0	×
71
72	def test_glm_link_func(self):	×
73	with Model() as model:	×
74	GLM.from_formula('y ~ x', self.data_logistic,	×
75	family=families.Binomial(link=families.logit))
76	step = Slice(model.vars)	×
77	trace = sample(1000, step=step, tune=0, progressbar=False,	×
78	random_seed=self.random_seed)
79
80	assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0	×
81	assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0	×
82
83	def test_glm_link_func2(self):	×
84	with Model() as model:	×
85	GLM.from_formula('y ~ x', self.data_logistic2,	×
86	family=families.Binomial(priors={'n': self.data_logistic2['n']}))
87	trace = sample(1000, progressbar=False,	×
88	random_seed=self.random_seed)
89
90	assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0	×
91	assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0	×
92
93	def test_more_than_one_glm_is_ok(self):	×
94	with Model():	×
95	GLM.from_formula('y ~ x', self.data_logistic,	×
96	family=families.Binomial(link=families.logit),
97	name='glm1')
98	GLM.from_formula('y ~ x', self.data_logistic,	×
99	family=families.Binomial(link=families.logit),
100	name='glm2')
101
102	def test_from_xy(self):	×
103	with Model():	×
104	GLM(self.data_logistic['x'],	×
105	self.data_logistic['y'],
106	family=families.Binomial(link=families.logit),
107	name='glm1')
108
109	def test_boolean_y(self):	×
110	model = GLM.from_formula('y ~ x', pd.DataFrame(	×
111	{'x': self.data_logistic['x'],
112	'y': self.data_logistic['y']}
113	)
114	)
115	model_bool = GLM.from_formula('y ~ x', pd.DataFrame(	×
116	{'x': self.data_logistic['x'],
117	'y': [bool(i) for i in self.data_logistic['y']]}
118	)
119	)
120	assert_equal(model.y.observations, model_bool.y.observations)	×
121
122	def test_glm_formula_from_calling_scope(self):	×
123	"""Formula can extract variables from the calling scope."""
124	z = pd.Series([10, 20, 30])	×
125	df = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]})	×
126	GLM.from_formula("y ~ x + z", df, family=pymc3.glm.families.Binomial())	×
127
128	def test_linear_component_formula_from_calling_scope(self):	×
129	"""Formula can extract variables from the calling scope."""
130	z = pd.Series([10, 20, 30])	×
131	df = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]})	×
132	LinearComponent.from_formula("y ~ x + z", df)	×

pymc-devs / pymc3 / 9391

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous