3214

pending completion

Build # 3214

Build Type

push

travis-ci-com

Committed by

chschan

Commit Message

Untracked: add dependency for car@3.1-0

Run Details

1039 of 1117 relevant lines covered (93.02%)

479.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.34

/R/calcpvalue.R

#' @importFrom stats pt qnorm pchisq
#' @importFrom survey svydesign svyranktest
#' @importFrom stats kruskal.test
#' @importFrom verbs Sum
calcPvaluesForOneVariable <- function(x, group, weights, is.binary = FALSE, non.parametric = FALSE)
{
    if (!is.factor(group))
        group <- factor(group)
    n.levels <- nlevels(group)
    levs <- levels(group)

    pval <- rep(NA, n.levels)
    if (n.levels < 2)
        return(pval)
    for (i in 1:n.levels)
    {
        y <- group == levs[i]
        if (non.parametric && !is.binary)
        {
            if (is.null(weights))
                pval[i] <- kruskal.test(x, y)$p.value
            else
            {
                df <- data.frame(x = x, y = y, w = weights)
                pval[i] <- svyranktest(x ~ y, svydesign(ids = ~1, weights = ~w, data = df))$p.value
            }
        }
        else
            pval[i] <- calcPvaluesForOneVarOneLevel(x, x.is.binary = is.binary, y = y, w = weights)
    }
    return(pval)
}

calcPvaluesForOneVarOneLevel = function(x,             # A binary or numeric variable
                      x.is.binary = TRUE,       # TRUE if x is a binary variable
                      y,                        # A binary variable
                      w = rep(1, length(x)))    # weight variable (same length as x)
{
    if (length(w) <= 1)
        w <- rep(1, length(x))

    Filter = function(x, f)
    {
        if (is.null(f))
            return(NULL)
        x[f]
    }

    # Identifying missing values; these are values that are:
    # - Missing in  x (e.g., if x is Pick Any and x = Coke | Pepsi,
    #       if either coke or Pepsi have missing values, then x is missing.
    # - Missing in y
    # - Missing or <= 0 in w
    m = is.na(x) | if(is.null(y)) FALSE else is.na(y) | is.na(w) | w <= 0
    x = Filter(x, !m)
    y = Filter(y, !m)
    w = Filter(w, !m)

    filters = list(which(y == 1), which(y == 0))
    a = computeNumericVarStats(Filter(x, filters[[1]]), Filter(w, filters[[1]]))
    b = computeNumericVarStats(Filter(x, filters[[2]]), Filter(w, filters[[2]]))

    # Here, the test.type for SegmentComparisonTable is determined based on
    # variable type but we will update this later to use the statistical
    # assumptions in QSettings
    # Need to update function signature and tests
    test.type <- if (!x.is.binary) "tTest" else "Nonparametric"
    return(compareTwoSamples(test.type, a, b, x.is.binary,
           is.weighted = length(unique(w)) > 1, bessel = 0))
}

# a and b are lists which contain the summary statistics of each sample
compareTwoSamples <- function(test.type, a, b,
    is.binary = TRUE, is.weighted = FALSE, bessel = 0, dEff = 1)
{
    if (test.type == "tTest")
        return(tTest(a[["Average"]], b[["Average"]], a[["Standard Error"]],
            b[["Standard Error"]], a[["Base n"]], b[["Base n"]],
            is.binary, is.weighted, bessel, dEff))
    else if (test.type == "zTest")
        return(zTest(a[["Average"]], b[["Average"]], a[["Standard Error"]],
            b[["Standard Error"]], a[["Base n"]], b[["Base n"]],
            is.binary, is.weighted, bessel))
    else # Nonparametric or Chisquare
        return(raoScottSecondOrderChiSquareTest(a, b, is.weighted))
    # Non-parametric tests for numeric variables are handled before
    # getting to this function
}


# Functions - these are all from the c# SamplingVariance class (albeit in slightly different forms)
# https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/StandardErrors/SamplingVariance.cs
computeVariances <- function(mean, is.binary, sum.w, sum.ww, sum.xw, sum.xww, sum.xxw, sum.xxww, n.observations)
{
    if (is.binary) # Numerical precision
    {
        if (mean < 0.00000001)
            mean = 0
        else if (mean > 0.99999999)
            mean = 1
    }
    bessel.correction = n.observations / (n.observations - 1)
    mean2 = mean * mean
    sum_of_squares = sum.xxw - 2 * mean * sum.xw + mean2 * sum.w
    sum_of_squares.w = sum.xxww - 2 * mean * sum.xww + mean2 * sum.ww

    taylor = sum_of_squares.w / (sum.w * sum.w) * bessel.correction

    naive = if (is.binary) mean * (1 - mean) else sum_of_squares / sum.w
    naive = naive * bessel.correction / n.observations
    ess = if (!is.na(taylor) && taylor < 0.000001) # Due to numeric precision issues
        sum.w * sum.w / sum.ww
        else n.observations * naive / taylor

    list(taylor = taylor,
         naive = naive,
         ess = ess,
         se = sqrt(taylor))
}

# A simplification of RaoScottSecondOrder2b2 from Q's C#
# https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/Tests/ChiSquareTests.cs
# aa and bb contain summary statistics for each sample
#' @importFrom verbs Sum SumEachRow SumEachColumn
raoScottSecondOrderChiSquareTest <- function(aa, bb, is.weighted)
{
    if (!is.null(dim(aa[["Average"]])))
        pvals <- matrix(NA, nrow = NROW(aa[["Average"]]), ncol = NCOL(aa[["Average"]]))
    else
        pvals <- rep(NA, length = length(aa[["Average"]]))
    if (length(aa[["Average"]]) == 0)
        return(pvals)

    for (i in 1:length(aa[["Average"]]))
    {
        n.observations <- aa[["Base n"]][i] + bb[["Base n"]][i]
        if (is.weighted)
        {
            sum.w <- aa[["sumW"]][i] + bb[["sumW"]][i]
            sum.ww <- aa[["sumWW"]][i] + bb[["sumWW"]][i]
            p.a <- aa[["sumW"]][i] / sum.w
        } else
            p.a <- aa[["Base n"]][i] / n.observations
        p.b <- 1 - p.a

        proportiony = p.a
        mean.a <- aa[["Average"]][i]
        if (is.na(mean.a))
            mean.a <- 0
        mean.b <- bb[["Average"]][i]
        if (is.na(mean.b))
            mean.b <- 0

        sums.ww <- c(bb[["sumWW"]][i] - bb[["sumXWW"]][i], bb[["sumXWW"]][i],
                     aa[["sumWW"]][i] - aa[["sumXWW"]][i], aa[["sumXWW"]][i])
        proportions <- c(p.b * (1-mean.b), p.b * mean.b,
                         p.a * (1-mean.a), p.a * mean.a)
        counts = matrix(proportions * n.observations, 2)

        # If not weighted, this reduces to a chi-square test
        group_sizes = SumEachColumn(counts, remove.missing = FALSE)
        row.totals = SumEachRow(counts, remove.missing = FALSE)
        total = Sum(row.totals, remove.missing = FALSE)

        expected = matrix(c(group_sizes[1]*row.totals[1]/total,
                            group_sizes[1]*row.totals[2]/total,
                            group_sizes[2]*row.totals[1]/total,
                            group_sizes[2]*row.totals[2]/total), 2)

        pearson.statistic = Sum((counts - expected)^2/expected, remove.missing = FALSE)
        if (!is.weighted)
            pvals[i] <- pchisq(pearson.statistic, 1, lower.tail = FALSE)
        else
        {
            variance = multinomialCovarianceMatrix(proportions,
                    sums.ww, sum.ww, sum.w, n.observations)
            if (!is.na(pearson.statistic)) # If not a missing value
            {
                a = matrix(0, 4, 1)
                id_mat = d_mat = matrix(0, 4, 4)
                denominator = 0.0;
                for (j in 1:4)
                {
                    prop = proportions[j];
                    d_mat[j, j] = prop;
                    prop_is_0 = prop < 1e-12
                    j_prop = if (prop_is_0) 0 else 1.0 / prop;
                    if (!prop_is_0) id_mat[j, j] = j_prop;
                    a[j, 1] = j_prop / 4.0;
                    denominator = denominator + j_prop;
                }
                a[2, 1] = -a[2, 1];
                a[3, 1] = -a[3, 1];
                denominator = denominator * .0625 / n.observations;
                numerator = t(a) %*% variance %*% a
                delta = numerator / denominator;
                f = pearson.statistic / delta
            } else
                f <- NA
            pvals[i] <- (1 - pf(f, 1, n.observations - 1))
        }
    }
    return(pvals)
}


# Code modified from https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/Tests/TTests.cs and
# https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/Tests/CombinedZandTTest.cs
tTest <- function(mean1, mean2, se1, se2, n1, n2,
                  is.binary, is.weighted, bessel = 0, dEff = 1)
{
    if (!is.binary)
    {
        v1 <- se1 * se1
        v2 <- se2 * se2
        v <- v1 + v2
        se <- sqrt(v)
        df <- v * v / (v1 * v1 / (n1 - 1) + v2 * v2 / (n2 - 1))

    } else if (is.binary && !is.weighted)
    {
        m12 <- (n1 * mean1 + n2 * mean2)/(n1 + n2)
        se <- sqrt(m12 * (1 - m12) * (1/n1 + 1/n2))
        df <- n1 + n2 - 2

    } else if (is.binary && is.weighted)
    {
        se <- sqrt(dEff * (se1 * se1 + se2 * se2))
        df <- (se1^2 + se2^2)^2 /
            ((se1^4)/(n1-bessel) + (se2^4)/(n2-bessel))
    }
    t = (mean1 - mean2)/se
    p = pt(-abs(t), df) * 2
    return(p)
}

#' @importFrom stats pnorm
zTest <- function(mean1, mean2, se1, se2, n1, n2, is.binary, is.weighted, bessel = 0)
{
    if (!is.binary)
        se <- sqrt(se1 * se1 + se2 * se2)
    else if(is.binary && !is.weighted)
    {
        m12 <- (n1 * mean1 + n2 * mean2)/(n1 + n2)
        se <- sqrt(m12 * (1 - m12) * (n1 + n2) / (n1 + n2 - 2*bessel) * (1/n1 + 1/n2))

    } else if (is.binary && is.weighted)
    {
        se <- sqrt(se1 * se1 + se2 * se2)
    }
    z = (mean1 - mean2)/se
    p = pnorm(-abs(z)) * 2
    return(p)
}


multinomialCovarianceMatrix <- function(proportions, ww, ww_total, w_total, n)
{
    k =length(proportions)
    covariance = matrix(0, k, k)
    for (r in 1:4)
    {
        for (c in 1:4)
        {
            p1 = proportions[r];
            p2 = proportions[c];
            ww1 = ww[r];
            ww2 = ww[c];
            sc = if(r == c) computeSamplingVarianceForProportion(p1, ww1, ww_total, w_total, n)
                 else       samplingCovariance(p1, p2, ww1, ww2, ww_total, w_total, n)
            covariance[c, r] = covariance[r, c] = sc
        }
    }
    return(covariance)
}

computeSamplingVarianceForProportion <- function(input_proportion, ww, ww_total, w_total,sample_size)
{
    proportion = input_proportion
    if (is.na(proportion)) {
        proportion <- NA
    } else if (proportion < 1E-8) {
        proportion = 0.0;
    } else if (proportion > 1 - 1e-8)
        proportion = 1.0;
    sumSquaredWeights = ww_total;
    n = sample_size;
    mean = proportion;
    complement = (1.0 - proportion)
    variance = proportion * complement
    population = w_total;
    variance = variance * sample_size/(sample_size - 1.0);
    ww_sums_of_squares = complement * complement * ww + proportion * proportion * (ww_total - ww)
    return(ww_sums_of_squares / (w_total * w_total) * sample_size/(sample_size - 1.0))
}

# From the C# SamplingVariance(double proportion1, double proportion2, double ww1, double ww2, double ww_total, double w_total, int n, StatisticalAssumptions statistical_assumptions)
samplingCovariance <- function(proportion1,  proportion2,  ww1,  ww2,  ww_total,  w_total, n)
{
    ww_sums_of_squares = -proportion1 * -proportion2 * (ww_total - ww1 - ww2) + -proportion1 * (1 - proportion2) * ww2 + -proportion2 * (1 - proportion1) * ww1
    return(ww_sums_of_squares / (w_total * w_total) * (n / (n - 1)))
}

#' @importFrom verbs Sum
computeNumericVarStats <- function(x, w)
{
    n.observations <- length(x)
    ww = w * w
    xw = x * w
    xxw = x * xw
    xww = xw * w
    xxww = xxw * w
    sum.w = Sum(w, remove.missing = FALSE)
    sum.xw = Sum(xw, remove.missing = FALSE)
    sum.ww = Sum(ww, remove.missing = FALSE)
    sum.xxw = Sum(xxw, remove.missing = FALSE)
    sum.xww = Sum(xww, remove.missing = FALSE)
    sum.xxww = Sum(xxww, remove.missing = FALSE)
    mean.x = sum.xw / sum.w

    n.used.in.bessel.correction = n.observations
    var = computeVariances(mean.x, FALSE, sum.w, sum.ww, sum.xw, sum.xww, sum.xxw, sum.xxww, n.used.in.bessel.correction)

    return(c("Average" = mean.x, "Base n" = n.observations,
        sumW = sum.w, sumWW = sum.ww, sumXWW = sum.xww,
        "Standard Error" = var$se))
}

1	#' @importFrom stats pt qnorm pchisq
2	#' @importFrom survey svydesign svyranktest
3	#' @importFrom stats kruskal.test
4	#' @importFrom verbs Sum
5	calcPvaluesForOneVariable <- function(x, group, weights, is.binary = FALSE, non.parametric = FALSE)
6	{
7	if (!is.factor(group))	438✔
8	group <- factor(group)	6✔
9	n.levels <- nlevels(group)	438✔
10	levs <- levels(group)	438✔
11
12	pval <- rep(NA, n.levels)	438✔
13	if (n.levels < 2)	438✔
14	return(pval)	×
15	for (i in 1:n.levels)	438✔
16	{
17	y <- group == levs[i]	3,565✔
18	if (non.parametric && !is.binary)	3,565✔
19	{
20	if (is.null(weights))	4✔
21	pval[i] <- kruskal.test(x, y)$p.value	2✔
22	else
23	{
24	df <- data.frame(x = x, y = y, w = weights)	2✔
25	pval[i] <- svyranktest(x ~ y, svydesign(ids = ~1, weights = ~w, data = df))$p.value	2✔
26	}
27	}
28	else
29	pval[i] <- calcPvaluesForOneVarOneLevel(x, x.is.binary = is.binary, y = y, w = weights)	3,561✔
30	}
31	return(pval)	438✔
32	}
33
34	calcPvaluesForOneVarOneLevel = function(x, # A binary or numeric variable
35	x.is.binary = TRUE, # TRUE if x is a binary variable
36	y, # A binary variable
37	w = rep(1, length(x))) # weight variable (same length as x)
38	{
39	if (length(w) <= 1)	3,561✔
40	w <- rep(1, length(x))	3,167✔
41
42	Filter = function(x, f)	3,561✔
43	{
44	if (is.null(f))	24,927✔
45	return(NULL)	×
46	x[f]	24,927✔
47	}
48
49	# Identifying missing values; these are values that are:
50	# - Missing in x (e.g., if x is Pick Any and x = Coke \| Pepsi,
51	# if either coke or Pepsi have missing values, then x is missing.
52	# - Missing in y
53	# - Missing or <= 0 in w
54	m = is.na(x) \| if(is.null(y)) FALSE else is.na(y) \| is.na(w) \| w <= 0	3,561✔
55	x = Filter(x, !m)	3,561✔
56	y = Filter(y, !m)	3,561✔
57	w = Filter(w, !m)	3,561✔
58
59	filters = list(which(y == 1), which(y == 0))	3,561✔
60	a = computeNumericVarStats(Filter(x, filters[[1]]), Filter(w, filters[[1]]))	3,561✔
61	b = computeNumericVarStats(Filter(x, filters[[2]]), Filter(w, filters[[2]]))	3,561✔
62
63	# Here, the test.type for SegmentComparisonTable is determined based on
64	# variable type but we will update this later to use the statistical
65	# assumptions in QSettings
66	# Need to update function signature and tests
67	test.type <- if (!x.is.binary) "tTest" else "Nonparametric"	3,561✔
68	return(compareTwoSamples(test.type, a, b, x.is.binary,	3,561✔
69	is.weighted = length(unique(w)) > 1, bessel = 0))	3,561✔
70	}
71
72	# a and b are lists which contain the summary statistics of each sample
73	compareTwoSamples <- function(test.type, a, b,
74	is.binary = TRUE, is.weighted = FALSE, bessel = 0, dEff = 1)
75	{
76	if (test.type == "tTest")	3,582✔
77	return(tTest(a[["Average"]], b[["Average"]], a[["Standard Error"]],	1,222✔
78	b[["Standard Error"]], a[["Base n"]], b[["Base n"]],	1,222✔
79	is.binary, is.weighted, bessel, dEff))	1,222✔
80	else if (test.type == "zTest")	2,360✔
81	return(zTest(a[["Average"]], b[["Average"]], a[["Standard Error"]],	12✔
82	b[["Standard Error"]], a[["Base n"]], b[["Base n"]],	12✔
83	is.binary, is.weighted, bessel))	12✔
84	else # Nonparametric or Chisquare
85	return(raoScottSecondOrderChiSquareTest(a, b, is.weighted))	2,348✔
86	# Non-parametric tests for numeric variables are handled before
87	# getting to this function
88	}
89
90
91	# Functions - these are all from the c# SamplingVariance class (albeit in slightly different forms)
92	# https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/StandardErrors/SamplingVariance.cs
93	computeVariances <- function(mean, is.binary, sum.w, sum.ww, sum.xw, sum.xww, sum.xxw, sum.xxww, n.observations)
94	{
95	if (is.binary) # Numerical precision	7,122✔
96	{
97	if (mean < 0.00000001)	×
98	mean = 0	×
99	else if (mean > 0.99999999)	×
100	mean = 1	×
101	}
102	bessel.correction = n.observations / (n.observations - 1)	7,122✔
103	mean2 = mean * mean	7,122✔
104	sum_of_squares = sum.xxw - 2 * mean * sum.xw + mean2 * sum.w	7,122✔
105	sum_of_squares.w = sum.xxww - 2 * mean * sum.xww + mean2 * sum.ww	7,122✔
106
107	taylor = sum_of_squares.w / (sum.w * sum.w) * bessel.correction	7,122✔
108
109	naive = if (is.binary) mean * (1 - mean) else sum_of_squares / sum.w	7,122✔
110	naive = naive * bessel.correction / n.observations	7,122✔
111	ess = if (!is.na(taylor) && taylor < 0.000001) # Due to numeric precision issues	7,122✔
112	sum.w * sum.w / sum.ww	7,122✔
113	else n.observations * naive / taylor	7,122✔
114
115	list(taylor = taylor,	7,122✔
116	naive = naive,	7,122✔
117	ess = ess,	7,122✔
118	se = sqrt(taylor))	7,122✔
119	}
120
121	# A simplification of RaoScottSecondOrder2b2 from Q's C#
122	# https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/Tests/ChiSquareTests.cs
123	# aa and bb contain summary statistics for each sample
124	#' @importFrom verbs Sum SumEachRow SumEachColumn
125	raoScottSecondOrderChiSquareTest <- function(aa, bb, is.weighted)
126	{
127	if (!is.null(dim(aa[["Average"]])))	2,348✔
128	pvals <- matrix(NA, nrow = NROW(aa[["Average"]]), ncol = NCOL(aa[["Average"]]))	×
129	else
130	pvals <- rep(NA, length = length(aa[["Average"]]))	2,348✔
131	if (length(aa[["Average"]]) == 0)	2,348✔
132	return(pvals)	×
133
134	for (i in 1:length(aa[["Average"]]))	2,348✔
135	{
136	n.observations <- aa[["Base n"]][i] + bb[["Base n"]][i]	2,356✔
137	if (is.weighted)	2,356✔
138	{
139	sum.w <- aa[["sumW"]][i] + bb[["sumW"]][i]	194✔
140	sum.ww <- aa[["sumWW"]][i] + bb[["sumWW"]][i]	194✔
141	p.a <- aa[["sumW"]][i] / sum.w	194✔
142	} else
143	p.a <- aa[["Base n"]][i] / n.observations	2,162✔
144	p.b <- 1 - p.a	2,356✔
145
146	proportiony = p.a	2,356✔
147	mean.a <- aa[["Average"]][i]	2,356✔
148	if (is.na(mean.a))	2,356✔
149	mean.a <- 0	28✔
150	mean.b <- bb[["Average"]][i]	2,356✔
151	if (is.na(mean.b))	2,356✔
152	mean.b <- 0	9✔
153
154	sums.ww <- c(bb[["sumWW"]][i] - bb[["sumXWW"]][i], bb[["sumXWW"]][i],	2,356✔
155	aa[["sumWW"]][i] - aa[["sumXWW"]][i], aa[["sumXWW"]][i])	2,356✔
156	proportions <- c(p.b * (1-mean.b), p.b * mean.b,	2,356✔
157	p.a * (1-mean.a), p.a * mean.a)	2,356✔
158	counts = matrix(proportions * n.observations, 2)	2,356✔
159
160	# If not weighted, this reduces to a chi-square test
161	group_sizes = SumEachColumn(counts, remove.missing = FALSE)	2,356✔
162	row.totals = SumEachRow(counts, remove.missing = FALSE)	2,356✔
163	total = Sum(row.totals, remove.missing = FALSE)	2,356✔
164
165	expected = matrix(c(group_sizes[1]*row.totals[1]/total,	2,356✔
166	group_sizes[1]*row.totals[2]/total,	2,356✔
167	group_sizes[2]*row.totals[1]/total,	2,356✔
168	group_sizes[2]*row.totals[2]/total), 2)	2,356✔
169
170	pearson.statistic = Sum((counts - expected)^2/expected, remove.missing = FALSE)	2,356✔
171	if (!is.weighted)	2,356✔
172	pvals[i] <- pchisq(pearson.statistic, 1, lower.tail = FALSE)	2,162✔
173	else
174	{
175	variance = multinomialCovarianceMatrix(proportions,	194✔
176	sums.ww, sum.ww, sum.w, n.observations)	194✔
177	if (!is.na(pearson.statistic)) # If not a missing value	194✔
178	{
179	a = matrix(0, 4, 1)	112✔
180	id_mat = d_mat = matrix(0, 4, 4)	112✔
181	denominator = 0.0;	112✔
182	for (j in 1:4)	112✔
183	{
184	prop = proportions[j];	448✔
185	d_mat[j, j] = prop;	448✔
186	prop_is_0 = prop < 1e-12	448✔
187	j_prop = if (prop_is_0) 0 else 1.0 / prop;	448✔
188	if (!prop_is_0) id_mat[j, j] = j_prop;	408✔
189	a[j, 1] = j_prop / 4.0;	448✔
190	denominator = denominator + j_prop;	448✔
191	}
192	a[2, 1] = -a[2, 1];	112✔
193	a[3, 1] = -a[3, 1];	112✔
194	denominator = denominator * .0625 / n.observations;	112✔
195	numerator = t(a) %% variance %% a	112✔
196	delta = numerator / denominator;	112✔
197	f = pearson.statistic / delta	112✔
198	} else
199	f <- NA	82✔
200	pvals[i] <- (1 - pf(f, 1, n.observations - 1))	194✔
201	}
202	}
203	return(pvals)	2,348✔
204	}
205
206
207	# Code modified from https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/Tests/TTests.cs and
208	# https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/Tests/CombinedZandTTest.cs
209	tTest <- function(mean1, mean2, se1, se2, n1, n2,
210	is.binary, is.weighted, bessel = 0, dEff = 1)
211	{
212	if (!is.binary)	1,222✔
213	{
214	v1 <- se1 * se1	1,219✔
215	v2 <- se2 * se2	1,219✔
216	v <- v1 + v2	1,219✔
217	se <- sqrt(v)	1,219✔
218	df <- v * v / (v1 * v1 / (n1 - 1) + v2 * v2 / (n2 - 1))	1,219✔
219
220	} else if (is.binary && !is.weighted)	3✔
221	{
222	m12 <- (n1 * mean1 + n2 * mean2)/(n1 + n2)	1✔
223	se <- sqrt(m12 * (1 - m12) * (1/n1 + 1/n2))	1✔
224	df <- n1 + n2 - 2	1✔
225
226	} else if (is.binary && is.weighted)	2✔
227	{
228	se <- sqrt(dEff * (se1 * se1 + se2 * se2))	2✔
229	df <- (se1^2 + se2^2)^2 /	2✔
230	((se1^4)/(n1-bessel) + (se2^4)/(n2-bessel))	2✔
231	}
232	t = (mean1 - mean2)/se	1,222✔
233	p = pt(-abs(t), df) * 2	1,222✔
234	return(p)	1,222✔
235	}
236
237	#' @importFrom stats pnorm
238	zTest <- function(mean1, mean2, se1, se2, n1, n2, is.binary, is.weighted, bessel = 0)
239	{
240	if (!is.binary)	12✔
241	se <- sqrt(se1 * se1 + se2 * se2)	2✔
242	else if(is.binary && !is.weighted)	10✔
243	{
244	m12 <- (n1 * mean1 + n2 * mean2)/(n1 + n2)	8✔
245	se <- sqrt(m12 * (1 - m12) * (n1 + n2) / (n1 + n2 - 2bessel) (1/n1 + 1/n2))	8✔
246
247	} else if (is.binary && is.weighted)	2✔
248	{
249	se <- sqrt(se1 * se1 + se2 * se2)	2✔
250	}
251	z = (mean1 - mean2)/se	12✔
252	p = pnorm(-abs(z)) * 2	12✔
253	return(p)	12✔
254	}
255
256
257	multinomialCovarianceMatrix <- function(proportions, ww, ww_total, w_total, n)
258	{
259	k =length(proportions)	194✔
260	covariance = matrix(0, k, k)	194✔
261	for (r in 1:4)	194✔
262	{
263	for (c in 1:4)	776✔
264	{
265	p1 = proportions[r];	3,104✔
266	p2 = proportions[c];	3,104✔
267	ww1 = ww[r];	3,104✔
268	ww2 = ww[c];	3,104✔
269	sc = if(r == c) computeSamplingVarianceForProportion(p1, ww1, ww_total, w_total, n)	3,104✔
270	else samplingCovariance(p1, p2, ww1, ww2, ww_total, w_total, n)	3,104✔
271	covariance[c, r] = covariance[r, c] = sc	3,104✔
272	}
273	}
274	return(covariance)	194✔
275	}
276
277	computeSamplingVarianceForProportion <- function(input_proportion, ww, ww_total, w_total,sample_size)
278	{
279	proportion = input_proportion	776✔
280	if (is.na(proportion)) {	776✔
281	proportion <- NA	40✔
282	} else if (proportion < 1E-8) {	736✔
283	proportion = 0.0;	184✔
284	} else if (proportion > 1 - 1e-8)	552✔
285	proportion = 1.0;	×
286	sumSquaredWeights = ww_total;	776✔
287	n = sample_size;	776✔
288	mean = proportion;	776✔
289	complement = (1.0 - proportion)	776✔
290	variance = proportion * complement	776✔
291	population = w_total;	776✔
292	variance = variance * sample_size/(sample_size - 1.0);	776✔
293	ww_sums_of_squares = complement * complement * ww + proportion * proportion * (ww_total - ww)	776✔
294	return(ww_sums_of_squares / (w_total * w_total) * sample_size/(sample_size - 1.0))	776✔
295	}
296
297	# From the C# SamplingVariance(double proportion1, double proportion2, double ww1, double ww2, double ww_total, double w_total, int n, StatisticalAssumptions statistical_assumptions)
298	samplingCovariance <- function(proportion1, proportion2, ww1, ww2, ww_total, w_total, n)
299	{
300	ww_sums_of_squares = -proportion1 * -proportion2 * (ww_total - ww1 - ww2) + -proportion1 * (1 - proportion2) * ww2 + -proportion2 * (1 - proportion1) * ww1	2,328✔
301	return(ww_sums_of_squares / (w_total * w_total) * (n / (n - 1)))	2,328✔
302	}
303
304	#' @importFrom verbs Sum
305	computeNumericVarStats <- function(x, w)
306	{
307	n.observations <- length(x)	7,122✔
308	ww = w * w	7,122✔
309	xw = x * w	7,122✔
310	xxw = x * xw	7,122✔
311	xww = xw * w	7,122✔
312	xxww = xxw * w	7,122✔
313	sum.w = Sum(w, remove.missing = FALSE)	7,122✔
314	sum.xw = Sum(xw, remove.missing = FALSE)	7,122✔
315	sum.ww = Sum(ww, remove.missing = FALSE)	7,122✔
316	sum.xxw = Sum(xxw, remove.missing = FALSE)	7,122✔
317	sum.xww = Sum(xww, remove.missing = FALSE)	7,122✔
318	sum.xxww = Sum(xxww, remove.missing = FALSE)	7,122✔
319	mean.x = sum.xw / sum.w	7,122✔
320
321	n.used.in.bessel.correction = n.observations	7,122✔
322	var = computeVariances(mean.x, FALSE, sum.w, sum.ww, sum.xw, sum.xww, sum.xxw, sum.xxww, n.used.in.bessel.correction)	7,122✔
323
324	return(c("Average" = mean.x, "Base n" = n.observations,	7,122✔
325	sumW = sum.w, sumWW = sum.ww, sumXWW = sum.xww,	7,122✔
326	"Standard Error" = var$se))	7,122✔
327	}

Displayr / flipAnalysisOfVariance / 3214

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous