• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Displayr / flipAnalysisOfVariance / 3214

pending completion
3214

push

travis-ci-com

chschan
Untracked: add dependency for car@3.1-0

1039 of 1117 relevant lines covered (93.02%)

479.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.34
/R/calcpvalue.R
1
#' @importFrom stats pt qnorm pchisq
2
#' @importFrom survey svydesign svyranktest
3
#' @importFrom stats kruskal.test
4
#' @importFrom verbs Sum
5
calcPvaluesForOneVariable <- function(x, group, weights, is.binary = FALSE, non.parametric = FALSE)
6
{
7
    if (!is.factor(group))
438✔
8
        group <- factor(group)
6✔
9
    n.levels <- nlevels(group)
438✔
10
    levs <- levels(group)
438✔
11

12
    pval <- rep(NA, n.levels)
438✔
13
    if (n.levels < 2)
438✔
14
        return(pval)
×
15
    for (i in 1:n.levels)
438✔
16
    {
17
        y <- group == levs[i]
3,565✔
18
        if (non.parametric && !is.binary)
3,565✔
19
        {
20
            if (is.null(weights))
4✔
21
                pval[i] <- kruskal.test(x, y)$p.value
2✔
22
            else
23
            {
24
                df <- data.frame(x = x, y = y, w = weights)
2✔
25
                pval[i] <- svyranktest(x ~ y, svydesign(ids = ~1, weights = ~w, data = df))$p.value
2✔
26
            }
27
        }
28
        else
29
            pval[i] <- calcPvaluesForOneVarOneLevel(x, x.is.binary = is.binary, y = y, w = weights)
3,561✔
30
    }
31
    return(pval)
438✔
32
}
33

34
calcPvaluesForOneVarOneLevel = function(x,             # A binary or numeric variable
35
                      x.is.binary = TRUE,       # TRUE if x is a binary variable
36
                      y,                        # A binary variable
37
                      w = rep(1, length(x)))    # weight variable (same length as x)
38
{
39
    if (length(w) <= 1)
3,561✔
40
        w <- rep(1, length(x))
3,167✔
41

42
    Filter = function(x, f)
3,561✔
43
    {
44
        if (is.null(f))
24,927✔
45
            return(NULL)
×
46
        x[f]
24,927✔
47
    }
48

49
    # Identifying missing values; these are values that are:
50
    # - Missing in  x (e.g., if x is Pick Any and x = Coke | Pepsi,
51
    #       if either coke or Pepsi have missing values, then x is missing.
52
    # - Missing in y
53
    # - Missing or <= 0 in w
54
    m = is.na(x) | if(is.null(y)) FALSE else is.na(y) | is.na(w) | w <= 0
3,561✔
55
    x = Filter(x, !m)
3,561✔
56
    y = Filter(y, !m)
3,561✔
57
    w = Filter(w, !m)
3,561✔
58

59
    filters = list(which(y == 1), which(y == 0))
3,561✔
60
    a = computeNumericVarStats(Filter(x, filters[[1]]), Filter(w, filters[[1]]))
3,561✔
61
    b = computeNumericVarStats(Filter(x, filters[[2]]), Filter(w, filters[[2]]))
3,561✔
62

63
    # Here, the test.type for SegmentComparisonTable is determined based on
64
    # variable type but we will update this later to use the statistical
65
    # assumptions in QSettings
66
    # Need to update function signature and tests
67
    test.type <- if (!x.is.binary) "tTest" else "Nonparametric"
3,561✔
68
    return(compareTwoSamples(test.type, a, b, x.is.binary,
3,561✔
69
           is.weighted = length(unique(w)) > 1, bessel = 0))
3,561✔
70
}
71

72
# a and b are lists which contain the summary statistics of each sample
73
compareTwoSamples <- function(test.type, a, b,
74
    is.binary = TRUE, is.weighted = FALSE, bessel = 0, dEff = 1)
75
{
76
    if (test.type == "tTest")
3,582✔
77
        return(tTest(a[["Average"]], b[["Average"]], a[["Standard Error"]],
1,222✔
78
            b[["Standard Error"]], a[["Base n"]], b[["Base n"]],
1,222✔
79
            is.binary, is.weighted, bessel, dEff))
1,222✔
80
    else if (test.type == "zTest")
2,360✔
81
        return(zTest(a[["Average"]], b[["Average"]], a[["Standard Error"]],
12✔
82
            b[["Standard Error"]], a[["Base n"]], b[["Base n"]],
12✔
83
            is.binary, is.weighted, bessel))
12✔
84
    else # Nonparametric or Chisquare
85
        return(raoScottSecondOrderChiSquareTest(a, b, is.weighted))
2,348✔
86
    # Non-parametric tests for numeric variables are handled before
87
    # getting to this function
88
}
89

90

91
# Functions - these are all from the c# SamplingVariance class (albeit in slightly different forms)
92
# https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/StandardErrors/SamplingVariance.cs
93
computeVariances <- function(mean, is.binary, sum.w, sum.ww, sum.xw, sum.xww, sum.xxw, sum.xxww, n.observations)
94
{
95
    if (is.binary) # Numerical precision
7,122✔
96
    {
97
        if (mean < 0.00000001)
×
98
            mean = 0
×
99
        else if (mean > 0.99999999)
×
100
            mean = 1
×
101
    }
102
    bessel.correction = n.observations / (n.observations - 1)
7,122✔
103
    mean2 = mean * mean
7,122✔
104
    sum_of_squares = sum.xxw - 2 * mean * sum.xw + mean2 * sum.w
7,122✔
105
    sum_of_squares.w = sum.xxww - 2 * mean * sum.xww + mean2 * sum.ww
7,122✔
106

107
    taylor = sum_of_squares.w / (sum.w * sum.w) * bessel.correction
7,122✔
108

109
    naive = if (is.binary) mean * (1 - mean) else sum_of_squares / sum.w
7,122✔
110
    naive = naive * bessel.correction / n.observations
7,122✔
111
    ess = if (!is.na(taylor) && taylor < 0.000001) # Due to numeric precision issues
7,122✔
112
        sum.w * sum.w / sum.ww
7,122✔
113
        else n.observations * naive / taylor
7,122✔
114

115
    list(taylor = taylor,
7,122✔
116
         naive = naive,
7,122✔
117
         ess = ess,
7,122✔
118
         se = sqrt(taylor))
7,122✔
119
}
120

121
# A simplification of RaoScottSecondOrder2b2 from Q's C#
122
# https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/Tests/ChiSquareTests.cs
123
# aa and bb contain summary statistics for each sample
124
#' @importFrom verbs Sum SumEachRow SumEachColumn
125
raoScottSecondOrderChiSquareTest <- function(aa, bb, is.weighted)
126
{
127
    if (!is.null(dim(aa[["Average"]])))
2,348✔
128
        pvals <- matrix(NA, nrow = NROW(aa[["Average"]]), ncol = NCOL(aa[["Average"]]))
×
129
    else
130
        pvals <- rep(NA, length = length(aa[["Average"]]))
2,348✔
131
    if (length(aa[["Average"]]) == 0)
2,348✔
132
        return(pvals)
×
133

134
    for (i in 1:length(aa[["Average"]]))
2,348✔
135
    {
136
        n.observations <- aa[["Base n"]][i] + bb[["Base n"]][i]
2,356✔
137
        if (is.weighted)
2,356✔
138
        {
139
            sum.w <- aa[["sumW"]][i] + bb[["sumW"]][i]
194✔
140
            sum.ww <- aa[["sumWW"]][i] + bb[["sumWW"]][i]
194✔
141
            p.a <- aa[["sumW"]][i] / sum.w
194✔
142
        } else
143
            p.a <- aa[["Base n"]][i] / n.observations
2,162✔
144
        p.b <- 1 - p.a
2,356✔
145

146
        proportiony = p.a
2,356✔
147
        mean.a <- aa[["Average"]][i]
2,356✔
148
        if (is.na(mean.a))
2,356✔
149
            mean.a <- 0
28✔
150
        mean.b <- bb[["Average"]][i]
2,356✔
151
        if (is.na(mean.b))
2,356✔
152
            mean.b <- 0
9✔
153

154
        sums.ww <- c(bb[["sumWW"]][i] - bb[["sumXWW"]][i], bb[["sumXWW"]][i],
2,356✔
155
                     aa[["sumWW"]][i] - aa[["sumXWW"]][i], aa[["sumXWW"]][i])
2,356✔
156
        proportions <- c(p.b * (1-mean.b), p.b * mean.b,
2,356✔
157
                         p.a * (1-mean.a), p.a * mean.a)
2,356✔
158
        counts = matrix(proportions * n.observations, 2)
2,356✔
159

160
        # If not weighted, this reduces to a chi-square test
161
        group_sizes = SumEachColumn(counts, remove.missing = FALSE)
2,356✔
162
        row.totals = SumEachRow(counts, remove.missing = FALSE)
2,356✔
163
        total = Sum(row.totals, remove.missing = FALSE)
2,356✔
164

165
        expected = matrix(c(group_sizes[1]*row.totals[1]/total,
2,356✔
166
                            group_sizes[1]*row.totals[2]/total,
2,356✔
167
                            group_sizes[2]*row.totals[1]/total,
2,356✔
168
                            group_sizes[2]*row.totals[2]/total), 2)
2,356✔
169

170
        pearson.statistic = Sum((counts - expected)^2/expected, remove.missing = FALSE)
2,356✔
171
        if (!is.weighted)
2,356✔
172
            pvals[i] <- pchisq(pearson.statistic, 1, lower.tail = FALSE)
2,162✔
173
        else
174
        {
175
            variance = multinomialCovarianceMatrix(proportions,
194✔
176
                    sums.ww, sum.ww, sum.w, n.observations)
194✔
177
            if (!is.na(pearson.statistic)) # If not a missing value
194✔
178
            {
179
                a = matrix(0, 4, 1)
112✔
180
                id_mat = d_mat = matrix(0, 4, 4)
112✔
181
                denominator = 0.0;
112✔
182
                for (j in 1:4)
112✔
183
                {
184
                    prop = proportions[j];
448✔
185
                    d_mat[j, j] = prop;
448✔
186
                    prop_is_0 = prop < 1e-12
448✔
187
                    j_prop = if (prop_is_0) 0 else 1.0 / prop;
448✔
188
                    if (!prop_is_0) id_mat[j, j] = j_prop;
408✔
189
                    a[j, 1] = j_prop / 4.0;
448✔
190
                    denominator = denominator + j_prop;
448✔
191
                }
192
                a[2, 1] = -a[2, 1];
112✔
193
                a[3, 1] = -a[3, 1];
112✔
194
                denominator = denominator * .0625 / n.observations;
112✔
195
                numerator = t(a) %*% variance %*% a
112✔
196
                delta = numerator / denominator;
112✔
197
                f = pearson.statistic / delta
112✔
198
            } else
199
                f <- NA
82✔
200
            pvals[i] <- (1 - pf(f, 1, n.observations - 1))
194✔
201
        }
202
    }
203
    return(pvals)
2,348✔
204
}
205

206

207
# Code modified from https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/Tests/TTests.cs and
208
# https://github.com/Displayr/q/blob/master/Flip/DataAnalysis/Inference/Tests/CombinedZandTTest.cs
209
tTest <- function(mean1, mean2, se1, se2, n1, n2,
210
                  is.binary, is.weighted, bessel = 0, dEff = 1)
211
{
212
    if (!is.binary)
1,222✔
213
    {
214
        v1 <- se1 * se1
1,219✔
215
        v2 <- se2 * se2
1,219✔
216
        v <- v1 + v2
1,219✔
217
        se <- sqrt(v)
1,219✔
218
        df <- v * v / (v1 * v1 / (n1 - 1) + v2 * v2 / (n2 - 1))
1,219✔
219

220
    } else if (is.binary && !is.weighted)
3✔
221
    {
222
        m12 <- (n1 * mean1 + n2 * mean2)/(n1 + n2)
1✔
223
        se <- sqrt(m12 * (1 - m12) * (1/n1 + 1/n2))
1✔
224
        df <- n1 + n2 - 2
1✔
225

226
    } else if (is.binary && is.weighted)
2✔
227
    {
228
        se <- sqrt(dEff * (se1 * se1 + se2 * se2))
2✔
229
        df <- (se1^2 + se2^2)^2 /
2✔
230
            ((se1^4)/(n1-bessel) + (se2^4)/(n2-bessel))
2✔
231
    }
232
    t = (mean1 - mean2)/se
1,222✔
233
    p = pt(-abs(t), df) * 2
1,222✔
234
    return(p)
1,222✔
235
}
236

237
#' @importFrom stats pnorm
238
zTest <- function(mean1, mean2, se1, se2, n1, n2, is.binary, is.weighted, bessel = 0)
239
{
240
    if (!is.binary)
12✔
241
        se <- sqrt(se1 * se1 + se2 * se2)
2✔
242
    else if(is.binary && !is.weighted)
10✔
243
    {
244
        m12 <- (n1 * mean1 + n2 * mean2)/(n1 + n2)
8✔
245
        se <- sqrt(m12 * (1 - m12) * (n1 + n2) / (n1 + n2 - 2*bessel) * (1/n1 + 1/n2))
8✔
246

247
    } else if (is.binary && is.weighted)
2✔
248
    {
249
        se <- sqrt(se1 * se1 + se2 * se2)
2✔
250
    }
251
    z = (mean1 - mean2)/se
12✔
252
    p = pnorm(-abs(z)) * 2
12✔
253
    return(p)
12✔
254
}
255

256

257
multinomialCovarianceMatrix <- function(proportions, ww, ww_total, w_total, n)
258
{
259
    k =length(proportions)
194✔
260
    covariance = matrix(0, k, k)
194✔
261
    for (r in 1:4)
194✔
262
    {
263
        for (c in 1:4)
776✔
264
        {
265
            p1 = proportions[r];
3,104✔
266
            p2 = proportions[c];
3,104✔
267
            ww1 = ww[r];
3,104✔
268
            ww2 = ww[c];
3,104✔
269
            sc = if(r == c) computeSamplingVarianceForProportion(p1, ww1, ww_total, w_total, n)
3,104✔
270
                 else       samplingCovariance(p1, p2, ww1, ww2, ww_total, w_total, n)
3,104✔
271
            covariance[c, r] = covariance[r, c] = sc
3,104✔
272
        }
273
    }
274
    return(covariance)
194✔
275
}
276

277
computeSamplingVarianceForProportion <- function(input_proportion, ww, ww_total, w_total,sample_size)
278
{
279
    proportion = input_proportion
776✔
280
    if (is.na(proportion)) {
776✔
281
        proportion <- NA
40✔
282
    } else if (proportion < 1E-8) {
736✔
283
        proportion = 0.0;
184✔
284
    } else if (proportion > 1 - 1e-8)
552✔
285
        proportion = 1.0;
×
286
    sumSquaredWeights = ww_total;
776✔
287
    n = sample_size;
776✔
288
    mean = proportion;
776✔
289
    complement = (1.0 - proportion)
776✔
290
    variance = proportion * complement
776✔
291
    population = w_total;
776✔
292
    variance = variance * sample_size/(sample_size - 1.0);
776✔
293
    ww_sums_of_squares = complement * complement * ww + proportion * proportion * (ww_total - ww)
776✔
294
    return(ww_sums_of_squares / (w_total * w_total) * sample_size/(sample_size - 1.0))
776✔
295
}
296

297
# From the C# SamplingVariance(double proportion1, double proportion2, double ww1, double ww2, double ww_total, double w_total, int n, StatisticalAssumptions statistical_assumptions)
298
samplingCovariance <- function(proportion1,  proportion2,  ww1,  ww2,  ww_total,  w_total, n)
299
{
300
    ww_sums_of_squares = -proportion1 * -proportion2 * (ww_total - ww1 - ww2) + -proportion1 * (1 - proportion2) * ww2 + -proportion2 * (1 - proportion1) * ww1
2,328✔
301
    return(ww_sums_of_squares / (w_total * w_total) * (n / (n - 1)))
2,328✔
302
}
303

304
#' @importFrom verbs Sum
305
computeNumericVarStats <- function(x, w)
306
{
307
    n.observations <- length(x)
7,122✔
308
    ww = w * w
7,122✔
309
    xw = x * w
7,122✔
310
    xxw = x * xw
7,122✔
311
    xww = xw * w
7,122✔
312
    xxww = xxw * w
7,122✔
313
    sum.w = Sum(w, remove.missing = FALSE)
7,122✔
314
    sum.xw = Sum(xw, remove.missing = FALSE)
7,122✔
315
    sum.ww = Sum(ww, remove.missing = FALSE)
7,122✔
316
    sum.xxw = Sum(xxw, remove.missing = FALSE)
7,122✔
317
    sum.xww = Sum(xww, remove.missing = FALSE)
7,122✔
318
    sum.xxww = Sum(xxww, remove.missing = FALSE)
7,122✔
319
    mean.x = sum.xw / sum.w
7,122✔
320

321
    n.used.in.bessel.correction = n.observations
7,122✔
322
    var = computeVariances(mean.x, FALSE, sum.w, sum.ww, sum.xw, sum.xww, sum.xxw, sum.xxww, n.used.in.bessel.correction)
7,122✔
323

324
    return(c("Average" = mean.x, "Base n" = n.observations,
7,122✔
325
        sumW = sum.w, sumWW = sum.ww, sumXWW = sum.xww,
7,122✔
326
        "Standard Error" = var$se))
7,122✔
327
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc