library(tidyverse)
library(gridExtra)

1 Simulations

1.1 Helper functions

# Convert array of numbers to dataframe with single column.
numbers2df <- function(X) {
  X <- as.numeric(X) # I don't know why, but plotting libraries need this.
  X <- tibble(X = X) # # ggplot needs a dataframe.
  return(X)
}

# Transparant theme for ggplot which looks better on website.
transparent_theme <- theme(
    panel.background = element_rect(fill = "transparent"), # bg of the panel
    plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
    panel.grid.major = element_blank(), # get rid of major grid
    panel.grid.minor = element_blank(), # get rid of minor grid
    legend.background = element_rect(fill = "transparent"), # get rid of legend bg
    legend.box.background = element_rect(fill = "transparent"), # get rid of legend panel bg
    legend.key = element_rect(fill = "transparent", colour = NA), # get rid of key legend fill, and of the surrounding
    axis.line = element_line(colour = "black") # adding a black line for x and y axis
)

image.path <- function(filename) sprintf("../images/power/%s.svg", filename)

# Remove all images to avoid unused images to be kept.
unlink(image.path("*"))

1.2 Number of accidents

set.seed(2020-02-10)

P_with = 0.0012

run_trial <- function(n_samples, P) {
  X <- runif(1:n_samples, 0, 1)
  # Number of accidents after taking n samples.
  return(sum(X < P))
}

run_trials <- function(n_samples, P, n_trials) {
  results <- 1:n_trials %>% map(function(x) run_trial(n_samples, P))
  return(numbers2df(results))
}

my_round <- function(x) format(round(x, 1), nsmall = 1)
variance <- function() TeX('$\\mu^2')
mean_var <- function(X) sprintf("mean = %s, and variance = %s", my_round(mean(X)), my_round(var(X)))

n_samples = 100000 # sample size per trial.

ggplot_hist <- function(n_samples, name, P, n_trials) {
  df <- run_trials(n_samples, P, n_trials)
  print(mean_var(df$X))
  p <- ggplot(df, aes(X)) + 
    geom_histogram(binwidth = 1) +
    xlab("number of accidents per 100 000 right turns") +
    ylab("frequency") +
    scale_x_continuous(limits = c(0, 200)) + 
    transparent_theme
  ggsave(file=image.path(name), plot=p, width=7, height=4, bg="transparent")
  return(p)
}

ggplot_dens <- function(n_samples, name, P, n_trials) {
  df <- run_trials(n_samples, P, n_trials)
  print(mean_var(df$X))
  p <- ggplot(df, aes(X)) + 
    geom_density() +
    xlab("number of accidents per 100 000 right turns") +
    scale_x_continuous(limits = c(0, 200)) + 
    transparent_theme +
    stat_function(fun=dbinom, args = list(size = 100000, prob = P_with), linetype="dashed") +
    annotate("text", 155, 0.038, label = "Number of accidents") +
    annotate("text", 90, 0.034, label = "B(100 000, 0.0012)")
  ggsave(file=image.path(name), plot=p, width=7, height=4, bg="transparent")
  return(p)
}

ggplot_hist(n_samples, "multiple-years", P_with, 20)

## [1] "mean = 119.7, and variance = 66.6"

ggplot_dens(n_samples, "many-years", P_with, 100)

## [1] "mean = 120.9, and variance = 106.6"

ggplot_dens(n_samples, "many-many-years", P_with, 1000)

## [1] "mean = 120.1, and variance = 117.3"

1.3 Comparing binomial and normal distributions

mu.n <- 120

p <- ggplot(data.frame(x = c(0,200)), aes(x)) +
  stat_function(fun=dbinom, args = list(size = 100000, prob = 0.0012), show.legend = TRUE, aes(colour = "B(100 000, 0.0012)")) +
  stat_function(fun=dbinom, args = list(size = 1000, prob = 0.12), show.legend = TRUE, aes(colour = "B(1000, 0.12)")) +
  stat_function(fun=dnorm, args = list(mean = mu.n, sd = sqrt(mu.n)), show.legend = TRUE, aes(colour = "N(120, sqrt(120))")) +
  transparent_theme +
  scale_colour_manual("Distributions", values = c("blue", "black", "green"))
ggsave(file=image.path("binomial-and-normal-distributions"), plot=p, width=7, height=4, bg="transparent")
p

1.4 Uniform to normal distribution

# Make more generic than above. 
# For the plot take a function.

# Returns a combined plot for a histogram function and distribution function.
plot_hist_and_fn <- function(n, hist_fn, name, dist_fn) {
  df <- numbers2df(1:n %>% map(hist_fn))
  print(mean_var(df$X))
  p = ggplot(df, aes(X)) + 
    geom_histogram(binwidth=1) +
    xlab("number of accidents per 100 000 right turns") +
    ylab("frequency") +
    scale_x_continuous(limits = c(0, 200)) + 
    transparent_theme + 
    stat_function(fun=dist_fn, colour="blue")
  # ggsave(file=image.path(name), plot=p, bg="transparent")
  return(p)
}

1.5 Deriving the sample distribution

set.seed(2020-02-09) # To allow reproducing this section separately.

If we take only a few samples from a normal distribution these will have a higher variance than the normal distribution.

sd <- sqrt(mu.n)
generate_sample <- function(n) mean(rnorm(n, mean = mu.n, sd = sd))

generate_samples <- function(n_samples, n_trials) {
  results <- 1:n_trials %>% map(function(x) generate_sample(n_samples))
  return(results)
}

ggplot_dens <- function(n_samples, n_trials, name) {
  df <- numbers2df(generate_samples(n_samples, n_trials))
  print(mean_var(df$X))
  p <- ggplot(df, aes(X)) + 
    geom_histogram(binwidth=1) +
    xlab("number of accidents") +
    scale_x_continuous(limits = c(0, 200)) + 
    transparent_theme
  ggsave(file=image.path(name), plot=p, width=7, height=4, bg="transparent")
  return(p)
}

n_trials <- 10000
ggplot_dens(1, n_trials, "one-sample-distribution")

## [1] "mean = 119.9, and variance = 120.0"

ggplot_dens(2, n_trials, "two-sample-distribution")

## [1] "mean = 120.0, and variance = 60.0"

dnorm(mu.n, mean=mu.n, sd=sd)

## [1] 0.03641828

dnorm(120, mean=mu.n, sd=sd)

## [1] 0.03641828

# Remove population mean
generate_Si <- function(n) rnorm(n, mean=mu.n, sd=sd)
df <- tibble(Samples = 1:1e5 %>% map(function(x) generate_Si(2)))
df$Mean <- as.numeric(df$Samples %>% map(mean))
df$Sd <- as.numeric(df$Samples %>% map(function(x) sd(x, na.rm=TRUE)))
df$X <- as.numeric(df$Mean - mu.n)

p <- ggplot(df, aes(X)) + 
  geom_histogram(binwidth = 0.1) +
  xlab("number of accidents") +
  transparent_theme
ggsave(file=image.path("two-sample-without-mean"), plot=p, width=7, height=4, bg="transparent")
p

# Remove distribution's standard deviation
df$W <- df$X / (sqrt(65) / sqrt(2))
p <- ggplot(df, aes(W)) + 
  geom_histogram(binwidth = 0.1) +
  scale_x_continuous(limits = c(-4, 4)) + 
  xlab("number of accidents") +
  transparent_theme
ggsave(file=image.path("two-sample-without-sd"), plot=p, bg="transparent")
p

# Add t distribution
p <- ggplot(df, aes(W)) + 
  geom_density() +
  xlab("number of accidents") +
  transparent_theme + 
  scale_y_continuous(limits = c(0, 0.42)) + 
  scale_x_continuous(limits = c(-4, 4)) + 
  stat_function(fun=dt, args=list(df = 1), show.legend = TRUE, aes(colour = "T(1)"))
ggsave(file=image.path("two-sample-with-t-1"), plot=p, width=7, height=4, bg="transparent")
p

Still incorrect, don’t know why

1.6 Comparing normal and t-distributions

p <- ggplot(data.frame(x = c(-4,4)), aes(x)) +
  stat_function(fun=dnorm, args = list(mean = 0, sd = 1)) +
  stat_function(fun=dt, args = list(df = 5), linetype="dotted") +
  stat_function(fun=dt, args = list(df = 1), linetype="dashed") +
  transparent_theme +
  annotate("text", 0, 0.42, label = "N(0, 1)") +
  annotate("text", 0, 0.35, label = "T(5)") +
  annotate("text", 0, 0.28, label = "T(1)") +
  scale_colour_manual("Distributions", values = c("blue", "black", "green", "red"))
ggsave(file=image.path("normal-and-t-distributions"), width=7, height=4, plot=p, bg="transparent")
p

1.7 Some samples

set.seed(2020-02-13)
n_samples <- 12
samples <- tibble(
  index = 1:n_samples,
  without = as.numeric(map(rnorm(n_samples, mean = 100, sd = sqrt(mu.n)), my_round)),
  with = as.numeric(map(rnorm(n_samples, mean = mu.n, sd = sqrt(mu.n)), my_round))
)
head(samples, 10)

index	without	with
1	110.6	117.0
2	113.9	112.9
3	105.2	120.7
4	105.6	98.5
5	95.5	131.2
6	100.8	126.0
7	96.9	104.3
8	102.8	114.0
9	110.5	127.6
10	84.6	130.3

1.8 Single calculation example

n = 2
x = samples$with[1:n]
t = (mean(x) - 100) /(sd(x) / sqrt(n))
t

## [1] 7.292683

s <- sd(x)
sprintf("s: %s", s)

## [1] "s: 2.89913780286484"

# Two-sided 95% CI
qt(0.975, df=n-1)

## [1] 12.7062

t.test(x, mu = 100, alternative="two.sided", var.equal = TRUE)

## 
##  One Sample t-test
## 
## data:  x
## t = 7.2927, df = 1, p-value = 0.08675
## alternative hypothesis: true mean is not equal to 100
## 95 percent confidence interval:
##   88.90228 140.99772
## sample estimates:
## mean of x 
##    114.95

1.9 t-distribution one versus two sided

n = 2
right.one.sided <- qt(0.80, df=n-1)
right.one.sided

## [1] 1.376382

p <- ggplot(data.frame(x = c(-4,4)), aes(x)) +
  stat_function(fun=dt, args = list(df = n-1), colour="black") +
  stat_function(fun=dt, args = list(df = n-1), xlim=c(-4,right.one.sided), geom="area", alpha=0.4, fill="blue", show.legend = FALSE) +
  stat_function(fun=dt, args = list(df = n-1), xlim=c(-right.one.sided,right.one.sided), geom="area", alpha=0.2, fill="blue", show.legend = TRUE) +
  transparent_theme +
  annotate("text", -2.8, 0.38, label = "20%") +
  annotate("text", -right.one.sided - 0.25, 0.35, label = "-a") +
  geom_vline(aes(xintercept=-right.one.sided), colour="blue", alpha=0.2, linetype="dashed") +
  annotate("text", 0, 0.38, label = "60%") +
  geom_vline(aes(xintercept=right.one.sided), colour="blue", alpha=0.2, linetype="dashed") +
  annotate("text", 2.8, 0.38, label = "20%") +
  ylab("probability") +
  annotate("text", right.one.sided - 0.25, 0.35, label = "a") +
  theme(legend.position = "none")
ggsave(file=image.path("t-one-versus-two-sided"), width=8, height=4, plot=p, bg="transparent")
p

1.10 Output for two samples plotted

one.sided.95 <- qt(0.95, df=1)
two.sided.95 <- qt(0.975, df=1)
round.3 <- function(x) format(round(x, 3), nsmall=3)

p <- ggplot(data.frame(x = c(-0.5,5)), aes(x)) +
  stat_function(fun=dt, args = list(df = 1), show.legend = FALSE, aes(colour = "T(n-1) = T(1)")) +
  transparent_theme +
  geom_vline(aes(xintercept=one.sided.95, colour = sprintf("x = %s (one-sided 95%% critical value)", round.3(one.sided.95))), linetype="dashed") +
  geom_vline(aes(xintercept=two.sided.95, colour = sprintf("x = %s (two-sided 95%% critical value)", round.3(two.sided.95))), linetype="dashed") +
  geom_vline(aes(xintercept=t, colour = sprintf("x = %s (calculated t-value)", round.3(t))), linetype="dashed") +
  ylab("probability") + 
  theme(legend.title = element_blank())
ggsave(file=image.path("t-and-critical-values"), width=7, height=3, plot=p, bg="transparent")
p

1.11 Doing multiple t-tests

get.p <- function(n, alternative, use.population=FALSE) {
  X <- samples$without[1:n]
  Y <- samples$with[1:n]
  if (use.population) {
    return(t.test(Y, mu = 100, alternative = alternative, var.equal = TRUE)$p.value)
  } else {
    return(t.test(Y, X, alternative = alternative, var.equal = TRUE)$p.value)
  }
}

plot.power <- function(name, use.population=FALSE) {
  df <- tibble(x = 2:n_samples)
  df$p.greater <- as.numeric(df$x %>% map(function(x) get.p(x, "greater", use.population = use.population)))
  df$p.two.sided <- as.numeric(df$x %>% map(function(x) get.p(x, "two.sided", use.population = use.population)))
  
  df1 <- tibble(x = 2:n_samples)
  df1$p <- as.numeric(df$x %>% map(function(x) get.p(x, "two.sided", use.population = use.population)))
  df1$alternative = "two.sided"
  
  df2 <- tibble(x = 2:n_samples)
  df2$p <- as.numeric(df$x %>% map(function(x) get.p(x, "greater", use.population = use.population)))
  df2$alternative = "greater"
  
  df3 <- tibble(x = 2:n_samples)
  df3$p <- as.numeric(df$x %>% map(function(x) get.p(x, "less", use.population = use.population)))
  df3$alternative = "less"
  
  df <- rbind(df1, df2, df3)
  
  # To compare manual calculations to.
  head(df2, 3)
  head(df1, 3)
  
  p <- ggplot(df, aes(x, p)) +
    geom_point(aes(colour = alternative)) +
    scale_x_continuous(breaks = 1:n_samples) +
    geom_hline(yintercept = 0.05) + 
    geom_line(aes(colour = alternative)) +
    annotate("text", n_samples - 1, 0.05, vjust = -1, label = "p = 0.05") +
    xlab("sample size") +
    ylab("p-value") + 
    transparent_theme 
  ggsave(file=image.path(name), plot=p, bg="transparent")
  return(p)
}

plot.power("power-one-sample", use.population = TRUE)

## Saving 7 x 5 in image

plot.power("power-two-sample", use.population = FALSE)

## Saving 7 x 5 in image

1.12 Manual cohen’s d calculation

library(pwr)
# Checkout the generated html to see variable name and output.
samples_a <- samples$with[1:2]
samples_a

## [1] 117.0 112.9

samples_b <- samples$without[1:2]
samples_b

## [1] 110.6 113.9

s_a <- sd(samples_a)
s_a

## [1] 2.899138

s_b <- sd(samples_b)
s_b

## [1] 2.333452

m_a <- mean(samples_a)
m_a

## [1] 114.95

m_b <- mean(samples_b)
m_b

## [1] 112.25

s_p <- sqrt((s_a^2 + s_b^2) / 2)
s_p

## [1] 2.631539

d <- (m_a - m_b) / s_p
d

## [1] 1.026015

pwr.t.test(n=2, d=d, sig.level=0.05, alternative="two.sided")

## 
##      Two-sample t test power calculation 
## 
##               n = 2
##               d = 1.026015
##       sig.level = 0.05
##           power = 0.09752365
##     alternative = two.sided
## 
## NOTE: n is number in *each* group

1.13 Plotting the value for d over multiple samples

plot.d <- function(n, name) {
  
  get.d <- function(n) {
    S_a <- samples$with[1:n]
    S_b <- samples$without[1:n]
    
    s_p <- function(S_a, S_b) sqrt((sd(S_a)^2 + sd(S_b)^2) / 2)
    d <- function(S_a, S_b) (mean(S_a) - mean(S_b)) / s_p(S_a, S_b)
    
    print(sprintf("n: %s, s_p: %s, d: %s", n, s_p(S_a, S_b), d(S_a, S_b)))
    return(d(S_a, S_b))
  }
  
  get.d(2)
  df <- tibble(x = 2:n)
  df$d <- as.numeric(df$x %>% map(get.d))
  
  print(head(df, 4))
  
  p <- ggplot(df, aes(x, d)) +
    geom_point() +
    geom_line() +
    scale_x_continuous(breaks = 1:n_samples) +
    xlab("sample size") +
    expand_limits(y=0) + 
    ylab("Cohen's d") + 
    transparent_theme 
  ggsave(file=image.path(name), width=7, height=3, plot=p, bg="transparent")
  return(p)
}

plot.d(12, "d")

## [1] "n: 2, s_p: 2.63153947338815, d: 1.02601539034628"
## [1] "n: 2, s_p: 2.63153947338815, d: 1.02601539034628"
## [1] "n: 3, s_p: 4.1541144262847, d: 1.67705218291192"
## [1] "n: 4, s_p: 7.48214541425118, d: 0.461097694443203"
## [1] "n: 5, s_p: 9.77537723057274, d: 1.01274864043482"
## [1] "n: 6, s_p: 9.33135395677748, d: 1.33421152575157"
## [1] "n: 7, s_p: 9.50891811982435, d: 1.23342858575252"
## [1] "n: 8, s_p: 8.82076992914208, d: 1.32216349521479"
## [1] "n: 9, s_p: 8.86140194576707, d: 1.3842805846908"
## [1] "n: 10, s_p: 9.94241475251919, d: 1.57004112064876"
## [1] "n: 11, s_p: 9.75088807144345, d: 1.69774745984699"
## [1] "n: 12, s_p: 9.34224479869736, d: 1.81701511422254"
## # A tibble: 4 x 2
##       x     d
##   <int> <dbl>
## 1     2 1.03 
## 2     3 1.68 
## 3     4 0.461
## 4     5 1.01

## Power analysis (as simple and straightforward as possible)

# Estimating effect size as a function of n, d, and alpha
# This assumes that our effect size is 1.
# Combine plot with one for effect size is 0.5.
pwr.t.test(n=2, d=1, sig.level=0.05, alternative="two.sided")

## 
##      Two-sample t test power calculation 
## 
##               n = 2
##               d = 1
##       sig.level = 0.05
##           power = 0.09520176
##     alternative = two.sided
## 
## NOTE: n is number in *each* group

partial.test.8 <- function(n) pwr.t.test(n=n, d=0.8, sig.level=0.05, alternative="two.sided")$power
partial.test.5 <- function(n) pwr.t.test(n=n, d=0.5, sig.level=0.05, alternative="two.sided")$power
partial.test.2 <- function(n) pwr.t.test(n=n, d=0.2, sig.level=0.05, alternative="two.sided")$power

p <- ggplot(data.frame(x = c(2, 200)), aes(x)) +
  stat_function(fun=partial.test.8) +
  stat_function(fun=partial.test.5) +
  stat_function(fun=partial.test.2) +
  ylab("power") +
  xlab("sample size") +
  transparent_theme +
  annotate("text", 25, 0.95, label="d = 0.80") +
  annotate("text", 60, 0.65, label="d = 0.50") + 
  annotate("text", 120, 0.4, label="d = 0.20")
ggsave(file=image.path("varying-d"), width=7, height=4, plot=p, bg="transparent")
p

# For example in text
pwr.t.test(d=0.5, power=0.6, sig.level=0.05, alternative="two.sided")

## 
##      Two-sample t test power calculation 
## 
##               n = 40.16953
##               d = 0.5
##       sig.level = 0.05
##           power = 0.6
##     alternative = two.sided
## 
## NOTE: n is number in *each* group

pwr.t.test(n=2, d=0.5, sig.level=0.05, alternative="two.sided")

## 
##      Two-sample t test power calculation 
## 
##               n = 2
##               d = 0.5
##       sig.level = 0.05
##           power = 0.06150786
##     alternative = two.sided
## 
## NOTE: n is number in *each* group

# Type I error versus type II error
partial.test.n2 <- function(a) pwr.t.test(n=2, d=0.5, sig.level=a, alternative="two.sided")$power
partial.test.n20 <- function(a) pwr.t.test(n=20, d=0.5, sig.level=a, alternative="two.sided")$power
partial.test.n80 <- function(a) pwr.t.test(n=80, d=0.5, sig.level=a, alternative="two.sided")$power

p <- ggplot(data.frame(x = c(0,1)), aes(x)) +
  stat_function(fun=partial.test.n2) +
  stat_function(fun=partial.test.n20) +
  stat_function(fun=partial.test.n80) +
  ylab("power") +
  xlab("significance level") +
  scale_x_continuous(trans = "reverse") +
  transparent_theme +
  annotate("text", 0.50, 0.45, label="n = 2") +
  annotate("text", 0.32, 0.65, label="n = 20") +
  annotate("text", 0.12, 0.85, label="n = 80") +
  geom_vline(aes(xintercept=0.05), linetype="dashed") +
  annotate("text", 0.13, 0.02, label="p = 0.05")
ggsave(file=image.path("varying-n"), width=7, height=4, plot=p, bg="transparent")
p

pwr.t.test(n=2, d=0.5, sig.level=0.05, alternative="two.sided")$power

## [1] 0.06150786

pwr.t.test(n=20, d=0.5, sig.level=0.05, alternative="two.sided")$power

## [1] 0.337939

pwr.t.test(n=80, d=0.5, sig.level=0.05, alternative="two.sided")$power

## [1] 0.8816025

2 Appendix

2.1 R version

sessionInfo()

## R version 3.6.2 (2019-12-12)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: NixOS 20.03 (Markhor)
## 
## Matrix products: default
## BLAS/LAPACK: /nix/store/xa7fmynmfqfpmsf5vamhny6cwl61kvbh-openblas-0.3.7/lib/libopenblasp-r0.3.7.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] pwr_1.2-2       gdtools_0.2.1   gridExtra_2.3   forcats_0.4.0  
##  [5] stringr_1.4.0   dplyr_0.8.3     purrr_0.3.3     readr_1.3.1    
##  [9] tidyr_1.0.2     tibble_2.1.3    ggplot2_3.2.1   tidyverse_1.3.0
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.5  xfun_0.12         haven_2.2.0       lattice_0.20-38  
##  [5] colorspace_1.4-1  vctrs_0.2.2       generics_0.0.2    htmltools_0.4.0  
##  [9] yaml_2.2.0        utf8_1.1.4        rlang_0.4.3       pillar_1.4.3     
## [13] withr_2.1.2       glue_1.3.1        DBI_1.1.0         dbplyr_1.4.2     
## [17] modelr_0.1.5      readxl_1.3.1      lifecycle_0.1.0   munsell_0.5.0    
## [21] gtable_0.3.0      cellranger_1.1.0  rvest_0.3.5       evaluate_0.14    
## [25] labeling_0.3      knitr_1.27        fansi_0.4.1       highr_0.8        
## [29] broom_0.5.3       Rcpp_1.0.3        scales_1.1.0      backports_1.1.5  
## [33] jsonlite_1.6      farver_2.0.3      systemfonts_0.1.1 fs_1.3.1         
## [37] hms_0.5.3         digest_0.6.23     stringi_1.4.5     bookdown_0.17    
## [41] grid_3.6.2        cli_2.0.1         tools_3.6.2       magrittr_1.5     
## [45] lazyeval_0.2.2    crayon_1.3.4      pkgconfig_2.0.3   xml2_1.2.2       
## [49] reprex_0.3.0      lubridate_1.7.4   svglite_1.2.2     assertthat_0.2.1 
## [53] rmarkdown_2.1     httr_1.4.1        rstudioapi_0.10   R6_2.4.1         
## [57] nlme_3.1-143      compiler_3.6.2

Statistical power

Rik Huijzer

2020-05-24