library(tidyverse)
library(gridExtra)

1 Simulations

1.1 Helper functions

# Convert array of numbers to dataframe with single column.
numbers2df <- function(X) {
  X <- as.numeric(X) # I don't know why, but plotting libraries need this.
  X <- tibble(X = X) # # ggplot needs a dataframe.
  return(X)
}

# Transparant theme for ggplot which looks better on website.
transparent_theme <- theme(
    panel.background = element_rect(fill = "transparent"), # bg of the panel
    plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
    panel.grid.major = element_blank(), # get rid of major grid
    panel.grid.minor = element_blank(), # get rid of minor grid
    legend.background = element_rect(fill = "transparent"), # get rid of legend bg
    legend.box.background = element_rect(fill = "transparent"), # get rid of legend panel bg
    legend.key = element_rect(fill = "transparent", colour = NA), # get rid of key legend fill, and of the surrounding
    axis.line = element_line(colour = "black") # adding a black line for x and y axis
)

image.path <- function(filename) sprintf("../images/power/%s.svg", filename)
# Remove all images to avoid unused images to be kept.
unlink(image.path("*"))

1.2 Number of accidents

set.seed(2020-02-10)

P_with = 0.0012

run_trial <- function(n_samples, P) {
  X <- runif(1:n_samples, 0, 1)
  # Number of accidents after taking n samples.
  return(sum(X < P))
}

run_trials <- function(n_samples, P, n_trials) {
  results <- 1:n_trials %>% map(function(x) run_trial(n_samples, P))
  return(numbers2df(results))
}

my_round <- function(x) format(round(x, 1), nsmall = 1)
variance <- function() TeX('$\\mu^2')
mean_var <- function(X) sprintf("mean = %s, and variance = %s", my_round(mean(X)), my_round(var(X)))

n_samples = 100000 # sample size per trial.

ggplot_hist <- function(n_samples, name, P, n_trials) {
  df <- run_trials(n_samples, P, n_trials)
  print(mean_var(df$X))
  p <- ggplot(df, aes(X)) + 
    geom_histogram(binwidth = 1) +
    xlab("number of accidents per 100 000 right turns") +
    ylab("frequency") +
    scale_x_continuous(limits = c(0, 200)) + 
    transparent_theme
  ggsave(file=image.path(name), plot=p, width=7, height=4, bg="transparent")
  return(p)
}

ggplot_dens <- function(n_samples, name, P, n_trials) {
  df <- run_trials(n_samples, P, n_trials)
  print(mean_var(df$X))
  p <- ggplot(df, aes(X)) + 
    geom_density() +
    xlab("number of accidents per 100 000 right turns") +
    scale_x_continuous(limits = c(0, 200)) + 
    transparent_theme +
    stat_function(fun=dbinom, args = list(size = 100000, prob = P_with), linetype="dashed") +
    annotate("text", 155, 0.038, label = "Number of accidents") +
    annotate("text", 90, 0.034, label = "B(100 000, 0.0012)")
  ggsave(file=image.path(name), plot=p, width=7, height=4, bg="transparent")
  return(p)
}

ggplot_hist(n_samples, "multiple-years", P_with, 20)
## [1] "mean = 119.7, and variance = 66.6"

ggplot_dens(n_samples, "many-years", P_with, 100)
## [1] "mean = 120.9, and variance = 106.6"