Browse Source

Multiple Comparisons

Will Koehrsen 7 years ago
parent
commit
0cebbd3847

+ 57 - 0
statistical_significance/bonferroni.Rmd

@@ -0,0 +1,57 @@
+---
+title: "Multiple Comparisons and the Bonferroni Correction"
+author: "Will Koehrsen"
+date: "January 25, 2018"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+# Multiple Comparisons
+
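+We have a feature vector `x` of 100 random data points. We want to test it
+against 100 other random vectors by regressing `x` on each one in turn. We will
+use $\alpha = 0.05$ for all of our comparisons, so even though every vector is
+pure noise we should expect about $100 \times 0.05 = 5$ of them to look
+"significant" by chance alone.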
+
+```{r}
+# Response: 100 standard-normal draws, folded to be non-negative and
+# scaled by 5.
+x <- 5 * abs(rnorm(100))
+
+# Predictors: 10,000 uniform draws on [0, 5], arranged as 100 columns of
+# 100 values each.
+y <- matrix(runif(1e4, 0, 5), nrow = 100, ncol = 100)
+df <- as.data.frame(y)
+df$x <- x
+
+# Regress x on a single predictor column and return the p-value of its
+# slope (row 2, column 4 of the coefficient table).
+slope_p_value <- function(term) {
+  fit <- lm(df$x ~ df[[term]])
+  summary(fit)$coefficients[2, 4]
+}
+
+# One row per predictor, in column order; x itself is never tested.
+predictors <- setdiff(names(df), 'x')
+p_values <- data.frame(
+  variable = predictors,
+  p_value = vapply(predictors, slope_p_value, numeric(1), USE.NAMES = FALSE),
+  stringsAsFactors = FALSE
+)
+
+
+```
+
+```{r}
+library(tidyverse)
+library(ggthemes)
+
+# Order the comparisons from most to least significant.
+p_values <- arrange(p_values, p_value)
+
+# Predictors whose slope looks "significant" at alpha = 0.05. Because the data
+# are random, how many there are changes from run to run.
+sig_vars <- dplyr::filter(p_values, p_value < 0.05)$variable
+
+# drop = FALSE keeps a data frame even when exactly one variable is selected;
+# without it the single column collapses to a bare vector and gather() fails.
+sig_df <- gather(df[, sig_vars, drop = FALSE])
+
+# gather() stacks the selected columns one after another, so the shared x
+# values must be repeated once per selected column (not a fixed 4 times).
+# Storing them in sig_df also leaves the original 100-value x untouched.
+sig_df$x <- rep(df$x, times = length(sig_vars))
+
+# Map aesthetics to columns of sig_df rather than `sig_df$...` vectors so the
+# layers stay tied to the data and the legend is titled "key".
+ggplot(data = sig_df, aes(x = x, y = value, color = key)) +
+  geom_point() +
+  geom_smooth(method = 'lm', se = FALSE) +
+  theme_fivethirtyeight(14) +
+  xlab('x') + ylab('Y') + ggtitle('Y vs X for Random Variables')
+
+```

BIN
statistical_significance/images/no_correction.png


BIN
statistical_significance/images/spurious.png


BIN
statistical_significance/images/with_correction.png


File diff suppressed because it is too large
+ 161 - 0
statistical_significance/multiple_comparisons.ipynb


+ 22 - 0
statistical_significance/style/bmh_matplotlibrc.json

@@ -0,0 +1,22 @@
+{
+  "lines.linewidth": 2.0,
+  "axes.edgecolor": "#bcbcbc",
+  "patch.linewidth": 0.5,
+  "legend.fancybox": true,
+  "axes.color_cycle": [
+    "#348ABD",
+    "#A60628",
+    "#7A68A6",
+    "#467821",
+    "#CF4457",
+    "#188487",
+    "#E24A33"
+  ],
+  "axes.facecolor": "#eeeeee",
+  "axes.labelsize": "large",
+  "axes.grid": true,
+  "patch.edgecolor": "#eeeeee",
+  "axes.titlesize": "x-large",
+  "svg.fonttype": "path",
+  "examples.directory": ""
+}