123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
- ---
- title: "Multiple Comparisons and the Bonferroni Correction"
- author: "Will Koehrsen"
- date: "January 25, 2018"
- output: html_document
- ---
- ```{r setup, include=FALSE}
- knitr::opts_chunk$set(echo = TRUE)  # document-wide chunk default: show each chunk's R source in the rendered output
- ```
- # Multiple Comparisons
- We have a feature vector `x` of 100 random data points. We want to test it for a
- linear relationship with each of 100 other random vectors, using a significance
- level of $\alpha = 0.05$ for every comparison.
- ```{r}
- # Response: 100 half-normal draws scaled by 5. No seed is set in this chunk,
- # so the simulated values (and the p-values below) differ from run to run.
- x <- 5 * abs(rnorm(100))
- # Predictors: a 100 x 100 matrix of Uniform(0, 5) draws, one predictor per
- # column, drawn separately from x.
- y <- matrix(runif(1e4, 0, 5), nrow = 100, ncol = 100)
- df <- as.data.frame(y)
- # Record the predictor names before x is appended as an extra column.
- predictors <- names(df)
- df$x <- x
- # Fit x ~ predictor for a single column name and return the p-value of the
- # slope: row 2 (the predictor term), column 4 of the coefficient table.
- slope_p_value <- function(term) {
-   fit <- lm(df$x ~ df[[term]])
-   summary(fit)$coefficients[2, 4]
- }
- # One row per predictor: its column name and the p-value of its regression.
- p_values <- data.frame(
-   variable = predictors,
-   p_value = vapply(predictors, slope_p_value, numeric(1), USE.NAMES = FALSE),
-   stringsAsFactors = FALSE
- )
- ```
- ```{r}
- library(tidyverse)
- library(ggthemes)
- # Sort the regressions by p-value and keep the names of the predictors that
- # fall below the uncorrected alpha = 0.05 threshold.
- p_values <- arrange(p_values, p_value)
- sig_vars <- dplyr::filter(p_values, p_value < 0.05)$variable
- if (length(sig_vars) == 0) {
-   # The number of significant predictors depends on the random draw and can
-   # be zero, in which case there is nothing to reshape or plot.
-   sig_df <- data.frame(x = numeric(0), key = character(0), value = numeric(0))
-   message("No variables reached p < 0.05; skipping the plot.")
- } else {
-   # Carry x along as a column so every reshaped row keeps its own x value,
-   # however many predictors were selected. List-style `[` also never
-   # collapses a single selected column into a bare vector.
-   sig_df <- df[c("x", sig_vars)]
-   # Long format: one row per (observation, predictor) pair, with the
-   # predictor name in `key` and its value in `value`.
-   sig_df <- pivot_longer(sig_df, cols = all_of(sig_vars),
-                          names_to = "key", values_to = "value")
-   # Aesthetics refer to columns of `sig_df` by bare name and are declared
-   # once so the points and the per-predictor linear fits share them.
-   # theme_fivethirtyeight() blanks the axis titles, so they are switched
-   # back on for xlab()/ylab() to be visible.
-   sig_plot <- ggplot(data = sig_df, aes(x = x, y = value, color = key)) +
-     geom_point() +
-     geom_smooth(method = "lm", se = FALSE) +
-     theme_fivethirtyeight(14) +
-     theme(axis.title = element_text()) +
-     xlab("x") + ylab("Y") + ggtitle("Y vs X for Random Variables")
-   print(sig_plot)
- }
- ```
|