bonferroni.Rmd 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. ---
  2. title: "Multiple Comparisons and the Bonferroni Correction"
  3. author: "Will Koehrsen"
  4. date: "January 25, 2018"
  5. output: html_document
  6. ---
  7. ```{r setup, include=FALSE}
  # Set the knitr default so every chunk's source code is echoed in the output.
  8. knitr::opts_chunk$set(echo = TRUE)
  9. ```
  10. # Multiple Comparison
We have a feature vector `x` of 100 random data points. We want to test it
against 100 other random vectors, each of which is unrelated to `x` by
construction. We will use a significance level of $\alpha = 0.05$ for all of
our comparisons.
  13. ```{r}
  # Simulate the multiple-comparisons setting: one response vector and many
  # predictors that are unrelated to it by construction, so any "significant"
  # regression found below is a false positive.
  n_obs <- 100   # observations per vector
  n_vars <- 100  # number of candidate predictor vectors

  # Response: absolute values of standard-normal draws, scaled by 5
  x <- 5 * abs(rnorm(n_obs))

  # Predictors: n_vars columns of n_obs independent Uniform(0, 5) draws;
  # as.data.frame() names the columns V1, V2, ...
  y <- runif(n_obs * n_vars, 0, 5)
  y <- matrix(y, nrow = n_obs, ncol = n_vars)
  df <- as.data.frame(y)
  df$x <- x

  # Every column except the response is a predictor to test
  predictors <- setdiff(names(df), "x")

  # Regress x on a single predictor column and return the slope's p-value
  slope_p_value <- function(term) {
    fit <- lm(df$x ~ df[[term]])
    # Row 2 of the coefficient table is the slope (row 1 is the intercept)
    coef(summary(fit))[2, "Pr(>|t|)"]
  }

  # One row per predictor. The size follows the predictors actually present
  # instead of a hard-coded row count and a manually incremented index.
  p_values <- data.frame(
    variable = predictors,
    # USE.NAMES = FALSE keeps the default 1..n row names on the data frame
    p_value = vapply(predictors, slope_p_value, numeric(1), USE.NAMES = FALSE),
    stringsAsFactors = FALSE
  )
  34. ```
  35. ```{r}
  36. library(tidyverse)
  37. library(ggthemes)
  # Rank the predictors by p-value and keep those that look "significant" at
  # the uncorrected alpha = 0.05 level.
  p_values <- arrange(p_values, p_value)
  sig_vars <- dplyr::filter(p_values, p_value < 0.05)$variable

  # Keep x alongside the selected predictors so each reshaped row carries its
  # own x value. The number of selected variables is random, so x must not be
  # replicated a fixed number of times. drop = FALSE keeps a data frame even
  # when only one predictor is selected.
  sig_df <- df[, c("x", sig_vars), drop = FALSE]

  # Long format: one row per (observation, predictor) with columns x, key, value
  sig_df <- gather(sig_df, key = "key", value = "value", -x)

  # Map bare column names so ggplot2 looks them up in sig_df; the aesthetics
  # set here are shared by the points and the per-variable regression lines.
  ggplot(data = sig_df, aes(x = x, y = value, color = key)) +
    geom_point() +
    geom_smooth(method = "lm", se = FALSE) +
    theme_fivethirtyeight(14) +
    xlab("x") +
    ylab("Y") +
    ggtitle("Y vs X for Random Variables")
  46. ```