## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----eval = FALSE------------------------------------------------------------- # # if (!require("remotes", quietly = TRUE)) # # install.packages("remotes") # # # # remotes::install_github("statdivlab/radEmu") ## ----setup, message = FALSE--------------------------------------------------- library(magrittr) library(dplyr) library(ggplot2) library(stringr) library(radEmu) ## ----------------------------------------------------------------------------- data("wirbel_sample") dim(wirbel_sample) head(wirbel_sample) ## ----------------------------------------------------------------------------- wirbel_sample %>% group_by(Group) %>% summarize(count = n()) ## ----------------------------------------------------------------------------- wirbel_sample %>% group_by(Country) %>% summarize(count = n()) ## ----------------------------------------------------------------------------- wirbel_sample %>% group_by(Country, Group) %>% summarize(n = n()) ## ----------------------------------------------------------------------------- data("wirbel_otu") dim(wirbel_otu) # let's check out a subset wirbel_otu[1:5, 1:3] ## ----------------------------------------------------------------------------- mOTU_names <- colnames(wirbel_otu) ## ----------------------------------------------------------------------------- wirbel_sample$Group <- factor(wirbel_sample$Group, levels = c("CTR","CRC")) ## ----------------------------------------------------------------------------- chosen_genera <- c("Eubacterium", "Faecalibacterium", "Fusobacterium", "Porphyromonas") mOTU_name_df <- data.frame(name = mOTU_names) %>% mutate(base_name = stringr::str_remove(mOTU_names, "unknown ") %>% stringr::str_remove("uncultured ")) %>% mutate(genus_name = stringr::word(base_name, 1)) restricted_mOTU_names <- mOTU_name_df %>% filter(genus_name %in% chosen_genera) %>% pull(name) ## ----------------------------------------------------------------------------- ch_study_obs <- which(wirbel_sample$Country %in% c("CHI")) ## ----------------------------------------------------------------------------- small_Y <- wirbel_otu[ch_study_obs, restricted_mOTU_names] sum(rowSums(small_Y) == 0) # no samples have a count sum of 0 sum(colSums(small_Y) == 0) # one category has a count sum of 0 category_to_rm <- which(colSums(small_Y) == 0) small_Y <- small_Y[, -category_to_rm] sum(colSums(small_Y) == 0) ## ----------------------------------------------------------------------------- ch_fit <- emuFit(formula = ~ Group, data = wirbel_sample[ch_study_obs, ], Y = small_Y, run_score_tests = FALSE) ## ----------------------------------------------------------------------------- ch_fit ## ----fig.height = 6, fig.width = 6-------------------------------------------- plot(ch_fit)$plots ## ----fig.height = 6, fig.width = 6-------------------------------------------- #create data frame that has simplified names for taxa taxa_names <- data.frame("category" = ch_fit$coef$category) %>% mutate(cat_small = stringr::str_remove(paste0("mOTU_", stringr::str_split(category, 'mOTU_v2_', simplify = TRUE)[, 2]), "\\]")) #produce plot with cleaner taxa labels plot(ch_fit, taxon_names = taxa_names)$plots ## ----------------------------------------------------------------------------- taxa_to_test <- c(which(str_detect(restricted_mOTU_names, "0777")), which(str_detect(restricted_mOTU_names, "7116"))) design <- make_design_matrix(formula = ~ Group, data = wirbel_sample[ch_study_obs, ]) colnames(design) covariate_to_test <- 2 # we see in the previous line that the covariate we care about corresponds to the second column of the design matrix ch_fit$B %>% rownames two_robust_score_tests <- emuFit(formula = ~ Group, data = wirbel_sample[ch_study_obs, ], B = ch_fit$B, test_kj = data.frame(k = covariate_to_test, j = taxa_to_test), Y = small_Y) ## ----------------------------------------------------------------------------- two_robust_score_tests$coef[taxa_to_test, c("covariate", "category", "estimate", "pval")] ## ----------------------------------------------------------------------------- data.frame(counts = wirbel_otu[ch_study_obs, "unknown Eubacterium [meta_mOTU_v2_7116]"], group = wirbel_sample$Group[ch_study_obs]) %>% mutate(eubact_present = counts > 0) %>% group_by(group, eubact_present) %>% count() ## ----------------------------------------------------------------------------- data.frame(counts = wirbel_otu[ch_study_obs, "Fusobacterium nucleatum s. nucleatum [ref_mOTU_v2_0777]"], group = wirbel_sample$Group[ch_study_obs]) %>% mutate(fuso_present = counts > 0) %>% group_by(group, fuso_present) %>% count() ## ----eval = FALSE------------------------------------------------------------- # test_all <- emuFit(formula = ~ Group, # data = wirbel_sample[ch_study_obs, ], # B = ch_fit$B, # Y = small_Y, # run_score_tests = TRUE, # test_kj = data.frame(k = covariate_to_test, # j = 1:ncol(small_Y))) ## ----eval=FALSE--------------------------------------------------------------- # all_fit <- emuFit(formula = ~ Group + Study + Gender + # Age_spline.1 + Age_spline.2 + # BMI_spline.1 + BMI_spline.2 + Sampling, # data = wirbel_sample, # Y = wirbel_otu[, restricted_mOTU_names], # run_score_tests = FALSE) ## ----eval = FALSE------------------------------------------------------------- # age_spline <- splines2::bSpline(wirbel_sample$Age, degree = 1, knots = median(wirbel_sample$Age)) # age_spline[,1] <- (age_spline[,1] - mean(age_spline[,1]))/sd(age_spline[,1]) # age_spline[,2] <- (age_spline[,2] - mean(age_spline[,2]))/sd(age_spline[,2]) # # bmi_spline <- splines2::bSpline(wirbel_sample$BMI, degree = 1, knots = median(wirbel_sample$BMI)) # bmi_spline[,1] <- (bmi_spline[,1] - mean(bmi_spline[,1]))/sd(bmi_spline[,1]) # bmi_spline[,2] <- (bmi_spline[,2] - mean(bmi_spline[,2]))/sd(bmi_spline[,2])