## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----eval = FALSE------------------------------------------------------------- # # if (!require("remotes", quietly = TRUE)) # # install.packages("remotes") # # # # remotes::install_github("statdivlab/radEmu") ## ----setup, message = FALSE--------------------------------------------------- library(magrittr) library(dplyr) library(ggplot2) library(stringr) library(radEmu) ## ----------------------------------------------------------------------------- data("wirbel_sample") data("wirbel_otu") mOTU_names <- colnames(wirbel_otu) ## ----------------------------------------------------------------------------- wirbel_sample$Group <- factor(wirbel_sample$Group, levels = c("CTR","CRC")) chosen_genera <- c("Eubacterium", "Faecalibacterium", "Fusobacterium", "Porphyromonas") mOTU_name_df <- data.frame(name = mOTU_names) %>% mutate(base_name = stringr::str_remove(mOTU_names, "unknown ") %>% stringr::str_remove("uncultured ")) %>% mutate(genus_name = stringr::word(base_name, 1)) restricted_mOTU_names <- mOTU_name_df %>% filter(genus_name %in% chosen_genera) %>% pull(name) ch_study_obs <- which(wirbel_sample$Country %in% c("CHI")) small_Y <- wirbel_otu[ch_study_obs, restricted_mOTU_names] sum(rowSums(small_Y) == 0) # no samples have a count sum of 0 sum(colSums(small_Y) == 0) # one category has a count sum of 0 category_to_rm <- which(colSums(small_Y) == 0) small_Y <- small_Y[, -category_to_rm] sum(colSums(small_Y) == 0) ## ----------------------------------------------------------------------------- ch_fit_default <- emuFit(formula = ~ Group, data = wirbel_sample[ch_study_obs, ], Y = small_Y, run_score_tests = FALSE) ## ----------------------------------------------------------------------------- head(ch_fit_default$coef) ## ----------------------------------------------------------------------------- ch_fit_default$coef %>% ggplot(aes(x = estimate)) + geom_boxplot() + theme_bw() ## ----------------------------------------------------------------------------- ref_taxon <- which(stringr::str_detect(colnames(small_Y), "Fusobacterium nucleatum s. animalis")) ref_taxon # the index of F. nucleatum s. animalis in our Y matrix ## ----------------------------------------------------------------------------- # use `make_design_matrix` to find p, the number of columns in our design matrix head(make_design_matrix(formula = ~ Group, data = wirbel_sample[ch_study_obs, ])) # p = 2 reference_constraint_lists <- make_reference_constraints(p = 2, j = ref_taxon) ## ----------------------------------------------------------------------------- ch_fit_ref <- emuFit(formula = ~ Group, data = wirbel_sample[ch_study_obs, ], Y = small_Y, run_score_tests = FALSE, constraint_fn = reference_constraint_lists$constraint_list, constraint_grad_fn = reference_constraint_lists$constraint_grad_list) ## ----------------------------------------------------------------------------- head(ch_fit_ref$coef) ## ----------------------------------------------------------------------------- ch_fit_ref$coef %>% ggplot(aes(x = estimate)) + geom_boxplot() + theme_bw() ## ----------------------------------------------------------------------------- ch_test_default <- emuFit(formula = ~ Group, data = wirbel_sample[ch_study_obs, ], Y = small_Y, fitted_model = ch_fit_default, # provide our previous fit refit = FALSE, # avoid refitting model # test the first taxon, F. nucleatum s. vincentii test_kj = data.frame(j = 1, # test the second column in the design matrix # associated with the case/control covariate k = 2)) ## ----------------------------------------------------------------------------- head(ch_test_default$coef) ## ----------------------------------------------------------------------------- ch_test_ref <- emuFit(formula = ~ Group, data = wirbel_sample[ch_study_obs, ], Y = small_Y, # constraint functions from above constraint_fn = reference_constraint_lists$constraint_list, constraint_grad_fn = reference_constraint_lists$constraint_grad_list, fitted_model = ch_fit_ref, # provide our previous fit refit = FALSE, # avoid refitting model # test the first taxon, F. nucleatum s. vincentii test_kj = data.frame(j = 1, # test the second column in the design matrix # associated with the case/control covariate k = 2)) ## ----------------------------------------------------------------------------- head(ch_test_ref$coef)