## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval = FALSE-------------------------------------------------------------
# # if (!require("remotes", quietly = TRUE))
# #     install.packages("remotes")
# #
# # remotes::install_github("statdivlab/radEmu")

## ----setup, message = FALSE---------------------------------------------------
library(magrittr)
library(dplyr)
library(ggplot2)
library(stringr)
library(radEmu)

## -----------------------------------------------------------------------------
data("wirbel_sample")
data("wirbel_otu")
mOTU_names <- colnames(wirbel_otu)

## -----------------------------------------------------------------------------
wirbel_sample$Group <- factor(wirbel_sample$Group, levels = c("CTR","CRC"))

chosen_genera <- c("Eubacterium", "Faecalibacterium", "Fusobacterium", "Porphyromonas")
mOTU_name_df <- data.frame(name = mOTU_names) %>% 
  mutate(base_name = stringr::str_remove(mOTU_names, "unknown ") %>%
                      stringr::str_remove("uncultured ")) %>%
  mutate(genus_name = stringr::word(base_name, 1))
restricted_mOTU_names <- mOTU_name_df %>%
  filter(genus_name %in% chosen_genera) %>%
  pull(name)

ch_study_obs <- which(wirbel_sample$Country %in% c("CHI"))

small_Y <- wirbel_otu[ch_study_obs, restricted_mOTU_names]
sum(rowSums(small_Y) == 0) # no samples have a count sum of 0 
sum(colSums(small_Y) == 0) # one category has a count sum of 0

category_to_rm <- which(colSums(small_Y) == 0)
small_Y <- small_Y[, -category_to_rm]
sum(colSums(small_Y) == 0)

## -----------------------------------------------------------------------------
ch_fit_default <- emuFit(formula = ~ Group, 
                         data = wirbel_sample[ch_study_obs, ],
                         Y = small_Y,
                         run_score_tests = FALSE) 

## -----------------------------------------------------------------------------
head(ch_fit_default$coef)

## -----------------------------------------------------------------------------
ch_fit_default$coef %>% ggplot(aes(x = estimate)) + geom_boxplot() + theme_bw() 

## -----------------------------------------------------------------------------
ref_taxon <- which(stringr::str_detect(colnames(small_Y), "Fusobacterium nucleatum s. animalis"))
ref_taxon # the index of F. nucleatum s. animalis in our Y matrix

## -----------------------------------------------------------------------------
# use `make_design_matrix` to find p, the number of columns in our design matrix
head(make_design_matrix(formula = ~ Group, data = wirbel_sample[ch_study_obs, ])) # p = 2
reference_constraint_lists <- make_reference_constraints(p = 2, j = ref_taxon)

## -----------------------------------------------------------------------------
ch_fit_ref <- emuFit(formula = ~ Group, 
                     data = wirbel_sample[ch_study_obs, ],
                     Y = small_Y,
                     run_score_tests = FALSE,
                     constraint_fn = reference_constraint_lists$constraint_list,
                     constraint_grad_fn = reference_constraint_lists$constraint_grad_list) 

## -----------------------------------------------------------------------------
head(ch_fit_ref$coef)

## -----------------------------------------------------------------------------
ch_fit_ref$coef %>% ggplot(aes(x = estimate)) + geom_boxplot() + theme_bw() 

## -----------------------------------------------------------------------------
ch_test_default <- emuFit(formula = ~ Group, 
                         data = wirbel_sample[ch_study_obs, ],
                         Y = small_Y,
                         fitted_model = ch_fit_default, # provide our previous fit
                         refit = FALSE, # avoid refitting model
                         # test the first taxon, F. nucleatum s. vincentii
                         test_kj = data.frame(j = 1, 
                         # test the second column in the design matrix
                         # associated with the case/control covariate
                                              k = 2)) 

## -----------------------------------------------------------------------------
head(ch_test_default$coef)

## -----------------------------------------------------------------------------
ch_test_ref <- emuFit(formula = ~ Group, 
                      data = wirbel_sample[ch_study_obs, ],
                      Y = small_Y,
                      # constraint functions from above 
                      constraint_fn = reference_constraint_lists$constraint_list,
                      constraint_grad_fn = reference_constraint_lists$constraint_grad_list, 
                      fitted_model = ch_fit_ref, # provide our previous fit
                      refit = FALSE, # avoid refitting model
                      # test the first taxon, F. nucleatum s. vincentii
                      test_kj = data.frame(j = 1, 
                      # test the second column in the design matrix
                      # associated with the case/control covariate
                                           k = 2)) 

## -----------------------------------------------------------------------------
head(ch_test_ref$coef)