## ----configure-knitr, include = FALSE-----------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = '#>')
options(rmarkdown.html_vignette.check_title = FALSE)

## ----clear-memory, eval = TRUE------------------------------------------------
rm(list = ls())

## -----------------------------------------------------------------------------
execute.vignette <- FALSE

## ----invitro-data, eval = execute.vignette------------------------------------
# ac50.dt <- httk::thyroid.ac50s
# 

## ----libraries, message = FALSE, eval = execute.vignette----------------------
# library(httk)
# library(data.table)
# library(dplyr)
# library(openxlsx)
# library(ggpubr)
# library(cowplot)
# library(ggrepel)
# library(latex2exp)
# library(viridis)
# library(scales)
# library(ggstar)

## ----exp-TK-chems, warning = FALSE, eval = execute.vignette-------------------
# # how many chems had experimental TK params?
# human.data.pre <- subset(get_cheminfo(info = 'all', species = 'Human',
#                                       model = '3compartmentss'),
#                          DTXSID %in% ac50.dt$dsstox_substance_id)
# 
# nrow(human.data.pre)
# 

## ----insilico-preds, message=FALSE, warning=FALSE, eval = execute.vignette----
# # predict Clint/fup values for missing chems
# # this loads new chem.physical_and_invitro.data table into the global env
# load_dawson2021() # most data for ToxCast chems
# load_sipes2017() # most data for pharma compounds
# load_pradeep2020() # best ML method for in silico prediction
# 

## ----missing-chems, warning = FALSE, eval = execute.vignette------------------
# human.httk.dt <- subset(ac50.dt, dsstox_substance_id %in% get_cheminfo(info = 'DTXSID',
#                                                                        species = 'Human',
#                                                                        model = '3compartmentss',
#                                                                        physchem.exclude = FALSE,
#                                                                        median.only = TRUE))
# 
# missing.dtxsids <- setdiff(ac50.dt$dsstox_substance_id, human.httk.dt$dsstox_substance_id)
# 
# View(ac50.dt[dsstox_substance_id %in% missing.dtxsids])
# 

## ----3compss-aeds, warning = FALSE, message = FALSE, eval = execute.vignette----
# 
# for (i in 1:nrow(human.httk.dt)) {
# 
#   message('Calculating for chemical: ', human.httk.dt$chnm[i], '\n')
#   set.seed(123)
#   out <- calc_mc_css(dtxsid = human.httk.dt$dsstox_substance_id[i],
#                      species = 'Human',
#                      daily.dose = 1,
#                      which.quantile = c(0.5, 0.95),
#                      model = '3compartmentss',
#                      output.units = 'uM',
#                      parameterize.args.list=list(physchem.exclude=FALSE),
#                      suppress.messages = TRUE)
# 
#   human.httk.dt$mc.css50[i] <- out["50%"]
# 
#   set.seed(123)
#   oralequiv.out <- calc_mc_oral_equiv(conc = human.httk.dt$min_ac50[i],
#                                       dtxsid = human.httk.dt$dsstox_substance_id[i],
#                                       species = 'Human',
#                                       which.quantile = c(0.5, 0.95),
#                                       input.units = 'uM',
#                                       output.units = 'mgpkgpday',
#                                       model = '3compartmentss',
#                                       restrictive.clearance = TRUE,
#                                       parameterize.args.list=list(physchem.exclude=FALSE),
#                                       suppress.messages = TRUE)
# 
#    human.httk.dt$oral_equiv50[i] <- oralequiv.out["50%"]
# }
# 
# human.httk.dt[, calc.aed50 := min_ac50/mc.css50]
# human.httk.dt[, fold_diff50 := log10(oral_equiv50/calc.aed50)]
# 

## ----seem3, eval = execute.vignette-------------------------------------------
# 
# # let's use the aed values from the division
# human.httk.dt2 <- human.httk.dt[, c('dsstox_substance_id', 'chnm',
#                                     'DIO1', 'DIO2', 'DIO3', 'IYD', 'min_ac50',
#                                     'mc.css50', 'calc.aed50')]
# 
# SEEM <- httk::truong25.seem3
# 
# # integrate SEEM3 predictions with merge
# human.httk.dt2.seem3 <- merge.data.table(human.httk.dt2,
#                                          SEEM[, c("dsstox_substance_id", "seem3", "seem3.u95")],
#                                          by = c("dsstox_substance_id"),
#                                          all.x = TRUE)
# 
# human.httk.dt2.seem3[, plasmaBER50 := calc.aed50/seem3.u95]
# human.httk.dt2.seem3[, log.pBER50 := log10(plasmaBER50)]
# 
# ivive.moe.tb <- human.httk.dt2.seem3
# setnames(ivive.moe.tb, "dsstox_substance_id", "dtxsid")
# 

## ----half-lives, warning = FALSE, eval = execute.vignette---------------------
# for (i in 1:nrow(ivive.moe.tb)) {
#   ivive.moe.tb$half.life[i] <- calc_half_life(dtxsid = ivive.moe.tb$dtxsid[i], # t1/2 in hours
#                                               physchem.exclude = FALSE)
#   ivive.moe.tb$half.life.days[i] <- ivive.moe.tb$half.life[i]/24 # in days
# }
# 

## ----fullpreg-sim, message = FALSE, warning = FALSE, eval = execute.vignette----
# 
# # tissues where DIO enzymes live
# impacted_tissues <- c('Cliver', 'Cthyroid', 'Cfliver', 'Cfbrain', 'Cfthyroid',
#                       'Cplacenta', 'Cconceptus', 'Cplasma')
# targets <- c('DIO1', 'DIO2', 'DIO3', 'IYD')
# 
# # max conc in every tissue for every chemical
# Cmax.tissues <- data.frame(matrix(NA, ncol = length(impacted_tissues) + 1,
#                                   nrow = nrow(ivive.moe.tb),
#                                   dimnames = list(NULL, c('dtxsid', impacted_tissues))))
# 
# for(i in 1:nrow(ivive.moe.tb)) {
# 
#   sol.out <- solve_full_pregnancy(dtxsid = ivive.moe.tb$dtxsid[i],
#                             daily.dose = 1,
#                             doses.per.day = 1,
#                             time.course = seq(0, 40*7, 1/24), # view solution by hour
#                             track.vars = impacted_tissues,
#                             physchem.exclude = FALSE)
# 
# 
#   # get max of every column re: compt
#   Cmax.tissues[i, impacted_tissues] <- apply(sol.out[, impacted_tissues], 2, max, na.rm = T)
#   Cmax.tissues[i, 'dtxsid'] <- ivive.moe.tb$dtxsid[i]
# 
# }
# 

## ----preg-aeds, eval = execute.vignette---------------------------------------
# tissue.aeds <- list()
# 
# # tissues specific for each target
# tissue_list <- list(DIO1 = c('Cliver', 'Cfliver', 'Cthyroid', 'Cfthyroid', 'Cconceptus'),
#                     DIO2 = c('Cthyroid', 'Cfthyroid', 'Cfbrain', 'Cconceptus'),
#                     DIO3 = c('Cthyroid', 'Cfthyroid', 'Cfbrain', 'Cplacenta', 'Cconceptus'),
#                     IYD = c('Cthyroid', 'Cfthyroid', 'Cconceptus'))
# 
# for (i in names(tissue_list)) {
#   active.chem.ind <- which(!is.na(ivive.moe.tb[, ..i]))
#   n.chems <- length(active.chem.ind)
# 
#   empty.mat <- matrix(NA, nrow = n.chems, ncol = length(tissue_list[[i]]) + 1)
#   df.target <- data.frame(empty.mat)
#   colnames(df.target) <- c('dtxsid', tissue_list[[i]])
# 
#   df.target[, 2: (length(tissue_list[[i]])+1) ] <- mapply(function(x,y) x/y, ivive.moe.tb[active.chem.ind, ..i], Cmax.tissues[active.chem.ind, tissue_list[[i]]])
# 
#   df.target$dtxsid <- ivive.moe.tb$dtxsid[active.chem.ind]
#   tissue.aeds[[i]] <- df.target
# }
# 

## ----fig9-wrangling, eval = execute.vignette----------------------------------
# impacted_tissues <- c('Cliver', 'Cthyroid', 'Cfliver', 'Cfbrain', 'Cfthyroid',
#                       'Cplacenta', 'Cconceptus', 'Cplasma')
# 
# get_ber_data <- function(name, df) {
# 
#   df$exposure <- ivive.moe.tb$seem3.u95[match(df$dtxsid, ivive.moe.tb$dtxsid)]
#   df.m <- df %>% select(-exposure) %>%
#     reshape2::melt(id.vars = c('dtxsid'),
#                    variable.name = 'compt', value.name = 'aed')
#   df.m$exposure <- ivive.moe.tb$seem3.u95[match(df.m$dtxsid, ivive.moe.tb$dtxsid)]
#   df.m$ber = df.m$aed/df.m$exposure
#   df.m2 <- df.m %>%
#     select(-exposure) %>%
#     reshape2::melt(id.vars = c('dtxsid', 'compt', 'ber'))
# 
#   # add melted ExpoCast data as rows for each unique chem
#   expocast.dat <- df %>% select(dtxsid, exposure) %>%
#     reshape2::melt(id.vars = c('dtxsid'))
# 
#   df.m2 <- bind_rows(df.m2, expocast.dat)
#   df.m2$value <- log10(df.m2$value)
#   df.m2$ber <- log10(df.m2$ber)
#   df.m2$chnm <- ivive.moe.tb$chnm[match(df.m2$dtxsid, ivive.moe.tb$dtxsid)]
#   df.m2$enzyme <- name
#   return(df.m2)
# }
# 
# new.list <- Map(get_ber_data, names(tissue.aeds), tissue.aeds)
# ggdata = bind_rows(new.list)
# setDT(ggdata)
# 
# # order chemicals within each enzyme group by the most sensitive tissue/lifestage BER
# ggdata[, min_ber := min(ber, na.rm = TRUE), keyby = .(enzyme, dtxsid)]
# ggdata <- ggdata[order(min_ber), .SD, keyby = .(enzyme)]
# 
# # exposure values are dependent on dtxsid not httk compt
# ggdata[variable == 'exposure', compt := 'exposure']
# 
# # make a deep copy or else it will be modified by reference
# tissue.bers <- copy(ggdata)
# 
# # round all numerical values including BERs to 2 sigfigs
# num.cols <- c("value", "ber", "min_ber")
# tissue.bers[, c(num.cols) := lapply(.SD, signif, digits = 2), .SDcols = num.cols]
# 

## ----fig9-plt, eval = execute.vignette----------------------------------------
# min_val <- tissue.bers[min_ber <= 3, min(value), by = enzyme][, min(V1)]
# max_val <- tissue.bers[min_ber <= 3, max(value), by = enzyme][, max(V1)]
# ylims <- c(min_val, max_val)
# 
# # generate a set of 3 viridis colors for lifestage
# # yellow represents neither maternal or fetal, i.e., conceptus
# my_viridis_colors <- viridis(n = 3)
# 
# listgg <- list()
# for(k in names(tissue.aeds)) {
# 
#   aeds <- tissue.bers[enzyme == k & ber <= 3]
#   chems <- tissue.bers[enzyme == k & ber <= 3, dtxsid]
#   exposures <- tissue.bers[enzyme == k & variable == "exposure" & dtxsid %in% chems]
# 
#   listgg[[k]] <- ggplot(data = rbind(aeds,exposures),
#                         mapping = aes(x = reorder(chnm, -min_ber),
#                                       y = value)) +
#     geom_star(aes(starshape = compt, fill = compt, color = compt),
#               alpha = 0.65, size = 1.25, # default: 1.5
#               starstroke = 0.25, # default: 0.5
#               show.legend = T) +
#     scale_y_continuous(breaks = seq(ylims[1], ylims[2], 1),
#                        limits = ylims) +
#     scale_starshape_manual(name = "Compartment corresponding to AED \n or Exposure",
#                        labels = c("maternal liver", "fetal liver",
#                                   "maternal thyroid", "fetal thyroid",
#                                   "conceptus",
#                                   "fetal brain",
#                                   "placenta", "exposure"),
#                        values = c('Cfbrain' = 15,
#                                   'exposure' = 28,
#                                   'Cliver' = 11,
#                                   'Cfliver' = 11,
#                                   'Cplacenta' = 13,
#                                   'Cthyroid' = 1,
#                                   'Cfthyroid' = 1,
#                                   'Cconceptus' = 23),
#                        drop = F) + # this makes sure all levels of a factor are displayed even if unused
#     scale_fill_manual(name = "Compartment corresponding to AED \n or Exposure",
#                        labels = c("maternal liver", "fetal liver",
#                                   "maternal thyroid", "fetal thyroid",
#                                   "conceptus",
#                                   "fetal brain",
#                                   "placenta", "exposure"),
#                        values = c('Cfbrain' = my_viridis_colors[2],
#                                   'exposure' = 'dimgrey',
#                                   'Cliver' = my_viridis_colors[1],
#                                   'Cfliver' = my_viridis_colors[2],
#                                   'Cplacenta' = '#990066',
#                                   'Cthyroid' = my_viridis_colors[1],
#                                   'Cfthyroid' = my_viridis_colors[2],
#                                   'Cconceptus' = '#73D055FF'),
#                        drop = F) +
#      scale_color_manual(name = "Compartment corresponding to AED \n or Exposure",
#                        labels = c("maternal liver", "fetal liver",
#                                   "maternal thyroid", "fetal thyroid",
#                                   "conceptus",
#                                   "fetal brain",
#                                   "placenta", "exposure"),
#                        values = c('Cfbrain' = my_viridis_colors[2],
#                                   'exposure' = 'dimgrey',
#                                   'Cliver' = my_viridis_colors[1],
#                                   'Cfliver' = my_viridis_colors[2],
#                                   'Cplacenta' = '#990066',
#                                   'Cthyroid' = my_viridis_colors[1],
#                                   'Cfthyroid' = my_viridis_colors[2],
#                                   'Cconceptus' = '#73D055FF'),
#                        drop = F) +
#     labs(x = 'Chemical', y = 'Oral AED or Exposure (log10 mg/kg-bw/day)', title = k) +
#     theme_bw() +
#     theme(legend.position = "none",
#           plot.title = element_text(size = 8, face = "bold"),
#           plot.margin = unit(c(0.1,0.1,0.1,0.1), "cm"),
#           axis.title = element_text(size = 6),
#           axis.text.y = element_text(size = 5),
#           axis.text.x = element_text(size = 6),
#           legend.text = element_text(size = 6),
#           legend.title = element_text(size = 6, hjust = 0.5))+
#     coord_flip()
# }
# 
# # remove extra axis labels
# listgg$IYD <- listgg$IYD + labs(x = '', y = '')
# listgg$DIO3 <- listgg$DIO3 +
#   labs(x = '')
# listgg$DIO1 <- listgg$DIO1 + labs(y = '')
# 
# # arrange DIO1 and DIO2 with equal sizes since they have ~20 chems each (21 vs 24)
# pcol1 <- plot_grid(listgg$DIO1, listgg$DIO2,
#                   align = "hv", nrow = 2,
#                   rel_heights = c(1,1.1))
# 
# # make DIO3 3x taller than IYD (44 chems vs 4)
# pcol2 <- plot_grid(listgg$IYD, listgg$DIO3,
#                    align = "hv", nrow = 2,
#                    rel_heights = c(1,3))
# 
# # combine two columns of plots
# prow <- plot_grid(pcol1, pcol2,
#                     align = "h", ncol = 2,
#                     rel_widths = c(1,1))
# 
# # extract legend from one of the plots
# grobs <- ggplotGrob(
#   listgg$DIO2 +
#   guides(
#     starshape = guide_legend(ncol = 2, byrow = F,
#                              keyheight = unit(0.25, "cm")),
#     fill = guide_legend(ncol = 2, byrow = F,
#                         keyheight = unit(0.25, "cm")),
#     color = guide_legend(ncol = 2, byrow = F,
#                          keyheight = unit(0.25, "cm")),
#   ) +
#   theme(legend.position = "bottom")
#   )$grobs
# legend2 <- grobs[[which(sapply(grobs, function(x) x$name) == "guide-box")]]
# 
# # add the legend underneath the plots; make it 10% of the height of the combined plots
# fig9 <- plot_grid(prow, legend2,
#           nrow = 2, rel_heights = c(0.88,0.12)) +
#    theme(plot.background = element_rect(fill = "white", color = "white"))
# 
# # ggsave(plot = fig9,
# #        units = "mm",
# #        dpi = 300,
# #        width = 190, height = 150,
# #        device = "png",
# #        filename = "./figures/preg_prioritization_by_BER.png")
# 

## ----table-8, warning = FALSE, eval = execute.vignette------------------------
# targets <- c('DIO1', 'DIO2', 'DIO3', 'IYD')
# 
# # DATA WRANGLING
# tissue.bers[min_ber == ber, most_sensitive_tissue := compt]
# 
# # table matching order of chemicals in enzyme subplots with all calculated BERs
# # every enzyme x chem x min BER constitutes a row
# minber.by.chem <- tissue.bers[!is.na(most_sensitive_tissue),
#                        .(chnm, min_ber, most_sensitive_tissue), by = .(enzyme, dtxsid)]
# 
# # check if there are multiple most-sensitive tissues per chem in each enzyme group
# minber.by.chem[, if(.N > 1) .SD, by = .(enzyme, dtxsid)][, length(unique(dtxsid))] #70 chems
# 
# # collapse multiple most-sensitive tissues per chem in each enzyme group
# min.ber.reduced <- minber.by.chem[, most_sensitive_compts := paste(most_sensitive_tissue, collapse = ", "), by = .(enzyme, dtxsid)][, .(chnm, min_ber, most_sensitive_compts), by = .(enzyme, dtxsid)]
# min.ber.reduced <- unique(min.ber.reduced)
# 
# # min BER matrix
# min.ber.mat <- data.table::dcast(min.ber.reduced[, 1:4],
#                      dtxsid + chnm ~ enzyme, value.var = c('min_ber'))
# min.ber.mat[, min_across_enzyme := pmin(DIO1, DIO2, DIO3, IYD, na.rm = T)]
# min.ber.mat <- min.ber.mat[order(min_across_enzyme)]
# 
# # find the target corresponding to the min_across_enzyme
# min.ber.mat[, enzyme := mapply(function(x) targets[x], apply(min.ber.mat[, targets, with = FALSE], 1, which.min))]
# 
# # merge data on most sensitive tissues reflecting min BER
# new.ranking.tissues <- merge.data.table(min.ber.mat,
#                                  minber.by.chem[, .(dtxsid, chnm, enzyme, min_ber, most_sensitive_tissue)],
#                                  by = c('dtxsid', 'chnm', 'enzyme'),
#                                  all.x = TRUE)
# setnames(new.ranking.tissues, old = 'enzyme', new = 'target_min')
# 
# # round all BERs to 2 sigfigs for easy table viewing
# ranked.by.plasma <- ivive.moe.tb[, lmoe50 := signif(log.pBER50, digits = 2)][order(lmoe50)]
# 
# final.new.ranking <- new.ranking.tissues[, -c("target_min", "min_ber")]
# final.new.ranking <- final.new.ranking[order(min_across_enzyme)]
# 
# # comparing tissue BERs from full preg model with plasma BERs from 3compss model
# plasma.tissue.bers <- merge.data.table(ranked.by.plasma[, c('dtxsid', 'chnm', 'lmoe50')],
#                            final.new.ranking[, -c('chnm')],
#                            by = c('dtxsid'))
# 
# setnames(plasma.tissue.bers, old = colnames(plasma.tissue.bers)[3:8],
#          new = c('plasma_BER50', 'DIO1_BER', 'DIO2_BER', 'DIO3_BER', 'IYD_BER', 'min_BER'))
# 
# plasma.tissue.bers <- plasma.tissue.bers[order(min_BER)]
# 

## ----fig10, eval = execute.vignette-------------------------------------------
# # merge min BER with plasma BER for each chem with unique row for each chem
# # ignoring multiple sensitive tissues
# ber.comp <- merge.data.table(ranked.by.plasma[, c('dtxsid', 'chnm', 'lmoe50')],
#                              min.ber.mat[, -c("chnm")],
#                              by = c("dtxsid"))
# 
# setnames(ber.comp, old = colnames(ber.comp)[3:8],
#          new = c('plasma_BER50', 'DIO1_BER', 'DIO2_BER', 'DIO3_BER', 'IYD_BER', 'min_BER'))
# 
# max_ber <- max(ber.comp[, c('plasma_BER50', 'min_BER')], na.rm = T)
# min_ber <- min(ber.comp[, c('plasma_BER50', 'min_BER')], na.rm = T)
# ber.lims <- c(floor(min_ber), ceiling(max_ber))
# 
# # chems with labels
# chem.labels <- ber.comp[min_BER < 0][order(min_BER)][1:6, chnm]
# 
# # theme for the next 2 figures
# my_theme <- theme_bw() +
#   theme(axis.title = element_text(size = 8, face = "bold"),
#         axis.text = element_text(size = 7))
# 
# preg.vs.nonpreg.pp <- ggplot(data = ber.comp, aes(x = plasma_BER50, y = min_BER)) +
#   geom_point(alpha = 0.75, size = 1,
#              show.legend = T) +
#   geom_abline(slope = 1, linewidth = 0.5, linetype = 'solid', color = "#666666") +
#   geom_abline(aes(slope = 1, intercept = 1), linetype = 'longdash', color = 'grey', linewidth = 0.5) + # default linesize = 1
#   geom_abline(aes(slope = 1, intercept = -1), linetype = 'longdash', color = 'grey', linewidth = 0.5) +
#   geom_label_repel(aes(label = chnm),
#                    data = ber.comp[chnm %in% chem.labels],
#                    max.overlaps = Inf,
#                    force = 50,
#                    size = 2,
#                    label.padding = 0.1,
#                    label.size = 0.1,
#                    label.r = 0.08,
#                    segment.size = 0.2,
#                    min.segment.length = 0) +
#   my_theme +
#   scale_x_continuous(breaks = seq(ber.lims[1], ber.lims[2], 1),
#                      limits = ber.lims) +
#   scale_y_continuous(breaks = seq(ber.lims[1], ber.lims[2], 1),
#                      limits = ber.lims) +
#   labs(x = 'plasma BER using 3compss model', y = 'min BER across DIO/IYD targets and tissues')
# 
# # how many chems to the right of y = x?
# ber.comp[plasma_BER50 > min_BER, .N] #89 chems
# 
# # plot SEEM3 uncertainty
# ivive.moe.tb$uncertainty = log10(ivive.moe.tb$seem3.u95/ivive.moe.tb$seem3)
# ber.comp$uncertainty <- ivive.moe.tb$uncertainty[match(ber.comp$dtxsid, ivive.moe.tb$dtxsid)]
# 
# min.min.ber <- min(ber.comp$min_BER)
# max.min.ber <- max(ber.comp$min_BER)
# min.ber.lims <- c(floor(min.min.ber), ceiling(max.min.ber))
# max.uncertainty <- max(ber.comp$uncertainty)
# 
# seem3pp <- ggplot(data = ber.comp,
#        mapping = aes(x = min_BER,
#                      y = uncertainty)) +
#   geom_point(size = 1, alpha = 0.8) +
#   geom_label_repel(aes(label = chnm),
#                    data = subset(ber.comp, chnm %in% chem.labels),
#                    max.overlaps = Inf,
#                    force = 75,
#                    size = 2,
#                    label.padding = 0.1,
#                    label.size = 0.1,
#                    label.r = 0.08,
#                    segment.size = 0.2,
#                    min.segment.length = 0) +
#   my_theme +
#   scale_x_continuous(breaks = seq(min.ber.lims[1], min.ber.lims[2], 1),
#                      limits = min.ber.lims) +
#   scale_y_continuous(breaks = seq(0, ceiling(max.uncertainty), 1),
#                      limits = c(0, ceiling(max.uncertainty))) +
#   labs(x = 'min BER by Chemical', y = TeX(r'($log_{10}ExpoCast_{u95} - log_{10}ExpoCast_{med}$)'))
# 
# median(ber.comp$min_BER)
# #> [1] 2.9
# 
# fig10 <- plot_grid(preg.vs.nonpreg.pp, seem3pp,
#           labels = "AUTO", label_size = 10)
# 
# # ggsave(plot = fig10,
# #        units = "mm",
# #        dpi = 300,
# #        width = 190, height = 100,
# #        device = "png",
# #        filename = "./figures/BER_uncertainty.png")
# 

## ----create_distributable_data, eval = FALSE----------------------------------
# thyroid.ac50s <- ac50.dt
# truong25.seem3 <- SEEM
# 
# save(thyroid.ac50s, truong25.seem3,
#      file="./httk/Truong2025Vignette.RData")