## ----setup--------------------------------------------------------------------
#| include: false
library(forrest)
has_broom            <- requireNamespace("broom",            quietly = TRUE)
has_marginaleffects  <- requireNamespace("marginaleffects",  quietly = TRUE)
has_matchit          <- requireNamespace("MatchIt",          quietly = TRUE)


## ----sim-data-----------------------------------------------------------------
set.seed(42)
n <- 800

cohort <- data.frame(
  age      = round(runif(n, 30, 75)),
  female   = rbinom(n, 1, 0.52),
  bmi      = rnorm(n, 26, 4.5),
  smoker   = rbinom(n, 1, 0.20),
  activity = rbinom(n, 1, 0.45),   # 1 = physically active
  educ     = sample(c("Low", "Medium", "High"), n,
                    replace = TRUE, prob = c(0.25, 0.45, 0.30))
)

# Systolic blood pressure (mmHg): continuous outcome with true signal
cohort$sbp <- 110 +
  0.40 * cohort$age    -
  4.50 * cohort$female +
  0.50 * cohort$bmi    -
  2.00 * cohort$smoker +
  2.50 * cohort$activity +
  rnorm(n, sd = 8)

# Hypertension (SBP ≥ 140): binary outcome
cohort$htn <- as.integer(cohort$sbp >= 140)

# Education as ordered factor (for dose-response example)
cohort$educ <- factor(cohort$educ,
                      levels = c("Low", "Medium", "High"), ordered = FALSE)
cohort$educ <- relevel(cohort$educ, ref = "Low")


## ----dlm----------------------------------------------------------------------
#| fig-height: 9
set.seed(2025)

# Simulate lag-specific estimates for three environmental exposures
# Each exposure has a distinct lag-response shape
make_lags <- function(pattern) {
  se  <- runif(length(pattern), 0.03, 0.06)
  data.frame(
    estimate = pattern + rnorm(length(pattern), sd = 0.02),
    lower    = pattern - 1.96 * se,
    upper    = pattern + 1.96 * se
  )
}

pm25  <- make_lags(c(0.04, 0.11, 0.10, 0.06, 0.03,  0.01,  0.00))
noise <- make_lags(c(0.09, 0.05,  0.02, 0.01, 0.00, -0.01,  0.00))
green <- make_lags(c(-0.02, -0.04, -0.07, -0.09, -0.10, -0.09, -0.08))

lag_labels <- paste("Lag", 0:6, "(weeks)")

dlm_dat <- rbind(
  data.frame(exposure = "PM2.5",        lag = lag_labels, pm25,  is_sum = FALSE),
  data.frame(exposure = "PM2.5",        lag = "Cumulative", estimate = 0.35,
             lower = 0.18, upper = 0.52, is_sum = TRUE),
  data.frame(exposure = "Road noise",   lag = lag_labels, noise, is_sum = FALSE),
  data.frame(exposure = "Road noise",   lag = "Cumulative", estimate = 0.16,
             lower = 0.04, upper = 0.28, is_sum = TRUE),
  data.frame(exposure = "Green space",  lag = lag_labels, green, is_sum = FALSE),
  data.frame(exposure = "Green space",  lag = "Cumulative", estimate = -0.49,
             lower = -0.68, upper = -0.30, is_sum = TRUE)
)

dlm_dat$est_text <- ifelse(
  dlm_dat$is_sum,
  sprintf("%.2f (%.2f, %.2f)", dlm_dat$estimate, dlm_dat$lower, dlm_dat$upper),
  sprintf("%.2f (%.2f, %.2f)", dlm_dat$estimate, dlm_dat$lower, dlm_dat$upper)
)

forrest(
  dlm_dat,
  estimate   = "estimate",
  lower      = "lower",
  upper      = "upper",
  label      = "lag",
  section    = "exposure",
  is_summary = "is_sum",
  ref_line   = 0,
  header     = "Lag",
  cols       = c("Coef (95% CI)" = "est_text"),
  widths     = c(3.5, 4, 2.8),
  xlab       = "Regression coefficient per IQR increase (95% CI)"
)


## ----life-course--------------------------------------------------------------
#| fig-height: 7
set.seed(99)

make_row <- function(exposure, life_stage, est, se) {
  data.frame(
    life_stage = life_stage,
    exposure   = exposure,
    estimate   = est  + rnorm(1, sd = 0.01),
    lower      = est  - 1.96 * se,
    upper      = est  + 1.96 * se
  )
}

lc_dat <- rbind(
  make_row("Noise exposure",         "Prenatal",        0.08, 0.03),
  make_row("Green space access",     "Prenatal",       -0.05, 0.04),
  make_row("Traffic-related air",    "Prenatal",        0.06, 0.03),
  make_row("Noise exposure",         "Early childhood", 0.12, 0.04),
  make_row("Green space access",     "Early childhood",-0.09, 0.05),
  make_row("Traffic-related air",    "Early childhood", 0.10, 0.04),
  make_row("Noise exposure",         "Mid-childhood",   0.15, 0.05),
  make_row("Green space access",     "Mid-childhood",  -0.13, 0.05),
  make_row("Traffic-related air",    "Mid-childhood",   0.14, 0.05),
  make_row("Noise exposure",         "Adolescence",     0.09, 0.04),
  make_row("Green space access",     "Adolescence",    -0.07, 0.05),
  make_row("Traffic-related air",    "Adolescence",     0.08, 0.04)
)

forrest(
  lc_dat,
  estimate = "estimate",
  lower    = "lower",
  upper    = "upper",
  label    = "exposure",
  section  = "life_stage",
  group    = "exposure",
  ref_line = 0,
  header   = "Exposure",
  xlab     = "Coefficient per IQR increase in SBP (95% CI)"
)


## ----life-course-dodge--------------------------------------------------------
#| fig-height: 5
#| fig-width: 10
# Same data — one row per exposure × life stage
forrest(
  lc_dat,
  estimate = "estimate",
  lower    = "lower",
  upper    = "upper",
  label    = "exposure",
  group    = "life_stage",
  dodge    = TRUE,
  ref_line = 0,
  header   = "Exposure",
  xlab     = "Coefficient per IQR increase in SBP (95% CI)"
)


## ----save---------------------------------------------------------------------
#| eval: false
# save_forrest(
#   file   = "regression_forest.pdf",
#   plot   = function() {
#     forrest(
#       coefs,
#       estimate = "estimate",
#       lower    = "conf.low",
#       upper    = "conf.high",
#       label    = "term",
#       stripe   = TRUE,
#       xlab     = "Regression coefficient (95% CI)"
#     )
#   },
#   width  = 9,
#   height = 5
# )