## ----setup-------------------------------------------------------------------- #| include: false library(forrest) has_broom <- requireNamespace("broom", quietly = TRUE) has_marginaleffects <- requireNamespace("marginaleffects", quietly = TRUE) has_matchit <- requireNamespace("MatchIt", quietly = TRUE) ## ----sim-data----------------------------------------------------------------- set.seed(42) n <- 800 cohort <- data.frame( age = round(runif(n, 30, 75)), female = rbinom(n, 1, 0.52), bmi = rnorm(n, 26, 4.5), smoker = rbinom(n, 1, 0.20), activity = rbinom(n, 1, 0.45), # 1 = physically active educ = sample(c("Low", "Medium", "High"), n, replace = TRUE, prob = c(0.25, 0.45, 0.30)) ) # Systolic blood pressure (mmHg): continuous outcome with true signal cohort$sbp <- 110 + 0.40 * cohort$age - 4.50 * cohort$female + 0.50 * cohort$bmi - 2.00 * cohort$smoker + 2.50 * cohort$activity + rnorm(n, sd = 8) # Hypertension (SBP ≥ 140): binary outcome cohort$htn <- as.integer(cohort$sbp >= 140) # Education as ordered factor (for dose-response example) cohort$educ <- factor(cohort$educ, levels = c("Low", "Medium", "High"), ordered = FALSE) cohort$educ <- relevel(cohort$educ, ref = "Low") ## ----dlm---------------------------------------------------------------------- #| fig-height: 9 set.seed(2025) # Simulate lag-specific estimates for three environmental exposures # Each exposure has a distinct lag-response shape make_lags <- function(pattern) { se <- runif(length(pattern), 0.03, 0.06) data.frame( estimate = pattern + rnorm(length(pattern), sd = 0.02), lower = pattern - 1.96 * se, upper = pattern + 1.96 * se ) } pm25 <- make_lags(c(0.04, 0.11, 0.10, 0.06, 0.03, 0.01, 0.00)) noise <- make_lags(c(0.09, 0.05, 0.02, 0.01, 0.00, -0.01, 0.00)) green <- make_lags(c(-0.02, -0.04, -0.07, -0.09, -0.10, -0.09, -0.08)) lag_labels <- paste("Lag", 0:6, "(weeks)") dlm_dat <- rbind( data.frame(exposure = "PM2.5", lag = lag_labels, pm25, is_sum = FALSE), data.frame(exposure = "PM2.5", lag = "Cumulative", estimate = 0.35, lower = 0.18, upper = 0.52, is_sum = TRUE), data.frame(exposure = "Road noise", lag = lag_labels, noise, is_sum = FALSE), data.frame(exposure = "Road noise", lag = "Cumulative", estimate = 0.16, lower = 0.04, upper = 0.28, is_sum = TRUE), data.frame(exposure = "Green space", lag = lag_labels, green, is_sum = FALSE), data.frame(exposure = "Green space", lag = "Cumulative", estimate = -0.49, lower = -0.68, upper = -0.30, is_sum = TRUE) ) dlm_dat$est_text <- ifelse( dlm_dat$is_sum, sprintf("%.2f (%.2f, %.2f)", dlm_dat$estimate, dlm_dat$lower, dlm_dat$upper), sprintf("%.2f (%.2f, %.2f)", dlm_dat$estimate, dlm_dat$lower, dlm_dat$upper) ) forrest( dlm_dat, estimate = "estimate", lower = "lower", upper = "upper", label = "lag", section = "exposure", is_summary = "is_sum", ref_line = 0, header = "Lag", cols = c("Coef (95% CI)" = "est_text"), widths = c(3.5, 4, 2.8), xlab = "Regression coefficient per IQR increase (95% CI)" ) ## ----life-course-------------------------------------------------------------- #| fig-height: 7 set.seed(99) make_row <- function(exposure, life_stage, est, se) { data.frame( life_stage = life_stage, exposure = exposure, estimate = est + rnorm(1, sd = 0.01), lower = est - 1.96 * se, upper = est + 1.96 * se ) } lc_dat <- rbind( make_row("Noise exposure", "Prenatal", 0.08, 0.03), make_row("Green space access", "Prenatal", -0.05, 0.04), make_row("Traffic-related air", "Prenatal", 0.06, 0.03), make_row("Noise exposure", "Early childhood", 0.12, 0.04), make_row("Green space access", "Early childhood",-0.09, 0.05), make_row("Traffic-related air", "Early childhood", 0.10, 0.04), make_row("Noise exposure", "Mid-childhood", 0.15, 0.05), make_row("Green space access", "Mid-childhood", -0.13, 0.05), make_row("Traffic-related air", "Mid-childhood", 0.14, 0.05), make_row("Noise exposure", "Adolescence", 0.09, 0.04), make_row("Green space access", "Adolescence", -0.07, 0.05), make_row("Traffic-related air", "Adolescence", 0.08, 0.04) ) forrest( lc_dat, estimate = "estimate", lower = "lower", upper = "upper", label = "exposure", section = "life_stage", group = "exposure", ref_line = 0, header = "Exposure", xlab = "Coefficient per IQR increase in SBP (95% CI)" ) ## ----life-course-dodge-------------------------------------------------------- #| fig-height: 5 #| fig-width: 10 # Same data — one row per exposure × life stage forrest( lc_dat, estimate = "estimate", lower = "lower", upper = "upper", label = "exposure", group = "life_stage", dodge = TRUE, ref_line = 0, header = "Exposure", xlab = "Coefficient per IQR increase in SBP (95% CI)" ) ## ----save--------------------------------------------------------------------- #| eval: false # save_forrest( # file = "regression_forest.pdf", # plot = function() { # forrest( # coefs, # estimate = "estimate", # lower = "conf.low", # upper = "conf.high", # label = "term", # stripe = TRUE, # xlab = "Regression coefficient (95% CI)" # ) # }, # width = 9, # height = 5 # )