## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----data--------------------------------------------------------------------- library(hhdynamics) data("inputdata") str(inputdata) head(inputdata) ## ----si----------------------------------------------------------------------- data("SI") print(SI) barplot(SI, names.arg = 1:14, xlab = "Days", ylab = "Probability", main = "Default serial interval distribution (influenza)") ## ----fit, eval = FALSE-------------------------------------------------------- # # With covariates (uses default influenza SI) # fit <- household_dynamics(inputdata, inf_factor = ~sex, sus_factor = ~age, # n_iteration = 50000, burnin = 10000, thinning = 10) # # # Without covariates # fit <- household_dynamics(inputdata, # n_iteration = 50000, burnin = 10000, thinning = 10) # # # With a custom serial interval # my_SI <- c(0, 0.01, 0.05, 0.15, 0.25, 0.25, 0.15, 0.08, 0.04, 0.015, 0.005, 0, 0, 0) # fit <- household_dynamics(inputdata, SI = my_SI, # n_iteration = 50000, burnin = 10000, thinning = 10) ## ----print, eval = FALSE------------------------------------------------------ # print(fit) # # Household transmission model fit # # Data: 386 households, 1533 individuals # # MCMC: 4000 post-burnin samples (50000 iterations, burnin: 10000, thin: 10) # # Runtime: 85 seconds # # Parameters: community, household, sex1.0, age1.0, age2.0 # # # # Use summary() for estimates, coef() for posterior means. ## ----summary, eval = FALSE---------------------------------------------------- # summary(fit) # # Variable Point estimate Lower bound Upper bound ... # # Daily probability of infection from community 0.004 0.002 0.007 # # Probability of person-to-person transmission in households 0.057 0.034 0.084 # # sex1.0 -0.081 -0.733 0.488 # # age1.0 -0.065 -0.537 0.412 # # age2.0 -0.312 -0.831 0.170 ## ----coef, eval = FALSE------------------------------------------------------- # coef(fit) # # re_sd community household size_param sex1.0 age1.0 age2.0 # # 1.000000000 0.004239978 0.058555948 0.000000000 -0.080689680 -0.065001794 -0.312414284 ## ----access, eval = FALSE----------------------------------------------------- # # Posterior samples matrix (post-burnin, thinned) # dim(fit$samples) # e.g. 4000 x 7 # # # Log-likelihood trace (full chain, for convergence checks) # dim(fit$log_likelihood) # e.g. 50000 x 3 # # # Per-parameter acceptance rates # fit$acceptance # # # Trace plot for community parameter # plot(fit$samples[, "community"], type = "l", # ylab = "Community rate", xlab = "Iteration") # # # Posterior density # hist(fit$samples[, "household"], breaks = 50, probability = TRUE, # main = "Household transmission probability (raw scale)", # xlab = "Rate parameter") ## ----plot_diag, eval = FALSE-------------------------------------------------- # # Trace plots and posterior densities for all parameters # plot_diagnostics(fit) # # # Specific parameters only # plot_diagnostics(fit, params = c("community", "household")) ## ----plot_trans, eval = FALSE------------------------------------------------- # # Daily probability of transmission with 95% CrI # plot_transmission(fit) # # # Save to PDF # pdf("transmission.pdf", width = 7, height = 5) # plot_transmission(fit) # dev.off() ## ----plot_cov, eval = FALSE--------------------------------------------------- # # Forest plot of covariate effects (relative risks) # plot_covariates(fit) # # # With custom labels for variable headers and factor levels # plot_covariates(fit, file = "covariates.pdf", # labels = list( # sex = list(name = "Sex", levels = c("Male", "Female")), # age = list(name = "Age Group", levels = c("0-5", "6-17", "18+")))) ## ----plot_sar, eval = FALSE--------------------------------------------------- # # Overall attack rate # plot_attack_rate(fit) # # # Stratified by a single variable # plot_attack_rate(fit, by = ~age) # # # Combine overall + multiple strata in one figure # plot_attack_rate(fit, by = list(~sex, ~age), include_overall = TRUE, # labels = list( # sex = list(name = "Sex", levels = c("Male", "Female")), # age = list(name = "Age Group", levels = c("0-5", "6-17", "18+")))) ## ----plot_hh, eval = FALSE---------------------------------------------------- # # Visualize a single household: index (triangle), infected contacts # # (filled circles), uninfected contacts (open circles) # plot_household(fit, hh_id = 1) ## ----tab_param, eval = FALSE-------------------------------------------------- # # Posterior mean, median, 95% CrI, and acceptance rate # table_parameters(fit) # # # Include effective sample size # table_parameters(fit, show_ess = TRUE) ## ----tab_cov, eval = FALSE---------------------------------------------------- # # Relative risks with 95% CrIs, grouped by infectivity/susceptibility # table_covariates(fit) ## ----tab_sar, eval = FALSE---------------------------------------------------- # # Overall SAR with Wilson CI # table_attack_rates(fit) # # # Stratified by covariate # table_attack_rates(fit, by = ~age) ## ----validation, eval = FALSE------------------------------------------------- # # Missing column # household_dynamics(inputdata[, -1]) # # Error: 'input' is missing required columns: hhID # # # Wrong formula variable # household_dynamics(inputdata, inf_factor = ~nonexistent) # # Error: Variables in inf_factor not found in input: nonexistent # # # Old string syntax (no longer supported) # household_dynamics(inputdata, inf_factor = "~sex") # # Error: 'inf_factor' must be a formula (e.g. ~sex) or NULL. ## ----missing_onset, eval = FALSE---------------------------------------------- # # Some infected contacts have unknown symptom onset # inputdata_missing_onset <- inputdata # infected_contacts <- which(inputdata$member > 0 & inputdata$inf == 1) # inputdata_missing_onset$onset[infected_contacts[1:5]] <- NA # # fit_onset <- household_dynamics(inputdata_missing_onset, ~sex, ~age, # n_iteration = 50000, burnin = 10000, thinning = 10) # # Note: 5 of 92 infected contact(s) (5.4%) have missing onset times. These will be imputed during MCMC. ## ----missing, eval = FALSE---------------------------------------------------- # # Introduce some missing values # inputdata_missing <- inputdata # inputdata_missing$sex[c(5, 12, 30)] <- NA # inputdata_missing$age[c(8, 20)] <- NA # # # Fit as usual — missing values are imputed automatically # fit_missing <- household_dynamics(inputdata_missing, ~sex, ~age, # n_iteration = 50000, burnin = 10000, thinning = 10) # # Note: Covariate 'sex' has 3 missing value(s) (0.2%). These will be imputed during MCMC. # # Note: Covariate 'age' has 2 missing value(s) (0.1%). These will be imputed during MCMC. # summary(fit_missing) ## ----estimate_si, eval = FALSE------------------------------------------------ # fit_si <- household_dynamics(inputdata, inf_factor = ~sex, sus_factor = ~age, # n_iteration = 50000, burnin = 10000, thinning = 10, # estimate_SI = TRUE) # summary(fit_si) # # Output now includes si_shape and si_scale parameters # # # Reconstruct the estimated SI distribution # est_shape <- mean(fit_si$samples[, "si_shape"]) # est_scale <- mean(fit_si$samples[, "si_scale"]) # est_SI <- pweibull(2:15, est_shape, est_scale) - pweibull(1:14, est_shape, est_scale) # barplot(est_SI, names.arg = 1:14, xlab = "Days", ylab = "Probability", # main = "Estimated serial interval") ## ----simulate, eval = FALSE--------------------------------------------------- # # Use fitted parameter values # para <- coef(fit) # # # Simulate 10 replicates of the original data structure # sim <- simulate_data(inputdata, rep_num = 10, inf_factor = ~sex, sus_factor = ~age, # para = para, with_rm = 0)