## ----echo = FALSE, message = FALSE-------------------------------------------- library(dplyr); library(flextable); library(knitr); library(officer); library(tidyr) knitr::opts_chunk$set(collapse = TRUE, comment = "#>") options(tibble.print_min = 4L, tibble.print_max = 4L) ## ----echo = FALSE, results = 'asis'------------------------------------------- twobytwo.df <- data.frame("exp" = c("Exposure+","Exposure-","Total"), "dpos" = c("a","c","a + c"), "dneg" = c("b","c","b + c"), "total" = c("a + b","c + d","a + b + c + d")) # Create a header key data frame: hkey.df <- data.frame(col_keys = c("exp","dpos","dneg","total"), h1 = c("", "Outcome+", "Outcome-", "Total"), stringsAsFactors = FALSE) # Create table: border_h = fp_border(color = "black", width = 2) flextable(twobytwo.df) %>% width(j = 1, width = 2.00) %>% width(j = 2, width = 2.00) %>% width(j = 3, width = 2.00) %>% width(j = 4, width = 4.00) %>% set_header_df(mapping = hkey.df, key = "col_keys") %>% bg(bg = "grey80", part = "header") %>% hline_top(border = border_h, part = "all" ) %>% align(align = "left", part = "all") %>% set_caption("A 2 x 2 contingency table.") ## ----echo = FALSE, results = 'asis'------------------------------------------- irr.df <- data.frame("exp" = c("Exposure+","Exposure-","Total"), "dpos" = c("a","c","a + c"), "dneg" = c("b","c","b + c"), "total" = c("a + b","c + d","a + b + c + d"), risk = c("RE+ = a \U00F7 (a + b)", "RE- = c \U00F7 (c + d)", "RT = (a + c) \U00F7 (a + b + c + d)")) # Create a header key data frame: hkey.df <- data.frame(col_keys = c("exp","dpos","dneg","total","risk"), h1 = c("", "Outcome+", "Outcome-", "Total", "Risk"), stringsAsFactors = FALSE) # Create table: border_h = fp_border(color = "black", width = 2) flextable(irr.df) %>% width(j = 1, width = 2.00) %>% width(j = 2, width = 2.00) %>% width(j = 3, width = 2.00) %>% width(j = 4, width = 2.00) %>% width(j = 5, width = 4.00) %>% set_header_df(mapping = hkey.df, key = "col_keys") %>% bg(bg = "grey80", part = "header") %>% hline_top(border = border_h, part = "all" ) %>% align(align = "left", part = "all") %>% set_caption("A 2 x 2 table with incidence risks calculated for the exposed, the unexposed and the total study population.") ## ----risk_ratio, echo = FALSE, fig.align = "center", out.width = "60%", fig.cap = "The incidence risk ratio."---- knitr::include_graphics("risk_ratio.png") ## ----echo = FALSE, results = 'asis'------------------------------------------- orcohort.df <- data.frame("exp" = c("Exposure+","Exposure-","Total"), "dpos" = c("a","c","a + c"), "dneg" = c("b","d","b + d"), "total" = c("a + b","c + d","a + b + c + d"), odds = c("OE+ = a \U00F7 b","OE- = c \U00F7 d", "OT = (a + c) \U00F7 (b + d)")) # Create a header key data frame: hkey.df <- data.frame(col_keys = c("exp","dpos","dneg","total","odds"), h1 = c("", "Outcome+", "Outcome-", "Total", "Odds"), stringsAsFactors = FALSE) # Create table: border_h = fp_border(color = "black", width = 2) flextable(orcohort.df) %>% width(j = 1, width = 2.00) %>% width(j = 2, width = 2.00) %>% width(j = 3, width = 2.00) %>% width(j = 4, width = 2.00) %>% width(j = 5, width = 4.00) %>% set_header_df(mapping = hkey.df, key = "col_keys") %>% bg(bg = "grey80", part = "header") %>% hline_top(border = border_h, part = "all" ) %>% align(align = "left", part = "all") %>% set_caption("A 2 x 2 table with the outcome odds calculated for the exposed, unexposed and the total study population.") ## ----echo = FALSE, results = 'asis'------------------------------------------- orcc.df <- data.frame("exp" = c("Outcome+ (case)","Outcome- (control)","Total"), "dpos" = c("a","c","a + c"), "dneg" = c("b","d","b + d"), "total" = c("a + b","c + d","a + b + c + d"), odds = c("OD+ = a \U00F7 b","OD- = c \U00F7 d", "OT = (a + c) \U00F7 (b + d)")) # Create a header key data frame: hkey.df <- data.frame(col_keys = c("exp","dpos","dneg","total","odds"), h1 = c("", "Exposure+", "Exposure-", "Total", "Odds"), stringsAsFactors = FALSE) # Create table: border_h = fp_border(color = "black", width = 2) flextable(orcc.df) %>% width(j = 1, width = 2.00) %>% width(j = 2, width = 2.00) %>% width(j = 3, width = 2.00) %>% width(j = 4, width = 2.00) %>% width(j = 5, width = 4.00) %>% set_header_df(mapping = hkey.df, key = "col_keys") %>% bg(bg = "grey80", part = "header") %>% hline_top(border = border_h, part = "all" ) %>% align(align = "left", part = "all") %>% set_caption("A 2 x 2 table with the exposure odds calculated for cases, controls and the total study population.") ## ----attributable_risk, echo = FALSE, fig.align = "center", out.width = "60%", fig.cap = "The attributable risk in the exposed."---- knitr::include_graphics("attributable_risk.png") ## ----attributable_fraction, echo = FALSE, fig.align = "center", out.width = "60%", fig.cap = "The attributable fraction in the exposed."---- knitr::include_graphics("attributable_fraction.png") ## ----population_attributable_risk, echo = FALSE, fig.align = "center", out.width = "60%", fig.cap = "The attributable risk in the population."---- knitr::include_graphics("population_attributable_risk.png") ## ----population_attributable_fraction, echo = FALSE, fig.align = "center", out.width = "60%", fig.cap = "The attributable fraction in the population."---- knitr::include_graphics("population_attributable_fraction.png") ## ----------------------------------------------------------------------------- dat.v01 <- c(13,2163,5,3349); dat.v01 # View the data in the usual 2 by 2 table format: matrix(dat.v01, nrow = 2, byrow = TRUE) ## ----message = FALSE---------------------------------------------------------- library(epiR) epi.2by2(dat = dat.v01, method = "cross.sectional", elab = "Dry food", olab = "FLUTD", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") ## ----message = FALSE---------------------------------------------------------- epi.2by2(dat = dat.v01, method = "cross.sectional", elab = "Dry food", olab = "FLUTD", digits = 2, conf.level = 0.95, units = 100, interpret = TRUE, outcome = "as.columns") ## ----message = FALSE---------------------------------------------------------- library(MASS) # Load and view the data: dat.df02 <- birthwt; head(dat.df02) ## ----message = FALSE---------------------------------------------------------- dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02 ## ----message = FALSE---------------------------------------------------------- dat.tab02 <- table(dat.df02$smoke, dat.df02$low, dnn = c("Smoke", "Low BW")); dat.tab02 dat.tab02 <- dat.tab02[2:1,2:1]; dat.tab02 ## ----message = FALSE---------------------------------------------------------- # Variables low, smoke and race as factors. Put an 'f' in front of the variable names to remind you that they're factors: dat.df02$flow <- factor(dat.df02$low, levels = c(1,0)) dat.df02$fsmoke <- factor(dat.df02$smoke, levels = c(1,0)) dat.df02$frace <- factor(dat.df02$race, levels = c(1,2,3)) dat.tab02 <- table(dat.df02$fsmoke, dat.df02$flow, dnn = c("Smoke", "Low BW")); dat.tab02 ## ----message = FALSE---------------------------------------------------------- dat.epi02 <- epi.2by2(dat = dat.tab02, method = "cohort.count", elab = "Smoke", olab = "Low BW", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi02 ## ----message = FALSE---------------------------------------------------------- names(dat.epi02$massoc.detail) ## ----message = FALSE---------------------------------------------------------- dat.epi02$massoc.detail$OR.strata.wald # Wald confidence intervals: 2.0 (95% CI 1.1 to 3.8) dat.epi02$massoc.detail$OR.strata.score # Score confidence intervals: 2.0 (95% CI 1.1 to 3.8) ## ----message = FALSE---------------------------------------------------------- library(dplyr); library(tidyr) dat.df03 <- birthwt; head(dat.df03) # Here we set the factor levels and tabulate the data in a single call using pipe operators: dat.tab03 <- dat.df03 %>% mutate(flow = factor(low, levels = c(1,0), labels = c("yes","no"))) %>% mutate(fsmoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>% group_by(fsmoke, flow) %>% summarise(n = n()) # View the data: dat.tab03 # View the data in conventional 2 by 2 table format: pivot_wider(dat.tab03, id_cols = c(fsmoke), names_from = flow, values_from = n) ## ----message = FALSE---------------------------------------------------------- dat.epi03 <- epi.2by2(dat = dat.tab03, method = "cohort.count", elab = "Smoke", olab = "Low BW", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi03 ## ----message = FALSE---------------------------------------------------------- dat.df04 <- birthwt; head(dat.df04) dat.df04$flow <- factor(dat.df04$low, levels = c(1,0)) dat.df04$fage <- ifelse(dat.df04$age > 23, 0,1) dat.df04$fage <- factor(dat.df04$fage, levels = c(1,0)) dat.df04$fsmoke <- factor(dat.df04$smoke, levels = c(1,0)) # Race is coded 1 = white, 2 = black and 3 = other. Set white as the reference level: dat.df04$frace <- ifelse(dat.df04$race == 1, 0, 1) dat.df04$frace <- factor(dat.df04$frace, levels = c(1,0)) # Empty vectors to collect results: rfactor <- ref <- or.p <- or.l <- or.u <- c() # The candidate risk factors are in columns 12 to 14 of data frame dat.df04: for(i in 12:14){ tdat.tab04 <- table(dat.df04[,i], dat.df04$flow) tdat.epi04 <- epi.2by2(dat = tdat.tab04, method = "cohort.count", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") trfactor <- as.character(names(dat.df04)[i]) rfactor <- c(rfactor, trfactor) tref <- as.character(paste("Reference: ", trfactor, " - ", levels(dat.df04[,i])[2], sep = "")) ref <- c(ref, tref) tor.p <- as.numeric(tdat.epi04$massoc.detail$OR.strata.wald[1]) or.p <- c(or.p, tor.p) tor.l <- as.numeric(tdat.epi04$massoc.detail$OR.strata.wald[2]) or.l <- c(or.l, tor.l) tor.u <- as.numeric(tdat.epi04$massoc.detail$OR.strata.wald[3]) or.u <- c(or.u, tor.u) } gdat.df04 <- data.frame(ybrk = 1:3, ylab = rfactor, ref, or.p, or.l, or.u) gdat.df04 ## ----odds_ratios, echo = TRUE, message = FALSE, fig.align = "center", out.width = "80%", fig.cap = "Risk factors for low birth weight babies. Error bar plot showing the point estimate of the odds ratio and its 95% confidence interval for maternal age, smoking and race."---- library(ggplot2); library(scales) xbrk <- seq(from = -2, to = 2, by = 1) xlab <- 2^xbrk ggplot(data = gdat.df04, aes(x = log2(or.p), y = ybrk)) + theme_bw() + geom_point() + geom_errorbarh(aes(xmin = log2(or.l), xmax = log2(or.u), height = 0.2)) + scale_x_continuous(breaks = xbrk, labels = xlab, limits = range(xbrk), name = "Odds ratio") + scale_y_continuous(breaks = gdat.df04$ybrk, labels = gdat.df04$ylab, name = "Risk factor") + geom_vline(xintercept = log2(1), linetype = "dashed") + annotate("text", x = log2(0.25), y = gdat.df04$ybrk, label = gdat.df04$ref, hjust = 0, size = 3) + coord_fixed(ratio = 0.75 / 1) + theme(axis.title.y = element_text(vjust = 0)) ## ----message = FALSE---------------------------------------------------------- dat.df05 <- birthwt; head(dat.df05) dat.df05$flow <- factor(dat.df05$low, levels = c(1,0)) dat.df05$fsmoke <- factor(dat.df05$smoke, levels = c(1,0)) dat.df05$frace <- factor(dat.df05$race, levels = c(1,2,3)) dat.tab05 <- table(dat.df05$fsmoke, dat.df05$flow, dat.df05$frace, dnn = c("Smoke", "Low BW", "Race")); dat.tab05 ## ----message = FALSE---------------------------------------------------------- dat.epi05 <- epi.2by2(dat = dat.tab05, method = "cohort.count", elab = "smoke", olab = "Low BW", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi05 ## ----message = FALSE---------------------------------------------------------- dat.df06 <- birthwt dat.tab06 <- dat.df06 %>% mutate(flow = factor(low, levels = c(1,0), labels = c("yes","no"))) %>% mutate(fsmoke = factor(smoke, levels = c(1,0), labels = c("yes","no"))) %>% mutate(frace = factor(race)) %>% group_by(frace, fsmoke, flow) %>% summarise(n = n()) dat.tab06 # View the data in conventional 2 by 2 table format: pivot_wider(dat.tab06, id_cols = c(frace, fsmoke), names_from = flow, values_from = n) ## ----message = FALSE---------------------------------------------------------- dat.epi06 <- epi.2by2(dat = dat.tab06, method = "cohort.count", elab = "Smoke", olab = "Low BW", digits = 2, conf.level = 0.95, units = 100, interpret = FALSE, outcome = "as.columns") dat.epi06 ## ----mantel_haenszel, echo = TRUE, message = FALSE, fig.align = "center", out.width = "80%", fig.cap = "Risk factors for low birth weight babies. Error bar plot showing the odds of having a low birth weight baby for smokers of maternal race categories 1, 2 and 3 and the Mantel-Haenszel odds of having a low birth weight baby for smokers, adjusted for maternal race."---- xbrk <- seq(from = -5, to = 5, by = 1) xlab <- 2^xbrk nstrata <- dat.epi06$n.strata ybrk <- c(1:nstrata, max(nstrata) + 1) ylab <- c("M-H", paste("Strata ", 1:nstrata, sep = "")) or.p <- c(dat.epi06$massoc.detail$OR.mh$est, dat.epi06$massoc.detail$OR.strata.cfield$est) or.l <- c(dat.epi06$massoc.detail$OR.mh$lower, dat.epi06$massoc.detail$OR.strata.cfield$lower) or.u <- c(dat.epi06$massoc.detail$OR.mh$upper, dat.epi06$massoc.detail$OR.strata.cfield$upper) gdat.df06 <- data.frame(ybrk, ylab, or.p, or.l, or.u) ggplot(data = gdat.df06, aes(x = log2(or.p), y = ybrk)) + theme_bw() + geom_point() + geom_errorbarh(aes(xmin = log2(or.u), xmax = log2(or.l), height = 0.2)) + scale_x_continuous(breaks = xbrk, labels = xlab, limits = range(xbrk), name = "Odds ratio") + scale_y_continuous(breaks = gdat.df06$ybrk, labels = gdat.df06$ylab, name = "Risk factor") + geom_vline(xintercept = log2(1), linetype = "dashed") + annotate("text", x = log2(0.03125), y = gdat.df06$yat, label = gdat.df06$ref, hjust = 0, size = 3) + coord_fixed(ratio = 0.75 / 1) + theme(axis.title.y = element_text(vjust = 0))