## ----------------------------------------------------------------------------- #| label: preliminaries #| include: false library("betareg") data("GasolineYield", package = "betareg") data("FoodExpenditure", package = "betareg") gy_loglog <- betareg(yield ~ batch + temp, data = GasolineYield, link = "loglog") fe_beta2 <- betareg(I(food/income) ~ income + persons | persons, data = FoodExpenditure) knitr::opts_chunk$set( engine = "R", collapse = TRUE, comment = "##", message = FALSE, warning = FALSE, echo = TRUE ) options(width = 70, prompt = "R> ", continue = "+ ", useFancyQuotes = FALSE, digits = 5) ## ----------------------------------------------------------------------------- #| echo: false #| fig-width: 8.5 #| fig-height: 4.5 #| out-width: 100% #| label: fig-beta-distributions #| fig-cap: "Probability density functions for beta distributions with varying parameters $\\mu = 0.10, 0.25, 0.50, 0.75, 0.90$ and $\\phi = 5$ (left) and $\\phi = 100$ (right)." par(mfrow = c(1, 2), mar = c(4.1, 4.1, 4.1, 0.1)) dbeta2 <- function(x, mu, phi = 1) dbeta(x, mu * phi, (1 - mu) * phi) x <- seq(from = 0.01, to = 0.99, length = 200) xx <- cbind(x, x, x, x, x) yy <- cbind( dbeta2(x, 0.10, 5), dbeta2(x, 0.25, 5), dbeta2(x, 0.50, 5), dbeta2(x, 0.75, 5), dbeta2(x, 0.90, 5) ) matplot(xx, yy, type = "l", xlab = "y", ylab = "Density", main = expression(phi == 5), lty = 1, col = "black", ylim = c(0, 15)) text(0.05, 12 , "0.10") text(0.95, 12 , "0.90") text(0.22, 2.8, "0.25") text(0.78, 2.8, "0.75") text(0.50, 2.3, "0.50") yy <- cbind( dbeta2(x, 0.10, 100), dbeta2(x, 0.25, 100), dbeta2(x, 0.50, 100), dbeta2(x, 0.75, 100), dbeta2(x, 0.90, 100) ) matplot(xx, yy, type = "l", xlab = "y", ylab = "", main = expression(phi == 100), lty = 1, col = "black", ylim = c(0, 15)) text(0.10, 14.5, "0.10") text(0.90, 14.5, "0.90") text(0.25, 9.8, "0.25") text(0.75, 9.8, "0.75") text(0.50, 8.6, "0.50") ## ----eval=FALSE--------------------------------------------------------------- # betareg(formula, data, subset, na.action, weights, offset, # link = "logit", link.phi = NULL, control = betareg.control(...), # model = TRUE, y = TRUE, x = FALSE, ...) ## ----------------------------------------------------------------------------- #| label: GasolineYield-betareg data("GasolineYield", package = "betareg") gy_logit <- betareg(yield ~ batch + temp, data = GasolineYield) summary(gy_logit) ## ----------------------------------------------------------------------------- #| echo: false #| fig-width: 6 #| fig-height: 5.5 #| out-width: 100% #| label: fig-GasolineYield #| fig-cap: "Gasoline yield data from @betareg:Prater:1956: Proportion of crude oil converted to gasoline explained by temperature (in degrees Fahrenheit) at which all gasoline has vaporized and given batch (indicated by gray level). Fitted curves correspond to beta regressions `gy_loglog` with log-log link (solid, red) and `gy_logit` with logit link (dashed, blue). Both curves were evaluated at varying temperature with the intercept for batch 6 (i.e., roughly the average intercept)." redblue <- hcl(c(0, 260), 90, 40) plot(yield ~ temp, data = GasolineYield, type = "n", ylab = "Proportion of crude oil converted to gasoline", xlab = "Temperature at which all gasoline has vaporized", main = "Prater's gasoline yield data") points(yield ~ temp, data = GasolineYield, cex = 1.75, pch = 19, col = rev(gray.colors(10))[as.numeric(batch)]) points(yield ~ temp, data = GasolineYield, cex = 1.75) legend("topleft", as.character(1:10), title = "Batch", col = rev(gray.colors(10)), pch = 19, bty = "n") legend("topleft", as.character(1:10), title = "Batch", pch = 1, bty = "n") lines(150:500, predict(gy_logit, newdata = data.frame(temp = 150:500, batch = "6")), col = redblue[2], lwd = 2, lty = 2) lines(150:500, predict(gy_loglog, newdata = data.frame(temp = 150:500, batch = "6")), col = redblue[1], lwd = 2) legend("bottomright", c("log-log", "logit"), col = redblue, lty = 1:2, lwd = 2, bty = "n") ## ----------------------------------------------------------------------------- #| fig-width: 8.5 #| fig-height: 10 #| out-width: 100% #| label: fig-GasolineYield-plot #| fig-cap: "Diagnostic plots for beta regression model `gy_logit`." par(mfrow = c(3, 2)) suppressWarnings(RNGversion("3.5.0")) set.seed(123) plot(gy_logit, which = 1:4, type = "pearson") plot(gy_logit, which = 5, type = "deviance", sub.caption = "") plot(gy_logit, which = 1, type = "deviance", sub.caption = "") ## ----------------------------------------------------------------------------- #| label: GasolineYield-update gy_logit4 <- update(gy_logit, subset = -4) coef(gy_logit, model = "precision") coef(gy_logit4, model = "precision") ## ----------------------------------------------------------------------------- #| label: FoodExpenditure-lm data("FoodExpenditure", package = "betareg") fe_lm <- lm(I(food/income) ~ income + persons, data = FoodExpenditure) ## ----------------------------------------------------------------------------- #| label: FoodExpenditure-bptest library("lmtest") bptest(fe_lm) ## ----------------------------------------------------------------------------- #| label: FoodExpenditure-betareg fe_beta <- betareg(I(food/income) ~ income + persons, data = FoodExpenditure) summary(fe_beta) ## ----------------------------------------------------------------------------- #| echo: false #| fig-width: 6 #| fig-height: 5.5 #| out-width: 100% #| label: fig-FoodExpenditure #| fig-cap: "Household food expenditure data from @betareg:Griffiths+Hill+Judge:1993: Proportion of household income spent on food explained by household income and number of persons in household (indicated by gray level). Fitted curves correspond to beta regressions `fe_beta` with fixed dispersion (long-dashed, blue), `fe_beta2` with variable dispersion (solid, red), and the linear regression `fe_lin` (dashed, black). All curves were evaluated at varying income with the intercept for mean number of persons ($ = `r round(mean(FoodExpenditure$persons), digits = 2)`$)." redblueblack <- hcl(c(0, 260, 0), c(90, 90, 0), c(40, 40, 0)) plot(I(food/income) ~ income, data = FoodExpenditure, xlab = "Household income", ylab = "Proportion of food expenditures", main = "Food expenditures data", type = "n", ylim = c(0.04, 0.57)) points(I(food/income) ~ income, data = FoodExpenditure, cex = 1.75, pch = 19, col = rev(gray.colors(7))[persons]) points(I(food/income) ~ income, data = FoodExpenditure, cex = 1.75) legend("bottomleft", rev(as.character(sort(unique(FoodExpenditure$persons)))), title = "Persons", col = gray.colors(7), pch = 19, bty = "n") legend("bottomleft", rev(as.character(sort(unique(FoodExpenditure$persons)))), title = "Persons", pch = 1, bty = "n") lines(10:100, predict(fe_lm, newdata = data.frame(income = 10:100, persons = mean(FoodExpenditure$persons))), col = redblueblack[3], lwd = 2, lty = 2) lines(10:100, predict(fe_beta, newdata = data.frame(income = 10:100, persons = mean(FoodExpenditure$persons))), col = redblueblack[2], lwd = 2, lty = 5) lines(10:100, predict(fe_beta2, newdata = data.frame(income = 10:100, persons = mean(FoodExpenditure$persons))), col = redblueblack[1], lwd = 2) legend("topright", c("logit, var. disp.", "logit, fix. disp.", "lm"), col = redblueblack, lty = c(1, 5, 2), lwd = 2, bty = "n") ## ----------------------------------------------------------------------------- #| label: GasolineYield-phireg gy_logit2 <- betareg(yield ~ batch + temp | temp, data = GasolineYield) ## ----------------------------------------------------------------------------- #| label: GasolineYield-phireg-coef #| echo: false printCoefmat(summary(gy_logit2)$coefficients$precision) ## ----------------------------------------------------------------------------- #| label: GasolineYield-lrtest lrtest(gy_logit, gy_logit2) ## ----------------------------------------------------------------------------- #| label: FoodExpenditure-betareg2 fe_beta2 <- betareg(I(food/income) ~ income + persons | persons, data = FoodExpenditure) ## ----------------------------------------------------------------------------- #| label: FoodExpenditure-comparison lrtest(fe_beta, fe_beta2) AIC(fe_beta, fe_beta2, k = log(nrow(FoodExpenditure))) ## ----------------------------------------------------------------------------- #| label: GasolineYield-loglog gy_loglog <- betareg(yield ~ batch + temp, data = GasolineYield, link = "loglog") ## ----------------------------------------------------------------------------- #| label: GasolineYield-Rsquared summary(gy_logit)$pseudo.r.squared summary(gy_loglog)$pseudo.r.squared ## ----------------------------------------------------------------------------- #| label: GasolineYield-AIC AIC(gy_logit, gy_logit2, gy_loglog) ## ----------------------------------------------------------------------------- #| label: GasolineYield-reset lrtest(gy_logit, . ~ . + I(predict(gy_logit, type = "link")^2)) lrtest(gy_loglog, . ~ . + I(predict(gy_loglog, type = "link")^2)) ## ----------------------------------------------------------------------------- #| echo: false #| fig-width: 6 #| fig-height: 5.5 #| out-width: 100% #| label: fig-GasolineYield-diagnostics #| fig-cap: "Scatterplot comparing the absolute raw residuals from beta regression modes with log-log link (x-axis) and logit link (y-axis)." plot(abs(residuals(gy_loglog, type = "response")), abs(residuals(gy_logit, type = "response"))) abline(0, 1, lty = 2) ## ----------------------------------------------------------------------------- #| label: GasolineYield-loglog2 gy_loglog2 <- update(gy_loglog, link.phi = "log") summary(gy_loglog2)$iterations ## ----------------------------------------------------------------------------- #| label: FoodExpenditure-links sapply(c("logit", "probit", "cloglog", "cauchit", "loglog"), function(x) logLik(update(fe_beta2, link = x))) ## ----------------------------------------------------------------------------- #| label: ReadingSkills-eda #| echo: false #| results: hide data("ReadingSkills", package = "betareg") rs_accuracy <- format(round(with(ReadingSkills, tapply(accuracy, dyslexia, mean)), digits = 3)) ## ----------------------------------------------------------------------------- #| label: ReadingSkills-ols data("ReadingSkills", package = "betareg") rs_ols <- lm(qlogis(accuracy) ~ dyslexia * iq, data = ReadingSkills) coeftest(rs_ols) ## ----------------------------------------------------------------------------- #| label: ReadingSkills-beta rs_beta <- betareg(accuracy ~ dyslexia * iq | dyslexia + iq, data = ReadingSkills, hessian = TRUE) coeftest(rs_beta) ## ----------------------------------------------------------------------------- #| echo: false #| fig-width: 6 #| fig.height: 5.5 #| out-width: 100% #| label: fig-ReadingSkills #| fig-cap: "Reading skills data from @betareg:Smithson+Verkuilen:2006 : Linearly transformed reading accuracy by IQ score and dyslexia status (control, blue vs. dyslexic, red). Fitted curves correspond to beta regression `rs_beta` (solid) and OLS regression with logit-transformed dependent variable `rs_ols` (dashed)." cl1 <- hcl(c(260, 0), 90, 40) cl2 <- hcl(c(260, 0), 10, 95) plot(accuracy ~ iq, data = ReadingSkills, col = cl2[as.numeric(dyslexia)], main = "Reading skills data", xlab = "IQ score", ylab = "Reading accuracy", pch = c(19, 17)[as.numeric(dyslexia)], cex = 1.5) points(accuracy ~ iq, data = ReadingSkills, cex = 1.5, pch = (1:2)[as.numeric(dyslexia)], col = cl1[as.numeric(dyslexia)]) nd <- data.frame(dyslexia = "no", iq = -30:30/10) lines(nd$iq, predict(rs_beta, nd), col = cl1[1], lwd = 2) lines(nd$iq, plogis(predict(rs_ols, nd)), col = cl1[1], lty = 2, lwd = 2) nd <- data.frame(dyslexia = "yes", iq = -30:30/10) lines(nd$iq, predict(rs_beta, nd), col = cl1[2], lwd = 2) lines(nd$iq, plogis(predict(rs_ols, nd)), col = cl1[2], lty = 2, lwd = 2) legend("topleft", c("control", "dyslexic", "betareg", "lm"), lty = c(NA, NA, 1:2), pch = c(19, 17, NA, NA), lwd = 2, col = c(cl2, 1, 1), bty = "n") legend("topleft", c("control", "dyslexic", "betareg", "lm"), lty = c(NA, NA, 1:2), pch = c(1, 2, NA, NA), col = c(cl1, NA, NA), bty = "n") ## ----------------------------------------------------------------------------- #| label: strucchange-data suppressWarnings(RNGversion("3.5.0")) set.seed(123) y1 <- c(rbeta(150, 0.3 * 4, 0.7 * 4), rbeta(50, 0.5 * 4, 0.5 * 4)) y2 <- c(rbeta(100, 0.3 * 4, 0.7 * 4), rbeta(100, 0.3 * 8, 0.7 * 8)) ## ----------------------------------------------------------------------------- #| label: strucchange-gefp library("strucchange") y1_gefp <- gefp(y1 ~ 1, fit = betareg) y2_gefp <- gefp(y2 ~ 1, fit = betareg) ## ----------------------------------------------------------------------------- #| fig-width: 6.5 #| fig-height: 6 #| label: fig-strucchange1 #| fig-cap: "Structural change tests for artificial data `y1` with change in $\\mu$." plot(y1_gefp, aggregate = FALSE) ## ----------------------------------------------------------------------------- #| fig-width: 6.5 #| fig-height: 6 #| label: fig-strucchange2 #| fig-cap: "Structural change tests for artificial data `y2` with change in $\\phi$." plot(y2_gefp, aggregate = FALSE)