## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 6,
  fig.height = 6
)

## ----setup, message = FALSE---------------------------------------------------
library(gghinton)
library(ggplot2)

## ----correlation--------------------------------------------------------------
# Hinton diagram of the mtcars correlation matrix: square area encodes the
# magnitude of each correlation, fill encodes its sign.
cor_df <- as_hinton_df(cor(mtcars))
var_names <- colnames(mtcars)

ggplot(cor_df, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = seq_along(var_names), labels = var_names) +
  scale_y_continuous(breaks = seq_along(var_names), labels = rev(var_names)) +
  coord_fixed() +
  theme_hinton() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Correlation matrix: mtcars",
    subtitle = "White = positive, black = negative"
  )

## ----pca-loadings-------------------------------------------------------------
pca_fit <- prcomp(scale(mtcars))

# First four principal components
pc_loadings <- pca_fit$rotation[, 1:4]
colnames(pc_loadings) <- paste0("PC", 1:4)

loadings_df <- matrix_to_hinton(pc_loadings)

ggplot(loadings_df, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:4, labels = colnames(pc_loadings)) +
  scale_y_continuous(
    breaks = seq_along(rownames(pc_loadings)),
    labels = rev(rownames(pc_loadings))
  ) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "PCA loadings: mtcars",
    subtitle = "Each column is a principal component"
  )

## ----confusion----------------------------------------------------------------
# Realistic confusion matrix for a 5-class classifier
# (e.g., handwritten digit recognition on a held-out test set)
classes <- c("0", "1", "2", "3", "4")
conf <- matrix(c(
  96,  0,  1,  2,  1,
   0, 98,  1,  0,  1,
   2,  1, 88,  5,  4,
   1,  0,  4, 91,  4,
   1,  2,  4,  2, 91
), nrow = 5, byrow = TRUE, dimnames = list(actual = classes, predicted = classes))

# Row-normalise so each row shows the conditional error distribution
conf_prop <- prop.table(conf, margin = 1)
# Plot the row-normalised confusion matrix built in the previous chunk.
conf_df <- as_hinton_df(conf_prop)

ggplot(conf_df, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:5, labels = classes) +
  scale_y_continuous(breaks = 1:5, labels = rev(classes)) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "Classifier confusion matrix (row-normalised)",
    subtitle = "Diagonal = correct; off-diagonal = errors",
    x = "Predicted",
    y = "Actual"
  )

## ----social-mobility, fig.width = 6.5, fig.height = 6.5-----------------------
# Father-to-son occupational status transitions, row-normalised so each row
# is a conditional distribution over the son's status.
mob_trans <- prop.table(occupationalStatus, margin = 1)
mob_df <- as_hinton_df(mob_trans)

ggplot(mob_df, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:8, labels = colnames(occupationalStatus)) +
  scale_y_continuous(breaks = 1:8, labels = rev(rownames(occupationalStatus))) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "Occupational mobility: UK (Hope 1982)",
    subtitle = "Row-normalised; large square = likely transition",
    x = "Son's status",
    y = "Father's status"
  )

## ----credit-ratings, fig.width = 7, fig.height = 7----------------------------
ratings <- c("AAA", "AA", "A", "BBB", "BB", "B", "CCC", "D")

# Approximate one-year transition probabilities (illustrative;
# based on S&P Global published default studies).
# Rows sum to 1.
sp_mat <- matrix(c(
  # AAA     AA      A       BBB     BB      B       CCC     D
  0.9181, 0.0748, 0.0050, 0.0006, 0.0008, 0.0000, 0.0000, 0.0007,
  0.0057, 0.9109, 0.0762, 0.0054, 0.0010, 0.0006, 0.0002, 0.0000,
  0.0009, 0.0226, 0.9115, 0.0560, 0.0064, 0.0020, 0.0004, 0.0002,
  0.0002, 0.0027, 0.0507, 0.8685, 0.0588, 0.0129, 0.0024, 0.0038,
  0.0003, 0.0010, 0.0067, 0.0778, 0.7749, 0.1106, 0.0101, 0.0186,
  0.0000, 0.0006, 0.0025, 0.0104, 0.0720, 0.7653, 0.0613, 0.0879,
  0.0000, 0.0000, 0.0023, 0.0090, 0.0194, 0.1326, 0.4493, 0.3874,
  0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000
), nrow = 8, byrow = TRUE, dimnames = list(from = ratings, to = ratings))

df_sp <- as_hinton_df(sp_mat)

ggplot(df_sp, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:8, labels = ratings) +
  scale_y_continuous(breaks = 1:8, labels = rev(ratings)) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "Credit rating one-year transition probabilities",
    subtitle = "Approximate values based on S&P Global published studies",
    x = "To rating",
    y = "From rating"
  )

## ----nucleotide-sub-----------------------------------------------------------
# Kimura 2-parameter rate matrix, kappa = 4
# Rows: source base; Columns: destination base
# Diagonal is negative (departure rate); off-diagonal positive (arrival rate)
kappa <- 4

# Under K80: transversion rate beta, transition rate alpha = kappa * beta
# With overall rate normalised: beta = 1/(2+2*kappa)
beta <- 1 / (2 + 2 * kappa)
alpha <- kappa * beta

bases <- c("A", "C", "G", "T")
Q <- matrix(c(
  -(alpha + 2*beta), beta,              alpha,             beta,
  beta,              -(alpha + 2*beta), beta,              alpha,
  alpha,             beta,              -(alpha + 2*beta), beta,
  beta,              alpha,             beta,              -(alpha + 2*beta)
), nrow = 4, byrow = TRUE, dimnames = list(from = bases, to = bases))

df_Q <- matrix_to_hinton(Q)

ggplot(df_Q, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:4, labels = bases) +
  scale_y_continuous(breaks = 1:4, labels = rev(bases)) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = paste0("Kimura K80 substitution rate matrix (kappa = ", kappa, ")"),
    subtitle = "White = positive rate; black = negative diagonal (departure rate)",
    x = "To",
    y = "From"
  )

## ----regression-coefs---------------------------------------------------------
# Three simple regressions: mpg, hp, and wt each predicted by
# a common set of standardised predictors from mtcars
outcomes <- c("mpg", "hp", "wt")
predictors <- c("cyl", "disp", "drat", "qsec", "gear", "carb")

# Standardise once, outside the loop (the scaled data does not depend on the
# outcome being fitted).
mtcars_std <- as.data.frame(scale(mtcars))

# Fit and collect standardised coefficients (excluding intercept).
# vapply (rather than sapply) guarantees a numeric matrix and, via the named
# FUN.VALUE template, checks that each fit returns one coefficient per
# predictor in the expected order.
coef_mat <- vapply(
  outcomes,
  function(y) {
    fit <- lm(reformulate(predictors, response = y), data = mtcars_std)
    coef(fit)[predictors]
  },
  FUN.VALUE = setNames(numeric(length(predictors)), predictors)
)

# coef_mat is predictors x outcomes; transpose to outcomes x predictors
coef_mat <- t(coef_mat)

df_coef <- matrix_to_hinton(coef_mat)

ggplot(df_coef, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = seq_along(predictors), labels = predictors) +
  scale_y_continuous(breaks = seq_along(outcomes), labels = rev(outcomes)) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "Standardised regression coefficients",
    subtitle = "Each row is a separate outcome; white = positive effect",
    x = "Predictor",
    y = "Outcome"
  )

## ----hair-eye-----------------------------------------------------------------
# Collapse over sex dimension
hair_eye <- margin.table(HairEyeColor, margin = c(1, 2))

# Row-normalise: probability of each eye colour given hair colour
hair_eye_prop <- prop.table(hair_eye, margin = 1)

df_he <- as_hinton_df(hair_eye_prop)

ggplot(df_he, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:4, labels = colnames(hair_eye)) +
  scale_y_continuous(breaks = 1:4, labels = rev(rownames(hair_eye))) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "Eye colour given hair colour (HairEyeColor)",
    subtitle = "Row-normalised; larger square = more probable combination",
    x = "Eye colour",
    y = "Hair colour"
  )