## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 6,
  fig.height = 6
)

## ----setup, message = FALSE---------------------------------------------------
library(gghinton)
library(ggplot2)

## ----correlation--------------------------------------------------------------
# Hinton diagram of the mtcars correlation matrix: square area encodes the
# magnitude of each correlation, fill encodes its sign.
cor_df <- as_hinton_df(cor(mtcars))
var_names <- colnames(mtcars)

ggplot(cor_df, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = seq_along(var_names), labels = var_names) +
  scale_y_continuous(breaks = seq_along(var_names), labels = rev(var_names)) +
  coord_fixed() +
  theme_hinton() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Correlation matrix: mtcars",
    subtitle = "White = positive, black = negative"
  )

## ----pca-loadings-------------------------------------------------------------
pca_fit <- prcomp(scale(mtcars))

# First four principal components
pc_loadings <- pca_fit$rotation[, 1:4]
colnames(pc_loadings) <- paste0("PC", 1:4)

loadings_df <- matrix_to_hinton(pc_loadings)

ggplot(loadings_df, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:4, labels = colnames(pc_loadings)) +
  scale_y_continuous(
    breaks = seq_along(rownames(pc_loadings)),
    labels = rev(rownames(pc_loadings))
  ) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "PCA loadings: mtcars",
    subtitle = "Each column is a principal component"
  )

## ----confusion----------------------------------------------------------------
# Realistic confusion matrix for a 5-class classifier
# (e.g., handwritten digit recognition on a held-out test set)
classes <- c("0", "1", "2", "3", "4")
conf <- matrix(c(
  96,  0,  1,  2,  1,
   0, 98,  1,  0,  1,
   2,  1, 88,  5,  4,
   1,  0,  4, 91,  4,
   1,  2,  4,  2, 91
), nrow = 5, byrow = TRUE, dimnames = list(actual = classes, predicted = classes))

# Row-normalise so each row shows the conditional error distribution
conf_prop <- prop.table(conf, margin = 1)
# Plot the row-normalised confusion matrix built in the previous chunk.
conf_df <- as_hinton_df(conf_prop)

ggplot(conf_df, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:5, labels = classes) +
  scale_y_continuous(breaks = 1:5, labels = rev(classes)) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "Classifier confusion matrix (row-normalised)",
    subtitle = "Diagonal = correct; off-diagonal = errors",
    x = "Predicted",
    y = "Actual"
  )

## ----social-mobility, fig.width = 6.5, fig.height = 6.5-----------------------
# Father-to-son occupational status transitions, row-normalised so each row
# is a conditional distribution over the son's status.
mob_trans <- prop.table(occupationalStatus, margin = 1)
mob_df <- as_hinton_df(mob_trans)

ggplot(mob_df, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:8, labels = colnames(occupationalStatus)) +
  scale_y_continuous(breaks = 1:8, labels = rev(rownames(occupationalStatus))) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "Occupational mobility: UK (Hope 1982)",
    subtitle = "Row-normalised; large square = likely transition",
    x = "Son's status",
    y = "Father's status"
  )

## ----credit-ratings, fig.width = 7, fig.height = 7----------------------------
ratings <- c("AAA", "AA", "A", "BBB", "BB", "B", "CCC", "D")

# Approximate one-year transition probabilities (illustrative;
# based on S&P Global published default studies).
# Rows sum to 1.
sp_mat <- matrix(c(
  # AAA     AA      A       BBB     BB      B       CCC     D
  0.9181, 0.0748, 0.0050, 0.0006, 0.0008, 0.0000, 0.0000, 0.0007,
  0.0057, 0.9109, 0.0762, 0.0054, 0.0010, 0.0006, 0.0002, 0.0000,
  0.0009, 0.0226, 0.9115, 0.0560, 0.0064, 0.0020, 0.0004, 0.0002,
  0.0002, 0.0027, 0.0507, 0.8685, 0.0588, 0.0129, 0.0024, 0.0038,
  0.0003, 0.0010, 0.0067, 0.0778, 0.7749, 0.1106, 0.0101, 0.0186,
  0.0000, 0.0006, 0.0025, 0.0104, 0.0720, 0.7653, 0.0613, 0.0879,
  0.0000, 0.0000, 0.0023, 0.0090, 0.0194, 0.1326, 0.4493, 0.3874,
  0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000
), nrow = 8, byrow = TRUE, dimnames = list(from = ratings, to = ratings))

df_sp <- as_hinton_df(sp_mat)

ggplot(df_sp, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:8, labels = ratings) +
  scale_y_continuous(breaks = 1:8, labels = rev(ratings)) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "Credit rating one-year transition probabilities",
    subtitle = "Approximate values based on S&P Global published studies",
    x = "To rating",
    y = "From rating"
  )

## ----nucleotide-sub-----------------------------------------------------------
# Kimura 2-parameter rate matrix, kappa = 4
# Rows: source base; Columns: destination base
# Diagonal is negative (departure rate); off-diagonal positive (arrival rate)
kappa <- 4

# Under K80: transversion rate beta, transition rate alpha = kappa * beta
# With overall rate normalised: beta = 1/(2+2*kappa)
beta <- 1 / (2 + 2 * kappa)
alpha <- kappa * beta

bases <- c("A", "C", "G", "T")
Q <- matrix(c(
  -(alpha + 2*beta), beta,              alpha,             beta,
  beta,              -(alpha + 2*beta), beta,              alpha,
  alpha,             beta,              -(alpha + 2*beta), beta,
  beta,              alpha,             beta,              -(alpha + 2*beta)
), nrow = 4, byrow = TRUE, dimnames = list(from = bases, to = bases))

df_Q <- matrix_to_hinton(Q)

ggplot(df_Q, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:4, labels = bases) +
  scale_y_continuous(breaks = 1:4, labels = rev(bases)) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = paste0("Kimura K80 substitution rate matrix (kappa = ", kappa, ")"),
    subtitle = "White = positive rate; black = negative diagonal (departure rate)",
    x = "To",
    y = "From"
  )

## ----regression-coefs---------------------------------------------------------
# Three simple regressions: mpg, hp, and wt each predicted by
# a common set of standardised predictors from mtcars
outcomes <- c("mpg", "hp", "wt")
predictors <- c("cyl", "disp", "drat", "qsec", "gear", "carb")

# Standardise once, outside the loop (the scaled data does not depend on the
# outcome being fitted).
mtcars_std <- as.data.frame(scale(mtcars))

# Fit and collect standardised coefficients (excluding intercept).
# vapply (rather than sapply) guarantees a numeric matrix and, via the named
# FUN.VALUE template, checks that each fit returns one coefficient per
# predictor in the expected order.
coef_mat <- vapply(
  outcomes,
  function(y) {
    fit <- lm(reformulate(predictors, response = y), data = mtcars_std)
    coef(fit)[predictors]
  },
  FUN.VALUE = setNames(numeric(length(predictors)), predictors)
)

# coef_mat is predictors x outcomes; transpose to outcomes x predictors
coef_mat <- t(coef_mat)

df_coef <- matrix_to_hinton(coef_mat)

ggplot(df_coef, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = seq_along(predictors), labels = predictors) +
  scale_y_continuous(breaks = seq_along(outcomes), labels = rev(outcomes)) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "Standardised regression coefficients",
    subtitle = "Each row is a separate outcome; white = positive effect",
    x = "Predictor",
    y = "Outcome"
  )

## ----hair-eye-----------------------------------------------------------------
# Collapse over sex dimension
hair_eye <- margin.table(HairEyeColor, margin = c(1, 2))

# Row-normalise: probability of each eye colour given hair colour
hair_eye_prop <- prop.table(hair_eye, margin = 1)

df_he <- as_hinton_df(hair_eye_prop)

ggplot(df_he, aes(x = col, y = row, weight = weight)) +
  geom_hinton() +
  scale_fill_hinton() +
  scale_x_continuous(breaks = 1:4, labels = colnames(hair_eye)) +
  scale_y_continuous(breaks = 1:4, labels = rev(rownames(hair_eye))) +
  coord_fixed() +
  theme_hinton() +
  labs(
    title = "Eye colour given hair colour (HairEyeColor)",
    subtitle = "Row-normalised; larger square = more probable combination",
    x = "Eye colour",
    y = "Hair colour"
  )