## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) ## ----setup, message = FALSE--------------------------------------------------- library(pepdiff) library(dplyr) library(ggplot2) ## ----generate-good-data------------------------------------------------------- set.seed(123) # Good data: clean separation, no outliers, balanced # Using 10 reps and 3-fold changes for good power make_good_data <- function() { n_peptides <- 40 n_reps <- 10 peptides <- paste0("PEP_", sprintf("%03d", 1:n_peptides)) genes <- paste0("GENE_", LETTERS[((1:n_peptides - 1) %% 26) + 1]) diff_peptides <- peptides[1:12] # 30% differential sim_data <- expand.grid( peptide = peptides, treatment = c("ctrl", "trt"), bio_rep = 1:n_reps, stringsAsFactors = FALSE ) %>% mutate( gene_id = genes[match(peptide, peptides)], base = rep(rgamma(n_peptides, shape = 8, rate = 0.8), each = 2 * n_reps), effect = ifelse(peptide %in% diff_peptides & treatment == "trt", sample(c(0.33, 3), length(diff_peptides) * n_reps, replace = TRUE), 1), value = rgamma(n(), shape = 20, rate = 20 / (base * effect)) ) %>% select(peptide, gene_id, treatment, bio_rep, value) temp_file <- tempfile(fileext = ".csv") write.csv(sim_data, temp_file, row.names = FALSE) read_pepdiff(temp_file, id = "peptide", gene = "gene_id", value = "value", factors = "treatment", replicate = "bio_rep") } good_data <- make_good_data() ## ----generate-bad-data-------------------------------------------------------- # Problematic data: outlier sample, batch effect, systematic missingness make_bad_data <- function() { n_peptides <- 40 n_reps <- 10 peptides <- paste0("PEP_", sprintf("%03d", 1:n_peptides)) genes <- paste0("GENE_", LETTERS[((1:n_peptides - 1) %% 26) + 1]) sim_data <- expand.grid( peptide = peptides, treatment = c("ctrl", "trt"), bio_rep = 1:n_reps, stringsAsFactors = FALSE ) %>% mutate( gene_id = genes[match(peptide, peptides)], base = rep(rgamma(n_peptides, shape = 5, rate = 0.5), each = 2 * n_reps), # Outlier: one control sample has 3x higher values batch_effect = ifelse(treatment == "ctrl" & bio_rep == 1, 3, 1), value = rgamma(n(), shape = 10, rate = 10 / (base * batch_effect)) ) %>% select(peptide, gene_id, treatment, bio_rep, value) # Add systematic missingness: low-abundance peptides missing in treatment low_abundance <- peptides[31:40] sim_data <- sim_data %>% mutate(value = ifelse(peptide %in% low_abundance & treatment == "trt" & runif(n()) < 0.7, NA, value)) temp_file <- tempfile(fileext = ".csv") write.csv(sim_data, temp_file, row.names = FALSE) read_pepdiff(temp_file, id = "peptide", gene = "gene_id", value = "value", factors = "treatment", replicate = "bio_rep") } bad_data <- make_bad_data() ## ----pca-good, fig.height = 4------------------------------------------------- plot_pca_simple(good_data) ## ----pca-bad, fig.height = 4-------------------------------------------------- plot_pca_simple(bad_data) ## ----dist-good, fig.height = 4------------------------------------------------ plot_distributions_simple(good_data) ## ----dist-bad, fig.height = 4------------------------------------------------- plot_distributions_simple(bad_data) ## ----miss-good, fig.height = 4------------------------------------------------ plot_missingness_simple(good_data) ## ----miss-bad, fig.height = 4------------------------------------------------- plot_missingness_simple(bad_data) ## ----run-analyses------------------------------------------------------------- good_results <- compare(good_data, compare = "treatment", ref = "ctrl") bad_results <- compare(bad_data, compare = "treatment", ref = "ctrl") ## ----volcano-good, fig.height = 4--------------------------------------------- plot_volcano_new(good_results) ## ----volcano-bad, fig.height = 4---------------------------------------------- plot_volcano_new(bad_results) ## ----pval-good, fig.height = 4------------------------------------------------ plot_pvalue_histogram(good_results) ## ----pval-bad, fig.height = 4------------------------------------------------- plot_pvalue_histogram(bad_results) ## ----fc-good, fig.height = 4-------------------------------------------------- plot_fc_distribution_new(good_results) ## ----fc-bad, fig.height = 4--------------------------------------------------- plot_fc_distribution_new(bad_results) ## ----export-example, eval = FALSE--------------------------------------------- # p <- plot_volcano_new(good_results) + # labs(title = "Treatment vs Control") + # theme_minimal(base_size = 14) # # ggsave("volcano.pdf", p, width = 6, height = 5) ## ----customise-example, fig.height = 4---------------------------------------- plot_volcano_new(good_results) + theme_classic() + labs(title = "My Custom Volcano Plot")