## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 8, fig.height = 6 ) ## ----setup, message = FALSE--------------------------------------------------- library(pepdiff) library(dplyr) ## ----generate-good-data------------------------------------------------------- set.seed(123) n_peptides <- 40 n_reps <- 5 peptides <- paste0("PEP_", sprintf("%03d", 1:n_peptides)) genes <- paste0("GENE_", LETTERS[((1:n_peptides - 1) %% 26) + 1]) # Simulate well-behaved Gamma data design <- expand.grid( peptide = peptides, treatment = c("ctrl", "trt"), bio_rep = 1:n_reps, stringsAsFactors = FALSE ) sim_data <- design %>% mutate( gene_id = genes[match(peptide, peptides)], pep_num = as.numeric(gsub("PEP_", "", peptide)), base = 10 + pep_num * 0.5, # First 20 peptides have 2-fold treatment effect effect = ifelse(pep_num <= 20 & treatment == "trt", 2, 1), # Gamma noise (well-behaved) value = rgamma(n(), shape = 15, rate = 15 / (base * effect)) ) %>% select(peptide, gene_id, treatment, bio_rep, value) # Import temp_file <- tempfile(fileext = ".csv") write.csv(sim_data, temp_file, row.names = FALSE) dat <- read_pepdiff( temp_file, id = "peptide", gene = "gene_id", value = "value", factors = "treatment", replicate = "bio_rep" ) unlink(temp_file) ## ----run-glm-good, message = FALSE-------------------------------------------- results <- compare(dat, compare = "treatment", ref = "ctrl", method = "glm") # Check fit diagnostics diag <- plot_fit_diagnostics(results) ## ----show-good-plot, fig.height = 8------------------------------------------- diag$plot ## ----view-flagged------------------------------------------------------------- diag$flagged ## ----summary-stats------------------------------------------------------------ # Summary statistics diag$summary ## ----generate-bad-data-------------------------------------------------------- set.seed(456) # Same design but with outliers and heavy tails sim_bad <- design %>% mutate( gene_id = genes[match(peptide, peptides)], pep_num = as.numeric(gsub("PEP_", "", peptide)), base = 10 + pep_num * 0.5, effect = ifelse(pep_num <= 20 & treatment == "trt", 2, 1), # Add outliers (10% of observations) outlier = ifelse(runif(n()) < 0.1, runif(n(), 3, 8), 1), # Heavier-tailed noise value = rgamma(n(), shape = 3, rate = 3 / (base * effect)) * outlier ) %>% select(peptide, gene_id, treatment, bio_rep, value) temp_file <- tempfile(fileext = ".csv") write.csv(sim_bad, temp_file, row.names = FALSE) dat_bad <- read_pepdiff( temp_file, id = "peptide", gene = "gene_id", value = "value", factors = "treatment", replicate = "bio_rep" ) unlink(temp_file) ## ----run-glm-bad, message = FALSE--------------------------------------------- results_bad <- compare(dat_bad, compare = "treatment", ref = "ctrl", method = "glm") diag_bad <- plot_fit_diagnostics(results_bad) ## ----show-bad-plot, fig.height = 8-------------------------------------------- diag_bad$plot ## ----try-art, message = FALSE------------------------------------------------- results_art <- compare(dat_bad, compare = "treatment", ref = "ctrl", method = "art") # Compare significant calls comparison <- tibble( peptide = results_bad$results$peptide, glm_sig = results_bad$results$significant, art_sig = results_art$results$significant ) cat("Both significant:", sum(comparison$glm_sig & comparison$art_sig), "\n") cat("GLM only:", sum(comparison$glm_sig & !comparison$art_sig), "\n") cat("ART only:", sum(!comparison$glm_sig & comparison$art_sig), "\n")