## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) ## ----eval = FALSE------------------------------------------------------------- # # Install from GitHub # devtools::install_github("TeamMacLean/pepdiff") ## ----eval = FALSE------------------------------------------------------------- # # For heatmaps # BiocManager::install("ComplexHeatmap") # # # For rank products test # BiocManager::install("RankProd") ## ----setup, message = FALSE--------------------------------------------------- library(pepdiff) library(dplyr) ## ----generate-data------------------------------------------------------------ set.seed(123) # Experimental design # 10 reps and 3-fold changes give us good power to detect true effects n_peptides <- 50 n_reps <- 10 conditions <- c("ctrl", "trt") # Generate peptide names and gene IDs peptides <- paste0("PEP_", sprintf("%03d", 1:n_peptides)) genes <- paste0("GENE_", LETTERS[((1:n_peptides - 1) %% 26) + 1]) # 30% of peptides (15 out of 50) will be truly differentially abundant n_diff <- round(n_peptides * 0.3) diff_peptides <- sample(peptides, n_diff) cat("Truly differential peptides:", n_diff, "\n") # Generate abundance data # Proteomics abundances are typically right-skewed (can't go below zero, # multiplicative processes). We use a Gamma distribution to reflect this. sim_data <- expand.grid( peptide = peptides, treatment = conditions, bio_rep = 1:n_reps, stringsAsFactors = FALSE ) %>% mutate( gene_id = genes[match(peptide, peptides)], # Base abundance varies by peptide (some peptides more abundant than others) base_abundance = rep(rgamma(n_peptides, shape = 5, rate = 0.5), each = length(conditions) * n_reps), # Treatment effect for differential peptides (~3-fold change up or down) fc = ifelse(peptide %in% diff_peptides & treatment == "trt", sample(c(0.33, 3), n_diff * n_reps, replace = TRUE, prob = c(0.3, 0.7)), 1), # Final value with biological noise value = rgamma(n(), shape = 10, rate = 10 / (base_abundance * fc)) ) %>% select(peptide, gene_id, treatment, bio_rep, value) head(sim_data) ## ----write-temp-csv----------------------------------------------------------- temp_file <- tempfile(fileext = ".csv") write.csv(sim_data, temp_file, row.names = FALSE) ## ----import-data-------------------------------------------------------------- dat <- read_pepdiff( file = temp_file, id = "peptide", # peptide identifiers gene = "gene_id", # gene identifiers value = "value", # abundance values factors = "treatment", # experimental factors replicate = "bio_rep" # biological replicate IDs ) dat ## ----summary-data------------------------------------------------------------- summary(dat) ## ----plot-data, fig.height = 6------------------------------------------------ plot(dat) ## ----compare------------------------------------------------------------------ results <- compare( dat, compare = "treatment", # which factor to test ref = "ctrl" # reference level (what to compare against) ) results ## ----results-structure-------------------------------------------------------- # The main results table head(results$results) ## ----significant-------------------------------------------------------------- sig_peptides <- significant(results) sig_peptides ## ----plot-results, fig.height = 6--------------------------------------------- plot(results) ## ----check-results------------------------------------------------------------ # True positives: correctly identified as significant true_pos <- sig_peptides$peptide[sig_peptides$peptide %in% diff_peptides] # False positives: called significant but not truly differential false_pos <- sig_peptides$peptide[!sig_peptides$peptide %in% diff_peptides] # False negatives: truly differential but not called significant false_neg <- diff_peptides[!diff_peptides %in% sig_peptides$peptide] cat("True positives:", length(true_pos), "\n") cat("False positives:", length(false_pos), "\n") cat("False negatives:", length(false_neg), "\n") ## ----cleanup, include = FALSE------------------------------------------------- unlink(temp_file)