## ----include = FALSE, warning=FALSE------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, message = FALSE, warning = FALSE---------------------------------- library(mums2) library(tidyverse) library(networkD3) ## ----setup_dt, message = FALSE, warning = FALSE, include = FALSE-------------- Sys.setenv("OMP_THREAD_LIMIT" = 1) data.table::setDTthreads(threads = 1) ## ----import_data-------------------------------------------------------------- data <- import_all_data( peak_table = mums2_example("boryillus_peaktable.csv"), metadata = mums2_example("boryillus_metadata.csv"), format = "Progenesis") ## ----peak_table, R.options=list(max.print=10)--------------------------------- read_csv(mums2_example("boryillus_peaktable.csv"), skip = 2) ## ----metadata, R.options=list(max.print=10)----------------------------------- read_csv(mums2_example("boryillus_metadata.csv")) ## ----filter_data, R.options=list(max.print=10)------------------------------- filtered_data <- data |> filter_peak_table(filter_mispicked_ions_params()) |> filter_peak_table(filter_cv_params(cv_threshold = 0.2)) |> filter_peak_table(filter_group_params(group_threshold = 0.1, "Blanks")) |> filter_peak_table(filter_insource_ions_params()) filtered_data ## ----show_rt_of_filtered_data, R.options=list(max.print=5)-------------------- get_peak_table(filtered_data)$rt ## ----convert_rt_to_minutes_or_seconds, R.options=list(max.print=5)------------ filtered_data <- change_rt_to_seconds_or_minute( mpactr_object = filtered_data, rt_type = "seconds" ) filtered_data ## ----R.options=list(max.print=5)---------------------------------------------- filtered_data <- change_rt_to_seconds_or_minute( mpactr_object = filtered_data, rt_type = "minutes" ) filtered_data ## ----preprocess_ms2_data------------------------------------------------------ matched_data <- ms2_ms1_compare( ms2_files = mums2_example("botryllus_v2.gnps.mgf"), mpactr_object = filtered_data, mz_tolerance = 5, rt_tolerance = 6) head(get_ms2_matches(matched_data)) head(get_ms1_data(matched_data)) get_ms2_peaks_data(matched_data)[1] get_samples(matched_data) ## ----chemical_formula_prediction, R.options=list(max.print=10)---------------- data_small <- import_all_data( peak_table = mums2_example("botryllus_pt_small.csv"), metadata = mums2_example("boryillus_metadata.csv"), format = "None") |> filter_peak_table(filter_mispicked_ions_params()) |> filter_peak_table(filter_cv_params(cv_threshold = 0.05)) |> filter_peak_table(filter_group_params(group_threshold = 0.1, "Blanks")) |> filter_peak_table(filter_insource_ions_params()) matched_data_small <- ms2_ms1_compare( ms2_files = mums2_example("botryllus_v2.gnps.mgf"), mpactr_object = data_small, mz_tolerance = .5, rt_tolerance = 6) matched_data_small <- compute_molecular_formulas( mass_data = matched_data_small, parent_ppm = 3, number_of_threads = 2) get_molecular_formula_preds(matched_data_small) ## ----annotations-------------------------------------------------------------- reference_db <- read_msp(msp_file = mums2_example("massbank_example_data.msp")) annotations <- annotate_ms2( mass_data = matched_data, reference = reference_db, scoring_params = modified_cosine_params(0.5), ppm = 1000, min_score = 0.1, chemical_min_score = 0, number_of_threads = 2) annotations[1:5,] ## ----scoring_distance, R.options=list(max.print=10)--------------------------- dist <- dist_ms2( data = matched_data, cutoff = 0.3, precursor_threshold = -1, score_params = modified_cosine_params(0.5), min_peaks = 0, number_of_threads = 2) dist ## ----clustering, R.options=list(max.print=10)--------------------------------- cluster_results <- cluster_data( distance_df = dist, ms2_match_data = matched_data, cutoff = 0.3, cluster_method = "opticlust") cluster_results ## ----community_object, R.options=list(max.print=10)--------------------------- community_w_omus <- create_community_matrix_object(cluster_results) get_community_matrix(community_w_omus) ## ----community_object_no_omus, R.options=list(max.print=10)------------------- community_wo_omus <- create_community_matrix_object(data = matched_data) get_community_matrix(community_wo_omus) ## ----omu_annotations---------------------------------------------------------- annotations <- annotate_ms2( mass_data = matched_data, reference = reference_db, scoring_params = modified_cosine_params(0.5), ppm = 1000, min_score = 0.7, chemical_min_score = 0, cluster_data = cluster_results, number_of_threads = 2) annotations[1:5,] ## ----------------------------------------------------------------------------- get_community_matrix(community_w_omus) |> rowSums() |> sort() ## ----alpha_summary, R.options=list(max.print=15)------------------------------ alpha_summary( community_object = community_w_omus, size = 40000, threshold = 200, diversity_index = c("simpson", "shannon", "richness"), subsample = TRUE, number_of_threads = 2) |> head() ## ----dist_shared, R.options=list(max.print=10)-------------------------------- bray_no_omus <- dist_shared(community_object = community_wo_omus, size = 40000, threshold = 200, diversity_index = "bray", number_of_threads = 2) bray_no_omus bray_w_omus <- dist_shared(community_object = community_w_omus, size = 40000, threshold = 200, diversity_index = "bray", number_of_threads = 2) bray_w_omus ## ----pcoa, fig.width=10, fig.height=10, fig.fullwidth=TRUE-------------------- pcoa <- cmdscale(bray_w_omus, k = 2, eig = T, add = T) variance <- round(pcoa$eig*100/sum(pcoa$eig), 1) colnames(pcoa$points) <- c("pcoa_1", "pcoa_2") as_tibble(pcoa$points, rownames = "sample") |> inner_join(get_metadata(filtered_data), by = c("sample" = "injection")) |> ggplot(aes(x=pcoa_1, y = pcoa_2, color = biological_group)) + geom_point(size = 5.5) + labs( x = paste0("PCoA 1 (", variance[1], "%)"), y = paste0("PCoA 2 (", variance[2], "%)"), color = "Sample type") + theme( legend.text = element_text(size = 10), legend.title = element_text(size = 15) ) ## ----hierarchical_clustering, fig.width=10, fig.height=10, fig.fullwidth=TRUE---- old_par <- par(no.readonly = TRUE) hclust_result <- hclust(bray_w_omus, "average") par(cex=0.6, mar=c(5, 8, 4, 1)) plot(hclust_result) par(old_par) ## ----network_plot------------------------------------------------------------- distance_df_annotations <- annotations[, c("query_ms2_id", "name", "score")] simpleNetwork( distance_df_annotations, height="100px", width="100px", zoom = TRUE) ## ----group_average------------------------------------------------------------ matched_avg <- convert_to_group_averages( matched_data = matched_data, mpactr_object = filtered_data) dist_avg <- dist_ms2( data = matched_avg, cutoff = 0.3, precursor_thresh = 2, score_params = modified_cosine_params(0.5), min_peaks = 0, number_of_threads = 2) cluster_results_avg <- cluster_data( distance_df = dist, ms2_match_data = matched_avg, cutoff = 0.3, cluster_method = "opticlust") annotations_avg <- annotate_ms2( mass_data = matched_avg, reference = reference_db, scoring_params = modified_cosine_params(0.5), ppm = 1000, min_score = 0.6, chemical_min_score = 0, cluster_data = cluster_results_avg, number_of_threads = 2) community_object_avg <- create_community_matrix_object( data = cluster_results_avg) head(get_community_matrix(community_object = community_object_avg), 3) ## ----samples------------------------------------------------------------------ # Normal head(get_samples(matched_data)) # Group Average head(get_samples(matched_avg)) # The items in the injection have been converted into the corresponding sample code. get_metadata(filtered_data)[, c("injection", "sample_code")] ## ----combined_df-------------------------------------------------------------- # For normal data generate_a_combined_table( matched_data = matched_data, annotations = annotations, cluster_data = cluster_results) |> head(n = 3) # For group averaged data generate_a_combined_table( matched_data = matched_avg, annotations = annotations_avg, cluster_data = cluster_results_avg) |> head(n = 3) ## ----heat_map, fig.width=10, fig.height=10, fig.fullwidth=TRUE---------------- dist_shared(community_object_avg, 40000, 200, "bray", number_of_threads = 2) |> as.matrix() |> as_tibble(rownames = "A") |> pivot_longer(-A, names_to = "B", values_to = "distance") |> ggplot(aes(x = A, y = B, fill = distance)) + geom_tile() + scale_fill_gradient(low = "#FFFFFF", high="#FF0000") + theme( axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1), panel.background = element_blank() ) ## ----box_plot, fig.width=10, fig.height=10, fig.fullwidth=TRUE---------------- metadata <- get_metadata(filtered_data) alpha <- alpha_summary(community_object_avg, 40000, 200, "simpson", number_of_threads = 2) inner_join(alpha, metadata, by = c("samples" = "sample_code")) |> ggplot(aes(x = biological_group, y = simpson)) + geom_boxplot(outliers = FALSE, fill = NA, color = "gray") + geom_jitter(width = 0.3) + scale_y_continuous(limits = c(0, NA)) + theme_classic()