## ----include = FALSE----------------------------------------------------------
# Vignette-wide chunk defaults: merge each chunk's source and its output into
# a single block, and prefix every printed output line with "#>".
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

## ----echo=T, eval=F-----------------------------------------------------------
#  # Install FAMetA
#  install.packages("FAMetA", dependencies = c("Depends", "Imports"))
#  
#  # load library
#  library(FAMetA)
#  

## ----echo=T, eval=F-----------------------------------------------------------
#  library(FAMetA)
#  library(tools)
#  
#  # Example files can be found at
#  # <https://drive.google.com/drive/folders/1-mhqBd6W8VJkIYwVuIOr2Gn4Z9vBUN9-?usp=sharing>.
#  # This dataset contains 12 samples of lymphocytes grown in culture media
#  # supplemented with 13Cglc, 13Cglc+iFASN, 13Cglc+iSCD and 13Cglc+iFADS2
#  # (3 replicates of each condition), 2 blank samples and 3 injections of a
#  # standards mix.
#  
#  #==============================================================================#
#  # Data pre-processing using LipidMS package
#  #==============================================================================#
#  
#  #=================#
#  # Read metadata
#  
#  metadata <- read.csv("samples.csv", header = T, sep=",")
#  
#  # check file names (they must include .mzXML)
#  if (!all(file_ext(metadata$sample) == "mzXML")){
#    metadata$sample[file_ext(metadata$sample) != "mzXML"] <-
#      paste(metadata$sample[file_ext(metadata$sample) != "mzXML"], ".mzXML", sep="")
#  }
#  
#  
#  #=================#
#  # Set processing parameters
#  
#  # Peak-picking
#  polarity <- "negative"
#  dmzagglom <- 15               # dmz and drt to generate bins/partitions for peak-picking
#  drtagglom <- 200              # max rt window for bins
#  drtclust <- 100               # drt window for clustering (redefines previous bins)
#  minpeak <- 8                  # min number of points to define a peak (MS1, MS2)
#  minint <- 100000              # at least minpeak points must have minint intensity
#  drtgap <- 5                   # max rt gap to fill missing points in a peak
#  drtminpeak <- 8               # min width of a peak when there are more than 1 peak in a EIC
#  drtmaxpeak <- 30              # max rt window for a EIC
#  recurs <- 10                  # max number of peaks in a EIC
#  sb <- 5                       # signal-to-baseline ratio (MS1, MS2)
#  sn <- 5                       # signal-to-noise ratio
#  weight <- 2                   # weight to assign new peaks
#  dmzIso <- 5                   # dmz for isotopes search
#  drtIso <- 5                   # drt for isotopes search
#  
#  parallel <- TRUE              # parallel processing
#  ncores <- 4                   # number of cores
#  
#  
#  #=================#
#  # Peak-picking
#  msbatch <- batchdataProcessing(metadata = metadata,
#                                 polarity = polarity,
#                                 dmzagglom = dmzagglom,
#                                 drtagglom = drtagglom,
#                                 drtclust = drtclust,
#                                 minpeak = minpeak,
#                                 drtgap = drtgap,
#                                 drtminpeak = drtminpeak,
#                                 drtmaxpeak = drtmaxpeak,
#                                 recurs = recurs,
#                                 sb = sb,
#                                 sn = sn,
#                                 minint = minint,
#                                 weight = weight,
#                                 dmzIso = dmzIso,
#                                 drtIso = drtIso,
#                                 parallel = parallel,
#                                 ncores = ncores)

## ----echo=T, eval=F-----------------------------------------------------------
#  #=================#
#  # Batch processing
#  dmzalign <- 10                # max dmz and rt to group peaks for alignment
#  drtalign <- 60                # max rt window for clustering in alignment
#  span <- 0.2                   # span for alignment
#  minsamplesfracalign <- 0.50   # min fraction of samples represented in a peak group to be used for alignment
#  dmzgroup <- 10                # max dmz and rt to group peaks for grouping
#  drtagglomgroup <- 50          # max rt window for clustering in grouping
#  drtgroup <- 10                # max rt difference within a peak group
#  minsamplesfracgroup <- 0.20   # min fraction of samples represented in a peak group to be kept
#  
#  
#  #=================#
#  # Alignment
#  msbatch <- alignmsbatch(msbatch, dmz = dmzalign, drt = drtalign, span = span,
#                          minsamplesfrac = minsamplesfracalign,
#                          parallel = parallel, ncores = ncores)
#  
#  #=================#
#  # Grouping
#  msbatch <- groupmsbatch(msbatch, dmz = dmzgroup, drtagglom = drtagglomgroup,
#                          drt = drtgroup, minsamplesfrac = minsamplesfracgroup,
#                          parallel = parallel, ncores = ncores)
#  
#  
#  #=================#
#  # Save msbatch
#  save(msbatch, file = "msbatch.rda.gz", compress = TRUE)
#  
#  
#  # If any other external software is used for processing, data can be loaded from
#  # a csv file using the following function:
#  # fadata <- readfadatafile("externalfadata.csv", sep=",", dec=".")
#  
#  # In this case, go directly to data correction step.

## ----echo=T, eval=F-----------------------------------------------------------
#  #==============================================================================#
#  # FA annotation
#  #==============================================================================#
#  
#  #=================#
#  # Annotate FA
#  msbatch <- annotateFA(msbatch, dmz = 5)
#  
#  #=================#
#  # plot peaks from identified FAs to check them
#  plots <- plotFA(msbatch, dmz = 10)
#  
#  pdf("FAs.pdf")
#  for (p in 1:length(plots)){
#    print(plots[[p]])
#  }
#  dev.off()
#  
#  #=================#
#  # export annotations for curation
#  write.csv(msbatch$fas, file="faid.csv", row.names=FALSE)
#  

## ----echo=T, eval=F-----------------------------------------------------------
#  #==============================================================================#
#  # FA curation
#  #==============================================================================#
#  
#  #=================#
#  # read csv file with modified annotations
#  faid <- read.csv("faid_curated.csv", sep=",", dec=".")
#  
#  #=================#
#  # change FA annotations
#  msbatch <- curateFAannotations(msbatch, faid)
#  
#  #=================#
#  # plot FA peaks again to check identities
#  plots <- plotFA(msbatch, dmz = 10)
#  
#  pdf("FAs_curated.pdf")
#  for (p in 1:length(plots)){
#    print(plots[[p]])
#  }
#  dev.off()

## ----echo=T, eval=F-----------------------------------------------------------
#  #==============================================================================#
#  # Search FA isotopes and get the fadata object
#  #==============================================================================#
#  fadata <- searchFAisotopes(msbatch, dmz = 10, coelCutoff = 0.6)
#  
#  
#  # if you want to save fadata in a csv to subset it for example:
#  df <- cbind(rbind(fadata$fattyacids, data.frame(Compound="IS", Label="")),
#              rbind(fadata$intensities, fadata$IS))
#  df <- rbind(c("", "sampletype", fadata$metadata$sampletype),
#              c("Compound", "Label", colnames(fadata$intensities)), df)
#  write.table(df, file="fadata.csv", sep=",", col.names = FALSE, row.names = FALSE)
#  
#  # and then, you could read it again using:
#  fadata <- readfadatafile("fadata.csv", sep=",", dec=".")

## ----echo=T, eval=F-----------------------------------------------------------
#  #==============================================================================#
#  # Import FA data
#  #==============================================================================#
#  inputfile <- "externalfadata.csv"
#  fadata <- readfadatafile(inputfile, sep=",", dec=".")

## ----echo=T, eval=F-----------------------------------------------------------
#  #==============================================================================#
#  # Data correction
#  #==============================================================================#
#  fadata <- dataCorrection(fadata, blankgroup = "Blank")
#  
#  # Alternatively, to add external normalization:
#  # fadata <- dataCorrection(fadata, blankgroup = "Blank",
#  #                          externalnormalization = "protein")

## ----echo=T, eval=F-----------------------------------------------------------
#  #==============================================================================#
#  # Metabolic analysis
#  #==============================================================================#
#  
#  #=================#
#  # Synthesis analysis
#  fadata <- synthesisAnalysis(fadata=fadata, R2Thr = 0.95, maxiter = 1e3,
#                              maxconvergence = 100, startpoints = 5)
#  
#  # If inhibitors have been used, make sure D2 has not been underestimated. If so,
#  # D2 could be set as the one calculated for 13Cglc control samples to improve
#  # the results:
#  
#  # D2 <- fadata$synthesis$results$D2[fadata$synthesis$results$FA == "FA(16:0)"]
#  # fadata$synthesis$results$Group[fadata$synthesis$results$FA == "FA(16:0)"]
#  
#  # D2[4:12] <- mean(D2[1:3])
#  
#  # relaunch synthesis analysis fixing D2
#  # fadata <- synthesisAnalysis(fadata=fadata, R2Thr = 0.95, maxiter = 1e3,
#  #                             maxconvergence = 100, startpoints = 5, D2 = D2)
#  
#  
#  # Explore results
#  View(fadata$synthesis$results)
#  View(fadata$synthesis$predictedValues)
#  pdf("plotsSynthesis.pdf")
#  for (f in 1:length(fadata$synthesis$plots)){
#    for (s in 1:length(fadata$synthesis$plots[[f]])){
#      print(fadata$synthesis$plots[[f]][[s]])
#    }
#  }
#  dev.off()
#  
#  # to use multinomial distribution without over dispersion, set P parameter to 0
#  fadata <- synthesisAnalysis(fadata=fadata, R2Thr = 0.95, maxiter = 1e3,
#                               maxconvergence = 100, startpoints = 5, P = 0)
#  

## ----echo=T, eval=F-----------------------------------------------------------
#  #=================#
#  # Elongation analysis
#  fadata <- elongationAnalysis(fadata, R2Thr = 0.95, maxiter = 1e4,
#                               maxconvergence=100, startpoints = 5, DThr = 0.1)
#  
#  
#  # Explore results
#  View(fadata$elongation$results)
#  View(fadata$elongation$predictedValues)
#  pdf("plotsElongation.pdf")
#  for (f in 1:length(fadata$elongation$plots)){
#    for (s in 1:length(fadata$elongation$plots[[f]])){
#      print(fadata$elongation$plots[[f]][[s]])
#    }
#  }
#  dev.off()

## ----echo=T, eval=F-----------------------------------------------------------
#  #=================#
#  # Desaturation analysis
#  fadata <- desaturationAnalysis(fadata)
#  
#  # Explore results
#  View(fadata$desaturations$results)

## ----echo=T, eval=F-----------------------------------------------------------
#  #=================#
#  # Summarize results
#  fadata <- summarizeResults(fadata, controlgroup = "Control13Cglc")
#  
#  #=================#
#  # Save fadata
#  save(fadata, file="fadata.rda")
#  
#  
#  
#  #=================#
#  # Export results
#  write.csv(fadata$results$results, file = "results.csv", row.names=FALSE)
#  write.csv(fadata$results$summary, file = "summary.csv")
#  write.table(rbind(fadata$metadata$sampletype,
#                    colnames(fadata$mid),
#                    fadata$mid),
#              file = "mid.csv", sep=",", col.names = FALSE)
#  write.table(rbind(fadata$metadata$sampletype,
#                      colnames(fadata$synthesis$predictedValues),
#                      fadata$synthesis$predictedValues),
#                file = "predictedmid.csv", sep=",", col.names = FALSE)
#  
#  
#  pdf("relativepoolsizeRaw.pdf")
#  print(fadata$results$heatmaps$relativepoolsize$raw$plot)
#  dev.off()
#  
#  write.table(rbind(fadata$metadata$sampletype,
#                    colnames(fadata$results$heatmaps$relativepoolsize$raw$values),
#                    fadata$results$heatmaps$relativepoolsize$raw$values),
#              file = "relativepoolsizeRaw.csv", sep=",", col.names = FALSE)
#  
#  
#  pdf("relativepoolsizeZscore.pdf")
#  print(fadata$results$heatmaps$relativepoolsize$zscore$plot)
#  dev.off()
#  
#  write.table(rbind(fadata$metadata$sampletype,
#                    colnames(fadata$results$heatmaps$relativepoolsize$zscore$values),
#                    fadata$results$heatmaps$relativepoolsize$zscore$values),
#              file = "relativepoolsizeZscore.csv", sep=",", col.names = FALSE)
#  
#  if ("log2FC" %in% names(fadata$results$heatmaps$relativepoolsize)){
#    pdf("relativepoolsizeLog2FC.pdf")
#    print(fadata$results$heatmaps$relativepoolsize$log2FC$plot)
#    dev.off()
#  
#    write.table(rbind(fadata$metadata$sampletype,
#                      colnames(fadata$results$heatmaps$relativepoolsize$log2FC$values),
#                      fadata$results$heatmaps$relativepoolsize$log2FC$values),
#                file = "relativepoolsizeLog2FC.csv", sep=",", col.names = FALSE)
#  }
#  
#  pdf("resultsRaw_endogenouslySynthesized.pdf")
#  print(fadata$results$heatmaps$synthesized$raw$plot)
#  dev.off()
#  
#  write.table(rbind(fadata$metadata$sampletype,
#                    colnames(fadata$results$heatmaps$synthesized$raw$values),
#                    fadata$results$heatmaps$synthesized$raw$values),
#              file = "resultsRaw_endogenouslySynthesized.csv", sep=",",
#              col.names = FALSE)
#  
#  
#  if ("log2FC" %in% names(fadata$results$heatmaps$synthesized)){
#    pdf("resultsLog2FC_endogenouslySynthesized.pdf")
#    print(fadata$results$heatmaps$synthesized$log2FC$plot)
#    dev.off()
#  
#    write.table(rbind(fadata$metadata$sampletype,
#                      colnames(fadata$results$heatmaps$synthesized$log2FC$values),
#                      fadata$results$heatmaps$synthesized$log2FC$values),
#                file = "resultsLog2FC_endogenouslySynthesized.csv", sep=",",
#                col.names = FALSE)
#  }
#  
#  
#  #=================#
#  # Isotopologue distributions: observed vs predicted
#  
#  pdf("isotopologueDistributions.pdf")
#  for (f in 1:length(fadata$synthesis$plots)){
#    for (s in 1:length(fadata$synthesis$plots[[f]])){
#      print(fadata$synthesis$plots[[f]][[s]])
#    }
#  }
#  for (f in 1:length(fadata$elongation$plots)){
#      for (s in 1:length(fadata$elongation$plots[[f]])){
#        print(fadata$elongation$plots[[f]][[s]])
#      }
#    }
#  dev.off()
#  
#  
#  
#  #=================#
#  # Reorganized tables for synthesis and elongation parameters (S16, E1, E2, E3,
#  # E4 and E5)
#  
#  write.table(rbind(fadata$metadata$sampletype,
#                    colnames(fadata$results$allparameters$S16),
#                    fadata$results$allparameters$S16),
#              file = "S16.csv", sep=",", col.names = FALSE)
#  
#  write.table(rbind(fadata$metadata$sampletype,
#                    colnames(fadata$results$allparameters$E1),
#                    fadata$results$allparameters$E1),
#              file = "E1.csv", sep=",", col.names = FALSE)
#  
#  write.table(rbind(fadata$metadata$sampletype,
#                    colnames(fadata$results$allparameters$E2),
#                    fadata$results$allparameters$E2),
#              file = "E2.csv", sep=",", col.names = FALSE)
#  
#  write.table(rbind(fadata$metadata$sampletype,
#                    colnames(fadata$results$allparameters$E3),
#                    fadata$results$allparameters$E3),
#              file = "E3.csv", sep=",", col.names = FALSE)
#  
#  write.table(rbind(fadata$metadata$sampletype,
#                    colnames(fadata$results$allparameters$E4),
#                    fadata$results$allparameters$E4),
#              file = "E4.csv", sep=",", col.names = FALSE)
#  
#  write.table(rbind(fadata$metadata$sampletype,
#                    colnames(fadata$results$allparameters$E5),
#                    fadata$results$allparameters$E5),
#              file = "E5.csv", sep=",", col.names = FALSE)

## ----echo=T, eval=F-----------------------------------------------------------
#  #=================#
#  # Customize parameters database
#  parameters <- FAMetA::parameters
#  
#  # Add a new unknown FA(18:1)
#  newrow <- data.frame(FattyAcid = "FA(18:1)nv",
#                       M = 18,
#                       S16 = 1,
#                       E1 = 1,
#                       E2 = 0,
#                       E3 = 0,
#                       E4 = 0,
#                       E5 = 0)
#  
#  
#  parameters <- data.frame(rbind(parameters, newrow))
#  parameters <- parameters[order(parameters$FattyAcid),]
#  View(parameters)
#  
#  # Change fatty acid settings: add E1 step for FA(18:3)n6
#  parameters$E1[parameters$FattyAcid == "FA(18:3)n6"] <- 1
#  
#  
#  # Then add the parameters argument to elongationAnalysis and summarizeResults
#  # functions
#  fadata <- elongationAnalysis(fadata, R2Thr = 0.95, maxiter = 1e4,
#                               maxconvergence=100, startpoints = 5, D2Thr = 0.1,
#                               parameters = parameters)
#  
#  fadata <- summarizeResults(fadata, controlgroup = "Control13Cglc",
#                             parameters = parameters)