## ----include = FALSE----------------------------------------------------------
# Session-wide setup: silence the vignette title check and the tidyverse
# startup banner, limit data.table to two threads, and set the knitr chunk
# defaults shared by every chunk below.
options(
  rmarkdown.html_vignette.check_title = FALSE,
  tidyverse.quiet = TRUE
)
data.table::setDTthreads(2)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  out.width = "80%",
  fig.align = "center",
  fig.height = 3,
  fig.width = 6.5
)

## ----findfiles, message=FALSE-------------------------------------------------
library(RaMS)

# Locate the file directory
msdata_dir <- system.file("extdata", package = "RaMS")

# Identify the files of interest
data_files <- list.files(
  msdata_dir,
  pattern = "mzML",
  full.names = TRUE
)[1:4]

# Check that the files identified are the ones expected
basename(data_files)

## ----loadTIC------------------------------------------------------------------
# Request only the "TIC" table from the second file and preview three rows.
single_file <- data_files[2]
msdata <- grabMSdata(single_file, grab_what = "TIC")
knitr::kable(head(msdata$TIC, 3))

## ----headerTIC----------------------------------------------------------------
# Base-graphics line plot; the full `msdata$TIC$...` expressions are passed
# directly because plot() derives its default axis labels from them.
plot(msdata$TIC$rt, msdata$TIC$int, type = "l")

## ----loadBPC, fig.height=3----------------------------------------------------
# Same file, this time requesting the "BPC" table.
msdata <- grabMSdata(single_file, grab_what = "BPC")

## ----plotBPC, warning=FALSE---------------------------------------------------
library(tidyverse)
ggplot(msdata$BPC) +
  geom_line(aes(x = rt, y = int))

## ----loadmultiBPC-------------------------------------------------------------
# Read files 2 through 4 together and colour the traces by source file.
msdata <- grabMSdata(data_files[2:4], grab_what = "BPC")
ggplot(msdata$BPC) +
  geom_line(aes(x = rt, y = int, color = filename))

## ----ggplotdemo, dpi=144------------------------------------------------------
# A more heavily customised version of the multi-file plot: translucent
# polygons, an in-panel legend, manual colours and axis labels, and a zoomed
# view set through coord_cartesian().
ggplot(msdata$BPC) +
  geom_polygon(
    aes(x = rt, y = int, color = filename),
    lwd = 1,
    fill = "#FFFFFF44"
  ) +
  theme(
    legend.position = "inside",
    legend.position.inside = c(0.8, 0.7),
    plot.title = element_text(face = "bold"),
    axis.title = element_text(size = 15)
  ) +
  scale_y_continuous(
    labels = c(0, "250M", "500M"),
    breaks = c(0, 2.5e8, 5e8)
  ) +
  scale_colour_manual(values = c("#2596be", "#6c25be", "#bea925")) +
  labs(
    x = "Retention time (minutes)",
    y = "Intensity",
    title = "Base peak chromatogram",
    color = "Files:"
  ) +
  coord_cartesian(xlim = c(7.5, 9), ylim = c(0, 5e8))

## ----metadatademo-------------------------------------------------------------
# Since the minification process strips some metadata, I use the
# less-minified DDA file here
grabMSdata(files = data_files[1], grab_what = "metadata")

## ----MS1demo------------------------------------------------------------------
# Load the "MS1" table for files 2 through 4 and preview three rows.
msdata <- grabMSdata(data_files[2:4], grab_what = "MS1")
knitr::kable(head(msdata$MS1, 3))

## ----adenineplot, warning=FALSE, message=FALSE--------------------------------
library(data.table)

# Keep rows whose mz lies inside the window returned by pmppm() around the
# adenine mass, then restrict rt to the closed interval [4, 9].
adenine_mz <- 136.06232
adenine_data <- msdata$MS1[
  mz %between% pmppm(adenine_mz, ppm = 5)
][
  rt %between% c(4, 9)
]
ggplot(adenine_data) +
  geom_line(aes(x = rt, y = int, color = filename))

## ----multicmpdplot------------------------------------------------------------
# Repeat the extraction for several named masses and stack the results.
# The callback argument must stay called `name`: it is bound on as the column
# that facet_wrap(~name) uses below.
mzs_of_interest <- c(
  Adenine = 136.06232,
  Valine = 118.0865,
  Homarine = 138.055503
)
mass_data <- imap(mzs_of_interest, function(mz_i, name) {
  cbind(msdata$MS1[mz %between% pmppm(mz_i, ppm = 10)], name)
}) %>%
  bind_rows() %>%
  filter(rt %between% c(4, 9))

ggplot(mass_data) +
  geom_line(aes(x = rt, y = int, color = filename)) +
  facet_wrap(~name, ncol = 1, scales = "free_y")

## ----MS2data------------------------------------------------------------------
# The first file is read with the MS1, MS2 and MS3 tables requested together.
msdata <- grabMSdata(data_files[1], grab_what = c("MS1", "MS2", "MS3"))
knitr::kable(head(msdata$MS2, 3))

## ----MS2demo------------------------------------------------------------------
# Select MS2 rows whose premz falls in the 5 ppm window around 351.0818,
# keep only the rows at the smallest rt, and rescale int so its maximum is 1.
iodine_MS2 <- msdata$MS2[
  premz %between% pmppm(351.0818, 5)
][
  rt == min(rt)
]
iodine_MS2$int <- iodine_MS2$int / max(iodine_MS2$int)

ggplot(iodine_MS2) +
  geom_point(aes(x = fragmz, y = int)) +
  geom_segment(aes(x = fragmz, xend = fragmz, y = int, yend = 0)) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1),
    labels = c("0%", "50%", "100%")
  ) +
  labs(x = "Fragment m/z", y = "")

## ----plotly, eval=FALSE-------------------------------------------------------
# ## Not run to save space in the vignette:
# library(plotly)
#
# compound_MS1 <- msdata$MS1 %>%
#   filter(mz%between%pmppm(351.0818, 10)) %>%
#   filter(!str_detect(filename, "DDA")) %>%
#   slice_max(int, by = rt)
#
# compound_MS2 <- msdata$MS2[premz%between%pmppm(351.0818, 10)] %>%
#   group_by(rt) %>%
#   arrange(desc(int)) %>%
#   slice(1:10) %>%
#   summarise(frags=paste(
#     paste(round(fragmz, digits = 3), round(int), sep = ": "), collapse = "\n"),
#     .groups="drop"
#   ) %>%
#   mutate(int=approx(x = compound_MS1$rt, y=compound_MS1$int, xout = rt)$y)
# plot_ly(compound_MS1) %>%
#   add_trace(type="scatter", mode="lines", x=~rt, y=~int, hoverinfo="none") %>%
#   add_trace(type="scatter", mode="markers", x=~rt, y=~int,
#             text=~frags, hoverinfo="text", showlegend=FALSE,
#             marker=list(color="black"), data = compound_MS2) %>%
#   layout(annotations=list(x=min(compound_MS2$rt), y=median(compound_MS2$int)*10,
#                           text="Mouse over to see\nMSMS fragments"))

## ----homarine_fragsearch------------------------------------------------------
# Search the MS2 table by fragment mass (10 ppm window around 57.07) and
# show the first rows as a table.
msdata$MS2[fragmz %between% pmppm(57.07, ppm = 10)] %>%
  head() %>%
  knitr::kable()

## -----------------------------------------------------------------------------
# Preview of the MS3 table loaded in the MS2data chunk.
msdata$MS3 %>%
  head() %>%
  knitr::kable()

## ----fig.height=3-------------------------------------------------------------
# Read the "chroms" table from the bundled chromatogram file, pick one
# chrom_type, and build the plot title from that subset's own columns.
chrom_file <- system.file("extdata", "wk_chrom.mzML.gz", package = "RaMS")
msdata_chroms <- grabMSdata(chrom_file, verbosity = 0, grab_what = "chroms")
given_chrom <- msdata_chroms$chroms[chrom_type == "SRM iletter1"]
ptitle <- with(
  given_chrom,
  paste0(
    unique(chrom_type),
    ": Target m/z = ", unique(target_mz),
    "; Product m/z = ", unique(product_mz)
  )
)
plot(given_chrom$rt, given_chrom$int, type = "l", main = ptitle)

## ----EICdemo------------------------------------------------------------------
# Compare a full read against a read restricted to the masses of interest:
# sum int per (rt, filename, type) and plot both versions in stacked facets.
all_data <- grabMSdata(data_files[-1], grab_what = c("MS1", "MS2"))
mzs_of_interest <- c(adenine = 136.06232, valine = 118.0865)
small_data <- grabMSdata(
  data_files[-1],
  grab_what = c("EIC", "EIC_MS2"),
  mz = mzs_of_interest,
  ppm = 5
)

all_data$MS1 %>%
  mutate(type = "All data") %>%
  rbind(
    small_data$EIC %>%
      mutate(type = "Extracted data only")
  ) %>%
  filter(!str_detect(filename, "DDA")) %>%
  filter(rt %between% c(5, 15)) %>%
  group_by(rt, filename, type) %>%
  summarise(TIC = sum(int), .groups = "drop") %>%
  ggplot() +
  geom_line(aes(x = rt, y = TIC, color = filename)) +
  facet_wrap(~type, ncol = 1)

# Size reduction factor:
as.numeric(object.size(all_data) / object.size(small_data))

## ----rtrangedemo--------------------------------------------------------------
# Same comparison, but the smaller read is restricted with rtrange instead.
small_data <- grabMSdata(
  data_files[-1],
  grab_what = c("MS1", "MS2"),
  rtrange = c(6, 8)
)

all_data$MS1 %>%
  mutate(type = "All data") %>%
  rbind(
    small_data$MS1 %>%
      mutate(type = "Extracted data only")
  ) %>%
  filter(!str_detect(filename, "DDA")) %>%
  group_by(rt, filename, type) %>%
  summarise(TIC = sum(int), .groups = "drop") %>%
  ggplot() +
  geom_line(aes(x = rt, y = TIC, color = filename)) +
  facet_wrap(~type, ncol = 1)

# Size reduction factor:
as.numeric(object.size(all_data) / object.size(small_data))

## ----verbosedemo--------------------------------------------------------------
# Read all four files with verbosity = 2.
all_data <- grabMSdata(
  data_files,
  grab_what = c("MS1", "MS2"),
  verbosity = 2
)

## ----eval=FALSE---------------------------------------------------------------
# NOTE(review): the parLapply example below never stops `cl`; anyone running
# it should follow up with parallel::stopCluster(cl).
# ## Not run:
# library(parallel)
# cl <- makeCluster(getOption("cl.cores", detectCores()-1))
# output_data <- parLapply(data_files, grabMzmlData, grab_what="everything", cl = cl)
#
# library(foreach)
# library(doParallel)
# registerDoParallel(detectCores()-1)
# output_data <- foreach (i=data_files) %dopar% {
#   RaMS::grabMzmlData(i, grab_what="everything")
# }
# stopImplicitCluster()

## ----eval=FALSE---------------------------------------------------------------
# ## Not run:
# data_nodes <- xml2::xml_find_all(mzML_nodes, xpath="//d1:precursorMz")
# raw_data <- xml2::xml_attr(data_nodes, "value")

## ----eval=FALSE---------------------------------------------------------------
# ## Not run:
# decoded_binary <- base64enc::base64decode(binary)
# raw_binary <- as.raw(decoded_binary)
# decomp_binary <- memDecompress(raw_binary, type = file_metadata$compression)
# final_binary <- readBin(decomp_binary, what = "double",
#                         n=length(decomp_binary)/file_metadata$mz_precision,
#                         size = file_metadata$mz_precision)
#
# # See https://github.com/ProteoWizard/pwiz/issues/1301