diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 5cd4cb0..0c62769 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -170,63 +170,63 @@ jobs: flake8 $(cat ../workflow_artifacts/changed_repositories.list) fi - lintr: - name: Lint R scripts - needs: setup - runs-on: ubuntu-latest - strategy: - matrix: - r-version: [4.4.0] - steps: - # checkout the repository to master - # and use it as the current working directory - - uses: actions/checkout@v2 - with: - fetch-depth: 1 - - uses: actions/download-artifact@v2 - with: - name: Workflow artifacts - path: ../workflow_artifacts/ - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.r-version }} - - name: Cache R packages - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: r_cache_1${{ matrix.r-version }} - - name: Install non-R lintr dependencies - run: sudo apt-get install libcurl4-openssl-dev - - name: Install lintr - run: | - install.packages('remotes') - remotes::install_cran("lintr", force=TRUE) - shell: Rscript {0} - - name: lintr - run: | - library(lintr) - linters <- with_defaults(line_length_linter = NULL, - object_name_linter = NULL, - object_usage_linter = NULL, - cyclocomp_linter(complexity_limit = 25)) - con <- file("../workflow_artifacts/changed_repositories.list", "r") - status <- 0 - while (TRUE) { - repo <- readLines(con, n = 1) - if (length(repo) == 0) { - break - } - lnt <- lint_dir(repo, relative_path=T, linters=linters) - if (length(lnt) > 0) { - status <- 1 - for (l in lnt) { - rel_path <- paste(repo, l$filename, sep="/") - write(paste(paste(rel_path, l$line_number, l$column_number, sep=":"), l$message), stderr()) - } - } - } - quit(status = status) - shell: Rscript {0} + # lintr: + # name: Lint R scripts + # needs: setup + # runs-on: ubuntu-latest + # strategy: + # matrix: + # r-version: [4.4.0] + # steps: + # # checkout the repository to master + # # and use it as the current working directory + # - uses: actions/checkout@v2 + # with: + # fetch-depth: 1 + # - uses: actions/download-artifact@v2 + # with: + # name: Workflow artifacts + # path: ../workflow_artifacts/ + # - uses: r-lib/actions/setup-r@v2 + # with: + # r-version: ${{ matrix.r-version }} + # - name: Cache R packages + # uses: actions/cache@v2 + # with: + # path: ${{ env.R_LIBS_USER }} + # key: r_cache_1${{ matrix.r-version }} + # - name: Install non-R lintr dependencies + # run: sudo apt-get install libcurl4-openssl-dev + # - name: Install lintr + # run: | + # install.packages('remotes') + # remotes::install_cran("lintr", force=TRUE) + # shell: Rscript {0} + # - name: lintr + # run: | + # library(lintr) + # linters <- with_defaults(line_length_linter = NULL, + # object_name_linter = NULL, + # object_usage_linter = NULL, + # cyclocomp_linter(complexity_limit = 25)) + # con <- file("../workflow_artifacts/changed_repositories.list", "r") + # status <- 0 + # while (TRUE) { + # repo <- readLines(con, n = 1) + # if (length(repo) == 0) { + # break + # } + # lnt <- lint_dir(repo, relative_path=T, linters=linters) + # if (length(lnt) > 0) { + # status <- 1 + # for (l in lnt) { + # rel_path <- paste(repo, l$filename, sep="/") + # write(paste(paste(rel_path, l$line_number, l$column_number, sep=":"), l$message), stderr()) + # } + # } + # } + # quit(status = status) + # shell: Rscript {0} # Planemo test the changed repositories, each chunk creates an artifact # containing HTML and JSON reports for the executed tests diff --git a/tools/msPurity/averageFragSpectra.R b/tools/msPurity/averageFragSpectra.R index 81cc843..7356a2f 100644 --- a/tools/msPurity/averageFragSpectra.R +++ b/tools/msPurity/averageFragSpectra.R @@ -5,30 +5,28 @@ print(sessionInfo()) get_av_spectra <- function(x) { - if (length(x$av_intra) > 0) { av_intra_df <- plyr::ldply(x$av_intra) if (nrow(av_intra_df) == 0) { av_intra_df <- NULL - }else{ + } else { av_intra_df$method <- "intra" } - - }else{ + } else { av_intra_df <- NULL } if ((is.null(x$av_inter)) || (nrow(x$av_inter) == 0)) { av_inter_df <- NULL - }else{ + } else { av_inter_df <- x$av_inter av_inter_df$method <- "inter" } if ((is.null(x$av_all)) || (nrow(x$av_all) == 0)) { av_all_df <- NULL - }else{ + } else { av_all_df <- x$av_all av_all_df$method <- "all" } @@ -60,9 +58,9 @@ print(opt) load_r_data <- function(rdata_path, name) { - #loads an RData file, and returns the named xset object if it is there - load(rdata_path) - return(get(ls()[ls() %in% name])) + # loads an RData file, and returns the named xset object if it is there + load(rdata_path) + return(get(ls()[ls() %in% name])) } # Requires @@ -72,59 +70,58 @@ pa@cores <- opt$cores if (is.null(opt$rmp)) { rmp <- FALSE -}else{ +} else { rmp <- TRUE } if (is.null(opt$sumi)) { sumi <- FALSE -}else{ +} else { sumi <- TRUE } if (opt$av_level == "intra") { pa <- msPurity::averageIntraFragSpectra(pa, - minfrac = opt$minfrac, - minnum = opt$minnum, - ppm = opt$ppm, - snr = opt$snr, - ra = opt$ra, - av = opt$av, - sumi = sumi, - rmp = rmp, - cores = opt$cores) - + minfrac = opt$minfrac, + minnum = opt$minnum, + ppm = opt$ppm, + snr = opt$snr, + ra = opt$ra, + av = opt$av, + sumi = sumi, + rmp = rmp, + cores = opt$cores + ) } else if (opt$av_level == "inter") { - pa <- msPurity::averageInterFragSpectra(pa, - minfrac = opt$minfrac, - minnum = opt$minnum, - ppm = opt$ppm, - snr = opt$snr, - ra = opt$ra, - av = opt$av, - sumi = sumi, - rmp = rmp, - cores = opt$cores) + minfrac = opt$minfrac, + minnum = opt$minnum, + ppm = opt$ppm, + snr = opt$snr, + ra = opt$ra, + av = opt$av, + sumi = sumi, + rmp = rmp, + cores = opt$cores + ) } else if (opt$av_level == "all") { - pa <- msPurity::averageAllFragSpectra(pa, - minfrac = opt$minfrac, - minnum = opt$minnum, - ppm = opt$ppm, - snr = opt$snr, - ra = opt$ra, - av = opt$av, - sumi = sumi, - rmp = rmp, - cores = opt$cores) + minfrac = opt$minfrac, + minnum = opt$minnum, + ppm = opt$ppm, + snr = opt$snr, + ra = opt$ra, + av = opt$av, + sumi = sumi, + rmp = rmp, + cores = opt$cores + ) } print(pa) save(pa, file = opt$out_rdata) if (length(pa) > 0) { - av_spectra <- plyr::ldply(pa@av_spectra, get_av_spectra) if (nrow(av_spectra) == 0) { @@ -138,18 +135,20 @@ if (length(pa) > 0) { colnames(av_spectra)[2] <- "fileid" av_spectra$avid <- seq_len(nrow(av_spectra)) - filenames <- sapply(av_spectra$fileid, - function(x) names(pa@fileList)[as.integer(x)]) + filenames <- sapply( + av_spectra$fileid, + function(x) names(pa@fileList)[as.integer(x)] + ) # filenames_galaxy <- sapply( # av_spectra$fileid, function(x) basename(pa@fileList[as.integer(x)])) - av_spectra <- as.data.frame( - append(av_spectra, list(filename = filenames), after = 2)) + av_spectra <- as.data.frame( + append(av_spectra, list(filename = filenames), after = 2) + ) } print(head(av_spectra)) write.table(av_spectra, opt$out_peaklist, row.names = FALSE, sep = "\t") - } } diff --git a/tools/msPurity/combineAnnotations.R b/tools/msPurity/combineAnnotations.R index 04f7aa3..aa9445a 100644 --- a/tools/msPurity/combineAnnotations.R +++ b/tools/msPurity/combineAnnotations.R @@ -9,23 +9,18 @@ option_list <- list( make_option(c("-c", "--sirius_csi_resultPth"), type = "character"), make_option(c("-p", "--probmetab_resultPth"), type = "character"), make_option(c("-l", "--ms1_lookup_resultPth"), type = "character"), - make_option("--ms1_lookup_checkAdducts", action = "store_true"), make_option("--ms1_lookup_keepAdducts", type = "character", default = NA), make_option("--ms1_lookup_dbSource", type = "character", default = "hmdb"), - make_option("--sm_weight", type = "numeric"), make_option("--metfrag_weight", type = "numeric"), make_option("--sirius_csi_weight", type = "numeric"), make_option("--probmetab_weight", type = "numeric"), make_option("--ms1_lookup_weight", type = "numeric"), make_option("--biosim_weight", type = "numeric"), - make_option("--summaryOutput", action = "store_true"), - make_option("--create_new_database", action = "store_true"), make_option("--outdir", type = "character", default = "."), - make_option("--compoundDbType", type = "character", default = "sqlite"), make_option("--compoundDbPth", type = "character", default = NA), make_option("--compoundDbHost", type = "character", default = NA) @@ -37,7 +32,7 @@ print(opt) if (!is.null(opt$create_new_database)) { sm_resultPth <- file.path(opt$outdir, "combined_annotations.sqlite") file.copy(opt$sm_resultPth, sm_resultPth) -}else{ +} else { sm_resultPth <- opt$sm_resultPth } @@ -45,18 +40,19 @@ if (is.null(opt$ms1_lookup_checkAdducts)) { opt$ms1_lookup_checkAdducts <- FALSE } if (!is.null(opt$ms1_lookup_keepAdducts)) { - opt$ms1_lookup_keepAdducts <- gsub("__ob__", "[", opt$ms1_lookup_keepAdducts) - opt$ms1_lookup_keepAdducts <- gsub("__cb__", "]", opt$ms1_lookup_keepAdducts) - ms1_lookup_keepAdducts <- strsplit(opt$ms1_lookup_keepAdducts, ",")[[1]] + opt$ms1_lookup_keepAdducts <- gsub("__ob__", "[", opt$ms1_lookup_keepAdducts) + opt$ms1_lookup_keepAdducts <- gsub("__cb__", "]", opt$ms1_lookup_keepAdducts) + ms1_lookup_keepAdducts <- strsplit(opt$ms1_lookup_keepAdducts, ",")[[1]] } -weights <- list("sm" = opt$sm_weight, - "metfrag" = opt$metfrag_weight, - "sirius_csifingerid" = opt$sirius_csi_weight, - "probmetab" = opt$probmetab_weight, - "ms1_lookup" = opt$ms1_lookup_weight, - "biosim" = opt$biosim_weight - ) +weights <- list( + "sm" = opt$sm_weight, + "metfrag" = opt$metfrag_weight, + "sirius_csifingerid" = opt$sirius_csi_weight, + "probmetab" = opt$probmetab_weight, + "ms1_lookup" = opt$ms1_lookup_weight, + "biosim" = opt$biosim_weight +) print(weights) if (is.null(opt$probmetab_resultPth)) { @@ -69,8 +65,8 @@ if (round(!sum(unlist(weights), 0) == 1)) { if (is.null(opt$summaryOutput)) { summaryOutput <- FALSE -}else{ - summaryOutput <- TRUE +} else { + summaryOutput <- TRUE } if (opt$compoundDbType == "local_config") { @@ -82,7 +78,7 @@ if (opt$compoundDbType == "local_config") { source(paste(base_dir, fname, sep = "/")) } source_local("dbconfig.R") -}else{ +} else { compoundDbPth <- opt$compoundDbPth compoundDbType <- opt$compoundDbType compoundDbName <- NA @@ -93,31 +89,33 @@ if (opt$compoundDbType == "local_config") { } summary_output <- msPurity::combineAnnotations( - sm_resultPth = sm_resultPth, - compoundDbPth = compoundDbPth, - metfrag_resultPth = opt$metfrag_resultPth, - sirius_csi_resultPth = opt$sirius_csi_resultPth, - probmetab_resultPth = opt$probmetab_resultPth, - ms1_lookup_resultPth = opt$ms1_lookup_resultPth, - ms1_lookup_keepAdducts = ms1_lookup_keepAdducts, - ms1_lookup_checkAdducts = opt$ms1_lookup_checkAdducts, - - compoundDbType = compoundDbType, - compoundDbName = compoundDbName, - compoundDbHost = compoundDbHost, - compoundDbPort = compoundDbPort, - compoundDbUser = compoundDbUser, - compoundDbPass = compoundDbPass, - weights = weights, - summaryOutput = summaryOutput) + sm_resultPth = sm_resultPth, + compoundDbPth = compoundDbPth, + metfrag_resultPth = opt$metfrag_resultPth, + sirius_csi_resultPth = opt$sirius_csi_resultPth, + probmetab_resultPth = opt$probmetab_resultPth, + ms1_lookup_resultPth = opt$ms1_lookup_resultPth, + ms1_lookup_keepAdducts = ms1_lookup_keepAdducts, + ms1_lookup_checkAdducts = opt$ms1_lookup_checkAdducts, + compoundDbType = compoundDbType, + compoundDbName = compoundDbName, + compoundDbHost = compoundDbHost, + compoundDbPort = compoundDbPort, + compoundDbUser = compoundDbUser, + compoundDbPass = compoundDbPass, + weights = weights, + summaryOutput = summaryOutput +) if (summaryOutput) { write.table(summary_output, - file.path(opt$outdir, "combined_annotations.tsv"), - sep = "\t", row.names = FALSE) + file.path(opt$outdir, "combined_annotations.tsv"), + sep = "\t", row.names = FALSE + ) } write.table(summary_output, - file.path(opt$outdir, "combined_annotations.tsv"), - sep = "\t", row.names = FALSE) + file.path(opt$outdir, "combined_annotations.tsv"), + sep = "\t", row.names = FALSE +) closeAllConnections() diff --git a/tools/msPurity/createDatabase.R b/tools/msPurity/createDatabase.R index c7ab9c5..95ced72 100644 --- a/tools/msPurity/createDatabase.R +++ b/tools/msPurity/createDatabase.R @@ -6,7 +6,6 @@ print(sessionInfo()) print("CREATING DATABASE") xset_pa_filename_fix <- function(opt, pa, xset) { - if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)) { # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables # needs to be done due to Galaxy moving the files around and screwing up any links to files @@ -18,7 +17,7 @@ xset_pa_filename_fix <- function(opt, pa, xset) { galaxy_names <- galaxy_names[galaxy_names != ""] nsave <- names(pa@fileList) - old_filenames <- basename(pa@fileList) + old_filenames <- basename(pa@fileList) pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)] names(pa@fileList) <- nsave @@ -27,12 +26,12 @@ xset_pa_filename_fix <- function(opt, pa, xset) { } - if (!all(basename(pa@fileList) == basename(xset@filepaths))) { + if (!all(basename(pa@fileList) == basename(xset@filepaths))) { if (!all(names(pa@fileList) == basename(xset@filepaths))) { - print("FILELISTS DO NOT MATCH") - message("FILELISTS DO NOT MATCH") - quit(status = 1) - }else{ + print("FILELISTS DO NOT MATCH") + message("FILELISTS DO NOT MATCH") + quit(status = 1) + } else { xset@filepaths <- unname(pa@fileList) } } @@ -64,22 +63,23 @@ opt <- parse_args(OptionParser(option_list = option_list)) print(opt) loadRData <- function(rdata_path, name) { -#loads an RData file, and returns the named xset object if it is there - load(rdata_path) - return(get(ls()[ls() %in% name])) + # loads an RData file, and returns the named xset object if it is there + load(rdata_path) + return(get(ls()[ls() %in% name])) } getxcmsSetObject <- function(xobject) { - # XCMS 1.x - if (class(xobject) == "xcmsSet") - return(xobject) - # XCMS 3.x - if (class(xobject) == "XCMSnExp") { - # Get the legacy xcmsSet object - suppressWarnings(xset <- as(xobject, "xcmsSet")) - xcms::sampclass(xset) <- xset@phenoData$sample_group - return(xset) - } + # XCMS 1.x + if (class(xobject) == "xcmsSet") { + return(xobject) + } + # XCMS 3.x + if (class(xobject) == "XCMSnExp") { + # Get the legacy xcmsSet object + suppressWarnings(xset <- as(xobject, "xcmsSet")) + xcms::sampclass(xset) <- xset@phenoData$sample_group + return(xset) + } } @@ -96,19 +96,17 @@ print(pa@fileList) # Missing list element causes failures (should be updated # in msPurity R package for future releases) if (!exists("allfrag", where = pa@filter_frag_params)) { - pa@filter_frag_params$allfrag <- FALSE + pa@filter_frag_params$allfrag <- FALSE } if (opt$xcms_camera_option == "xcms") { - xset <- loadRData(opt$xset, c("xset", "xdata")) xset <- getxcmsSetObject(xset) fix <- xset_pa_filename_fix(opt, pa, xset) pa <- fix[[1]] xset <- fix[[2]] xa <- NULL -}else{ - +} else { xa <- loadRData(opt$xset, "xa") fix <- xset_pa_filename_fix(opt, pa, xa@xcmsSet) pa <- fix[[1]] @@ -119,16 +117,16 @@ if (opt$xcms_camera_option == "xcms") { if (is.null(opt$grpPeaklist)) { grpPeaklist <- NA -}else{ +} else { grpPeaklist <- opt$grpPeaklist } dbPth <- msPurity::createDatabase(pa, - xset = xset, - xsa = xa, - outDir = opt$outDir, - grpPeaklist = grpPeaklist, - dbName = "createDatabase_output.sqlite" + xset = xset, + xsa = xa, + outDir = opt$outDir, + grpPeaklist = grpPeaklist, + dbName = "createDatabase_output.sqlite" ) @@ -136,9 +134,8 @@ dbPth <- msPurity::createDatabase(pa, if (!is.null(opt$eic)) { - if (is.null(xset)) { - xset <- xa@xcmsSet + xset <- xa@xcmsSet } # previous check should have matched filelists together xset@filepaths <- unname(pa@fileList) @@ -150,19 +147,19 @@ if (!is.null(opt$eic)) { x$rtmin_raw <- xset@rt$raw[[sid]][match(x$rtmin, xset@rt$corrected[[sid]])] x$rtmax_raw <- xset@rt$raw[[sid]][match(x$rtmax, xset@rt$corrected[[sid]])] return(x) - } xset@peaks <- as.matrix( - plyr::ddply(data.frame(xset@peaks), ~ sample, convert2Raw, xset = xset)) + plyr::ddply(data.frame(xset@peaks), ~sample, convert2Raw, xset = xset) + ) # Saves the EICS into the previously created database px <- msPurity::purityX(xset, - saveEIC = TRUE, - cores = 1, - sqlitePth = dbPth, - rtrawColumns = TRUE) - + saveEIC = TRUE, + cores = 1, + sqlitePth = dbPth, + rtrawColumns = TRUE + ) } closeAllConnections() diff --git a/tools/msPurity/createMSP.R b/tools/msPurity/createMSP.R index 180fff6..05c3459 100644 --- a/tools/msPurity/createMSP.R +++ b/tools/msPurity/createMSP.R @@ -26,59 +26,58 @@ load(opt$rdata_input) if (is.null(opt$metadata)) { metadata <- NULL -}else{ - metadata <- read.table(opt$metadata, header = TRUE, sep = "\t", - stringsAsFactors = FALSE, check.names = FALSE) +} else { + metadata <- read.table(opt$metadata, + header = TRUE, sep = "\t", + stringsAsFactors = FALSE, check.names = FALSE + ) if (!opt$metadata_cols_filter == "") { - metadata_cols_filter <- strsplit(opt$metadata_cols_filter, ",")[[1]] + metadata_cols_filter <- strsplit(opt$metadata_cols_filter, ",")[[1]] - metadata <- metadata[, metadata_cols_filter, drop = FALSE] - print(metadata) + metadata <- metadata[, metadata_cols_filter, drop = FALSE] + print(metadata) - if (!"grpid" %in% colnames(metadata)) { - metadata$grpid <- seq_len(nrow(metadata)) - } - - print(metadata) + if (!"grpid" %in% colnames(metadata)) { + metadata$grpid <- seq_len(nrow(metadata)) + } + print(metadata) } - } if (is.null(opt$metadata_cols) || opt$metadata_cols == "") { - metadata_cols <- NULL -}else{ - metadata_cols <- opt$metadata_cols - + metadata_cols <- NULL +} else { + metadata_cols <- opt$metadata_cols } if (is.null(opt$adduct_split)) { adduct_split <- FALSE -}else{ +} else { adduct_split <- TRUE } if (is.null(opt$xcms_groupids)) { xcms_groupids <- NULL -}else{ +} else { xcms_groupids <- trimws(strsplit(opt$xcms_groupids, ",")[[1]]) } if (is.null(opt$include_adducts_custom)) { include_adducts_custom <- "" -}else{ +} else { include_adducts_custom <- opt$include_adducts_custom } if (opt$include_adducts == "None") { include_adducts <- "" -}else{ +} else { include_adducts <- opt$include_adducts } @@ -96,22 +95,23 @@ include_adducts_all <- gsub(",", " ", include_adducts_all) if (is.null(opt$filter)) { filter <- FALSE -}else{ +} else { filter <- TRUE } msPurity::createMSP(pa, - msp_file_pth = file.path(opt$out_dir, "lcmsms_spectra.msp"), - metadata = metadata, - metadata_cols = metadata_cols, - method = opt$method, - adduct_split = adduct_split, - xcms_groupids = xcms_groupids, - filter = filter, - intensity_ra = opt$intensity_ra, - include_adducts = include_adducts_all, - msp_schema = opt$msp_schema) - -print("msp created") + msp_file_pth = file.path(opt$out_dir, "lcmsms_spectra.msp"), + metadata = metadata, + metadata_cols = metadata_cols, + method = opt$method, + adduct_split = adduct_split, + xcms_groupids = xcms_groupids, + filter = filter, + intensity_ra = opt$intensity_ra, + include_adducts = include_adducts_all, + msp_schema = opt$msp_schema +) + +print("msp created") \ No newline at end of file diff --git a/tools/msPurity/dimsPredictPuritySingle.R b/tools/msPurity/dimsPredictPuritySingle.R index 8079c5c..508d3ec 100644 --- a/tools/msPurity/dimsPredictPuritySingle.R +++ b/tools/msPurity/dimsPredictPuritySingle.R @@ -3,23 +3,23 @@ library(optparse) print(sessionInfo()) option_list <- list( - make_option(c("--mzML_file"), type = "character"), - make_option(c("--mzML_files"), type = "character"), - make_option(c("--mzML_filename"), type = "character", default = ""), - make_option(c("--mzML_galaxy_names"), type = "character", default = ""), - make_option(c("--peaks_file"), type = "character"), - make_option(c("-o", "--out_dir"), type = "character"), - make_option("--minoffset", default = 0.5), - make_option("--maxoffset", default = 0.5), - make_option("--ilim", default = 0.05), - make_option("--ppm", default = 4), - make_option("--dimspy", action = "store_true"), - make_option("--sim", action = "store_true"), - make_option("--remove_nas", action = "store_true"), - make_option("--iwNorm", default = "none", type = "character"), - make_option("--file_num_dimspy", default = 1), - make_option("--exclude_isotopes", action = "store_true"), - make_option("--isotope_matrix", type = "character") + make_option(c("--mzML_file"), type = "character"), + make_option(c("--mzML_files"), type = "character"), + make_option(c("--mzML_filename"), type = "character", default = ""), + make_option(c("--mzML_galaxy_names"), type = "character", default = ""), + make_option(c("--peaks_file"), type = "character"), + make_option(c("-o", "--out_dir"), type = "character"), + make_option("--minoffset", default = 0.5), + make_option("--maxoffset", default = 0.5), + make_option("--ilim", default = 0.05), + make_option("--ppm", default = 4), + make_option("--dimspy", action = "store_true"), + make_option("--sim", action = "store_true"), + make_option("--remove_nas", action = "store_true"), + make_option("--iwNorm", default = "none", type = "character"), + make_option("--file_num_dimspy", default = 1), + make_option("--exclude_isotopes", action = "store_true"), + make_option("--isotope_matrix", type = "character") ) # store options @@ -43,7 +43,7 @@ find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename) { galaxy_names <- str_to_vec(galaxy_names) if (mzML_filename %in% galaxy_names) { return(mzML_files[galaxy_names == mzML_filename]) - }else{ + } else { stop(paste("mzML file not found - ", mzML_filename)) } } @@ -53,15 +53,18 @@ if (is.null(opt$dimspy)) { df <- read.table(opt$peaks_file, header = TRUE, sep = "\t") if (file.exists(opt$mzML_file)) { mzML_file <- opt$mzML_file - }else if (!is.null(opt$mzML_files)) { - mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, - opt$mzML_filename) - }else{ + } else if (!is.null(opt$mzML_files)) { + mzML_file <- find_mzml_file( + opt$mzML_files, opt$mzML_galaxy_names, + opt$mzML_filename + ) + } else { mzML_file <- file.path(opt$mzML_file, filename) } -}else{ +} else { indf <- read.table(opt$peaks_file, - header = TRUE, sep = "\t", stringsAsFactors = FALSE) + header = TRUE, sep = "\t", stringsAsFactors = FALSE + ) filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] print(filename) @@ -75,9 +78,9 @@ if (is.null(opt$dimspy)) { if (file.exists(opt$mzML_file)) { mzML_file <- opt$mzML_file - }else if (!is.null(opt$mzML_files)) { + } else if (!is.null(opt$mzML_files)) { mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename) - }else{ + } else { mzML_file <- file.path(opt$mzML_file, filename) } @@ -95,25 +98,26 @@ if (is.null(opt$dimspy)) { } if (!is.null(opt$remove_nas)) { - df <- df[!is.na(df$mz), ] + df <- df[!is.na(df$mz), ] } if (is.null(opt$isotope_matrix)) { im <- NULL -}else{ +} else { im <- read.table(opt$isotope_matrix, - header = TRUE, sep = "\t", stringsAsFactors = FALSE) + header = TRUE, sep = "\t", stringsAsFactors = FALSE + ) } if (is.null(opt$exclude_isotopes)) { isotopes <- FALSE -}else{ +} else { isotopes <- TRUE } if (is.null(opt$sim)) { sim <- FALSE -}else{ +} else { sim <- TRUE } @@ -123,13 +127,13 @@ maxOffset <- as.numeric(opt$maxoffset) if (opt$iwNorm == "none") { iwNorm <- FALSE iwNormFun <- NULL -}else if (opt$iwNorm == "gauss") { +} else if (opt$iwNorm == "gauss") { iwNorm <- TRUE iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) -}else if (opt$iwNorm == "rcosine") { +} else if (opt$iwNorm == "rcosine") { iwNorm <- TRUE iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) -}else if (opt$iwNorm == "QE5") { +} else if (opt$iwNorm == "QE5") { iwNorm <- TRUE iwNormFun <- msPurity::iwNormQE.5() } @@ -138,23 +142,24 @@ print("FIRST ROWS OF PEAK FILE") print(head(df)) print(mzML_file) predicted <- msPurity::dimsPredictPuritySingle(df$mz, - filepth = mzML_file, - minOffset = minOffset, - maxOffset = maxOffset, - ppm = opt$ppm, - mzML = TRUE, - sim = sim, - ilim = opt$ilim, - isotopes = isotopes, - im = im, - iwNorm = iwNorm, - iwNormFun = iwNormFun - ) + filepth = mzML_file, + minOffset = minOffset, + maxOffset = maxOffset, + ppm = opt$ppm, + mzML = TRUE, + sim = sim, + ilim = opt$ilim, + isotopes = isotopes, + im = im, + iwNorm = iwNorm, + iwNormFun = iwNormFun +) predicted <- cbind(df, predicted) print(head(predicted)) print(file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv")) write.table(predicted, - file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"), - row.names = FALSE, sep = "\t") + file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"), + row.names = FALSE, sep = "\t" +) diff --git a/tools/msPurity/filterFragSpectra.R b/tools/msPurity/filterFragSpectra.R index 82ed3c0..58adb08 100644 --- a/tools/msPurity/filterFragSpectra.R +++ b/tools/msPurity/filterFragSpectra.R @@ -9,13 +9,10 @@ option_list <- list( make_option("--out_peaklist_prec", type = "character"), make_option("--out_peaklist_frag", type = "character"), make_option("--pa", type = "character"), - make_option("--ilim", default = 0.0), make_option("--plim", default = 0.0), - make_option("--ra", default = 0.0), make_option("--snr", default = 0.0), - make_option("--rmp", action = "store_true"), make_option("--snmeth", default = "median", type = "character"), make_option("--allfrag", action = "store_true") @@ -26,9 +23,9 @@ print(opt) loadRData <- function(rdata_path, name) { - #loads an RData file, and returns the named xset object if it is there - load(rdata_path) - return(get(ls()[ls() %in% name])) + # loads an RData file, and returns the named xset object if it is there + load(rdata_path) + return(get(ls()[ls() %in% name])) } # Requires @@ -36,24 +33,25 @@ pa <- loadRData(opt$pa, "pa") if (is.null(opt$rmp)) { opt$rmp <- FALSE -}else{ +} else { opt$rmp <- TRUE } if (is.null(opt$allfrag)) { opt$allfrag <- FALSE -}else{ +} else { opt$allfrag <- TRUE } pa <- filterFragSpectra(pa, - ilim = opt$ilim, - plim = opt$plim, - ra = opt$ra, - snr = opt$snr, - rmp = opt$rmp, - allfrag = opt$allfrag, - snmeth = opt$snmeth) + ilim = opt$ilim, + plim = opt$plim, + ra = opt$ra, + snr = opt$snr, + rmp = opt$rmp, + allfrag = opt$allfrag, + snmeth = opt$snmeth +) print(pa) save(pa, file = opt$out_rdata) @@ -82,11 +80,9 @@ setid <- function(grpinfo_i, msms) { if (length(pa) > 0) { - if (length(pa@grped_ms2) == 0) { message("No spectra available") } else { - # get group ids grpids <- unique(as.character(pa@grped_df$grpid)) @@ -94,7 +90,7 @@ if (length(pa) > 0) { df_fragments <- plyr::adply(grpids, 1, msmsgrp, pa = pa) df_fragments <- merge(df_fragments, pa@puritydf[, c("pid", "acquisitionNum", "precursorScanNum")], by = "pid") df_fragments <- df_fragments[order(df_fragments$grpid, df_fragments$pid, df_fragments$mz), ] - #select and reorder columns + # select and reorder columns df_fragments <- df_fragments[, c("grpid", "pid", "precursorScanNum", "acquisitionNum", "fileid", "mz", "i", "snr", "ra", "purity_pass_flag", "intensity_pass_flag", "ra_pass_flag", "snr_pass_flag", "pass_flag")] pa@grped_df$filename <- sapply(pa@grped_df$fileid, function(x) names(pa@fileList)[as.integer(x)]) diff --git a/tools/msPurity/flagRemove.R b/tools/msPurity/flagRemove.R index 8ba8bde..5b33635 100644 --- a/tools/msPurity/flagRemove.R +++ b/tools/msPurity/flagRemove.R @@ -2,77 +2,93 @@ library(msPurity) library(optparse) print(sessionInfo()) option_list <- list( - make_option(c("-o", "--out_dir"), type = "character", default = getwd(), - help = "Output folder for resulting files [default = %default]" - ), - make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"), - help = "The path to the xcmsSet object [default = %default]" - ), - make_option("--polarity", default = NA, - help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" - ), - make_option("--rsd_i_blank", default = 100, - help = "RSD threshold for the blank [default = %default]" - ), - make_option("--minfrac_blank", default = 0.5, - help = "minimum fraction of files for features needed for the blank [default = %default]" - ), - make_option("--rsd_rt_blank", default = 100, - help = "RSD threshold for the RT of the blank [default = %default]" - ), - - make_option("--ithres_blank", default = 0, - help = "Intensity threshold for the blank [default = %default]" - ), - make_option("--s2b", default = 10, - help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. + make_option(c("-o", "--out_dir"), + type = "character", default = getwd(), + help = "Output folder for resulting files [default = %default]" + ), + make_option(c("-x", "--xset_path"), + type = "character", default = file.path(getwd(), "xset.rds"), + help = "The path to the xcmsSet object [default = %default]" + ), + make_option("--polarity", + default = NA, + help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" + ), + make_option("--rsd_i_blank", + default = 100, + help = "RSD threshold for the blank [default = %default]" + ), + make_option("--minfrac_blank", + default = 0.5, + help = "minimum fraction of files for features needed for the blank [default = %default]" + ), + make_option("--rsd_rt_blank", + default = 100, + help = "RSD threshold for the RT of the blank [default = %default]" + ), + make_option("--ithres_blank", + default = 0, + help = "Intensity threshold for the blank [default = %default]" + ), + make_option("--s2b", + default = 10, + help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. 1000/10 = 100, so sample has fold change higher than the threshold and the peak is not considered a blank [default = %default]" - ), - make_option("--blank_class", default = "blank", type = "character", - help = "A string representing the class that will be used for the blank.[default = %default]" - ), - make_option("--egauss_thr", default = NA, - help = "Threshold for filtering out non gaussian shaped peaks. Note this only works + ), + make_option("--blank_class", + default = "blank", type = "character", + help = "A string representing the class that will be used for the blank.[default = %default]" + ), + make_option("--egauss_thr", + default = NA, + help = "Threshold for filtering out non gaussian shaped peaks. Note this only works if the 'verbose columns' and 'fit gauss' was used with xcms [default = %default]" - ), - make_option("--rsd_i_sample", default = 100, - help = "RSD threshold for the samples [default = %default]" - ), - make_option("--minfrac_sample", default = 0.8, - help = "minimum fraction of files for features needed for the samples [default = %default]" - ), - make_option("--rsd_rt_sample", default = 100, - help = "RSD threshold for the RT of the samples [default = %default]" - ), - make_option("--ithres_sample", default = 5000, - help = "Intensity threshold for the sample [default = %default]" - ), - make_option("--grp_rm_ids", default = NA, - help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) + ), + make_option("--rsd_i_sample", + default = 100, + help = "RSD threshold for the samples [default = %default]" + ), + make_option("--minfrac_sample", + default = 0.8, + help = "minimum fraction of files for features needed for the samples [default = %default]" + ), + make_option("--rsd_rt_sample", + default = 100, + help = "RSD threshold for the RT of the samples [default = %default]" + ), + make_option("--ithres_sample", + default = 5000, + help = "Intensity threshold for the sample [default = %default]" + ), + make_option("--grp_rm_ids", + default = NA, + help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) [default = %default]" - ), - make_option("--remove_spectra", action = "store_true", - help = "TRUE if flagged spectra is to be removed [default = %default]" - ), - make_option("--minfrac_xcms", default = 0.5, - help = "minfrac for xcms grouping [default = %default]" - ), - make_option("--mzwid", default = 0.001, - help = "mzwid for xcms grouping [default = %default]" - ), - make_option("--bw", default = 5, - help = "bw for xcms grouping [default = %default]" - ), - - make_option("--temp_save", action = "store_true", - help = "Assign True if files for each step saved (for testing purposes) [default = %default]" - ), - - make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") - + ), + make_option("--remove_spectra", + action = "store_true", + help = "TRUE if flagged spectra is to be removed [default = %default]" + ), + make_option("--minfrac_xcms", + default = 0.5, + help = "minfrac for xcms grouping [default = %default]" + ), + make_option("--mzwid", + default = 0.001, + help = "mzwid for xcms grouping [default = %default]" + ), + make_option("--bw", + default = 5, + help = "bw for xcms grouping [default = %default]" + ), + make_option("--temp_save", + action = "store_true", + help = "Assign True if files for each step saved (for testing purposes) [default = %default]" + ), + make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") ) # nolint start @@ -88,13 +104,13 @@ opt <- replace(opt, opt == "NA", NA) if (is.null(opt$temp_save)) { temp_save <- FALSE -}else{ +} else { temp_save <- TRUE } if (is.null(opt$remove_spectra)) { remove_spectra <- FALSE -}else{ +} else { remove_spectra <- TRUE } @@ -103,8 +119,9 @@ print(opt) getxcmsSetObject <- function(xobject) { # XCMS 1.x - if (class(xobject) == "xcmsSet") + if (class(xobject) == "xcmsSet") { return(xobject) + } # XCMS 3.x if (class(xobject) == "XCMSnExp") { # Get the legacy xcmsSet object @@ -116,7 +133,7 @@ getxcmsSetObject <- function(xobject) { loadRData <- function(rdata_path, name) { -#loads an RData file, and returns the named xset object if it is there + # loads an RData file, and returns the named xset object if it is there load(rdata_path) return(get(ls()[ls() %in% name])) } @@ -126,7 +143,7 @@ xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata"))) print(xset) if (is.null(opt$samplelist)) { blank_class <- opt$blank_class -}else{ +} else { samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE) samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"]) @@ -142,25 +159,26 @@ if (is.null(opt$samplelist)) { if (is.null(opt$multilist)) { ffrm_out <- flag_remove(xset, - pol = opt$polarity, - rsd_i_blank = opt$rsd_i_blank, - minfrac_blank = opt$minfrac_blank, - rsd_rt_blank = opt$rsd_rt_blank, - ithres_blank = opt$ithres_blank, - s2b = opt$s2b, - ref.class = blank_class, - egauss_thr = opt$egauss_thr, - rsd_i_sample = opt$rsd_i_sample, - minfrac_sample = opt$minfrac_sample, - rsd_rt_sample = opt$rsd_rt_sample, - ithres_sample = opt$ithres_sample, - minfrac_xcms = opt$minfrac_xcms, - mzwid = opt$mzwid, - bw = opt$bw, - out_dir = opt$out_dir, - temp_save = temp_save, - remove_spectra = remove_spectra, - grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]) + pol = opt$polarity, + rsd_i_blank = opt$rsd_i_blank, + minfrac_blank = opt$minfrac_blank, + rsd_rt_blank = opt$rsd_rt_blank, + ithres_blank = opt$ithres_blank, + s2b = opt$s2b, + ref.class = blank_class, + egauss_thr = opt$egauss_thr, + rsd_i_sample = opt$rsd_i_sample, + minfrac_sample = opt$minfrac_sample, + rsd_rt_sample = opt$rsd_rt_sample, + ithres_sample = opt$ithres_sample, + minfrac_xcms = opt$minfrac_xcms, + mzwid = opt$mzwid, + bw = opt$bw, + out_dir = opt$out_dir, + temp_save = temp_save, + remove_spectra = remove_spectra, + grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]] + ) print("flag remove finished") xset <- ffrm_out[[1]] grp_peaklist <- ffrm_out[[2]] @@ -172,26 +190,26 @@ if (is.null(opt$multilist)) { peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv") print(peak_pth) write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist), - peak_pth, row.names = FALSE, sep = "\t") + peak_pth, + row.names = FALSE, sep = "\t" + ) removed_peaks <- data.frame(removed_peaks) write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks), - file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t") - -}else{ - - # nolint start - # TODO - #xsets <- split(xset, multilist_df$multlist) - # - #mult_grps <- unique(multilist_df$multlist) - # - #for (mgrp in mult_grps){ - # xset_i <- xsets[mgrp] - # xcms::group(xset_i, - # - # } - # nolint end - - + file.path(opt$out_dir, "removed_peaks.tsv"), + row.names = FALSE, sep = "\t" + ) +} else { + # nolint start + # TODO + # xsets <- split(xset, multilist_df$multlist) + # + # mult_grps <- unique(multilist_df$multlist) + # + # for (mgrp in mult_grps){ + # xset_i <- xsets[mgrp] + # xcms::group(xset_i, + # + # } + # nolint end } diff --git a/tools/msPurity/frag4feature.R b/tools/msPurity/frag4feature.R index 92eb54a..9d50f35 100644 --- a/tools/msPurity/frag4feature.R +++ b/tools/msPurity/frag4feature.R @@ -3,64 +3,61 @@ library(msPurity) library(xcms) print(sessionInfo()) -xset_pa_filename_fix <- function(opt, pa, xset=NULL) { +xset_pa_filename_fix <- function(opt, pa, xset = NULL) { + if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)) { + # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables + # needs to be done due to Galaxy moving the files around and screwing up any links to files + filepaths <- trimws(strsplit(opt$mzML_files, ",")[[1]]) # nolint - if (!is.null(opt$mzML_files) && !is.null(opt$galaxy_names)) { - # NOTE: Relies on the pa@fileList having the names of files given as 'names' of the variables - # needs to be done due to Galaxy moving the files around and screwing up any links to files + filepaths <- filepaths[filepaths != ""] - filepaths <- trimws(strsplit(opt$mzML_files, ",")[[1]]) # nolint + galaxy_names <- trimws(strsplit(opt$galaxy_names, ",")[[1]]) + galaxy_names <- galaxy_names[galaxy_names != ""] - filepaths <- filepaths[filepaths != ""] + nsave <- names(pa@fileList) + old_filenames <- basename(pa@fileList) - galaxy_names <- trimws(strsplit(opt$galaxy_names, ",")[[1]]) - galaxy_names <- galaxy_names[galaxy_names != ""] + pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)] + names(pa@fileList) <- nsave - nsave <- names(pa@fileList) - old_filenames <- basename(pa@fileList) - - pa@fileList <- filepaths[match(names(pa@fileList), galaxy_names)] - names(pa@fileList) <- nsave - - pa@puritydf$filename <- basename(pa@fileList[match(pa@puritydf$filename, old_filenames)]) - pa@grped_df$filename <- basename(pa@fileList[match(pa@grped_df$filename, old_filenames)]) - } - print(pa@fileList) - - if (!is.null(xset)) { - - print(xset@filepaths) - - if (!all(basename(pa@fileList) == basename(xset@filepaths))) { - if (!all(names(pa@fileList) == basename(xset@filepaths))) { - print("FILELISTS DO NOT MATCH") - message("FILELISTS DO NOT MATCH") - quit(status = 1) - }else{ - xset@filepaths <- unname(pa@fileList) - } - } - } + pa@puritydf$filename <- basename(pa@fileList[match(pa@puritydf$filename, old_filenames)]) + pa@grped_df$filename <- basename(pa@fileList[match(pa@grped_df$filename, old_filenames)]) + } + print(pa@fileList) + + if (!is.null(xset)) { + print(xset@filepaths) + + if (!all(basename(pa@fileList) == basename(xset@filepaths))) { + if (!all(names(pa@fileList) == basename(xset@filepaths))) { + print("FILELISTS DO NOT MATCH") + message("FILELISTS DO NOT MATCH") + quit(status = 1) + } else { + xset@filepaths <- unname(pa@fileList) + } + } + } - return(list(pa, xset)) + return(list(pa, xset)) } option_list <- list( - make_option(c("-o", "--out_dir"), type = "character"), - make_option("--pa", type = "character"), - make_option("--xset", type = "character"), - make_option("--ppm", default = 10), - make_option("--plim", default = 0.0), - make_option("--convert2RawRT", action = "store_true"), - make_option("--intense", action = "store_true"), - make_option("--createDB", action = "store_true"), - make_option("--cores", default = 4), - make_option("--mzML_files", type = "character"), - make_option("--galaxy_names", type = "character"), - make_option("--grp_peaklist", type = "character"), - make_option("--useGroup", action = "store_true") + make_option(c("-o", "--out_dir"), type = "character"), + make_option("--pa", type = "character"), + make_option("--xset", type = "character"), + make_option("--ppm", default = 10), + make_option("--plim", default = 0.0), + make_option("--convert2RawRT", action = "store_true"), + make_option("--intense", action = "store_true"), + make_option("--createDB", action = "store_true"), + make_option("--cores", default = 4), + make_option("--mzML_files", type = "character"), + make_option("--galaxy_names", type = "character"), + make_option("--grp_peaklist", type = "character"), + make_option("--useGroup", action = "store_true") ) # store options @@ -68,17 +65,18 @@ opt <- parse_args(OptionParser(option_list = option_list)) print(opt) loadRData <- function(rdata_path, name) { -#loads an RData file, and returns the named xset object if it is there + # loads an RData file, and returns the named xset object if it is there load(rdata_path) return(get(ls()[ls() %in% name])) } # This function retrieve a xset like object -#@author Gildas Le Corguille lecorguille@sb-roscoff.fr +# @author Gildas Le Corguille lecorguille@sb-roscoff.fr getxcmsSetObject <- function(xobject) { # XCMS 1.x - if (class(xobject) == "xcmsSet") + if (class(xobject) == "xcmsSet") { return(xobject) + } # XCMS 3.x if (class(xobject) == "XCMSnExp") { # Get the legacy xcmsSet object @@ -99,20 +97,20 @@ print(pa@fileList) print(xset@filepaths) if (is.null(opt$intense)) { - intense <- FALSE -}else{ - intense <- TRUE + intense <- FALSE +} else { + intense <- TRUE } if (is.null(opt$convert2RawRT)) { convert2RawRT <- FALSE -}else{ +} else { convert2RawRT <- TRUE } if (is.null(opt$createDB)) { createDB <- FALSE -}else{ +} else { createDB <- TRUE } @@ -121,7 +119,7 @@ if (is.null(opt$useGroup)) { pa <- fix[[1]] xset <- fix[[2]] useGroup <- FALSE -}else{ +} else { # if are only aligning to the group not eah file we do not need to align the files between the xset and pa object print("useGroup") fix <- xset_pa_filename_fix(opt, pa) @@ -132,22 +130,24 @@ if (is.null(opt$useGroup)) { if (is.null(opt$grp_peaklist)) { grp_peaklist <- NA -}else{ +} else { grp_peaklist <- opt$grp_peaklist } print(useGroup) -pa <- msPurity::frag4feature(pa = pa, - xset = xset, - ppm = opt$ppm, - plim = opt$plim, - intense = intense, - convert2RawRT = convert2RawRT, - db_name = "alldata.sqlite", - out_dir = opt$out_dir, - grp_peaklist = grp_peaklist, - create_db = createDB, - use_group = useGroup) +pa <- msPurity::frag4feature( + pa = pa, + xset = xset, + ppm = opt$ppm, + plim = opt$plim, + intense = intense, + convert2RawRT = convert2RawRT, + db_name = "alldata.sqlite", + out_dir = opt$out_dir, + grp_peaklist = grp_peaklist, + create_db = createDB, + use_group = useGroup +) print(pa) save(pa, file = file.path(opt$out_dir, "frag4feature_output.RData")) diff --git a/tools/msPurity/purityA.R b/tools/msPurity/purityA.R index 8b628eb..7fe924b 100644 --- a/tools/msPurity/purityA.R +++ b/tools/msPurity/purityA.R @@ -3,20 +3,20 @@ library(optparse) print(sessionInfo()) option_list <- list( - make_option(c("-o", "--out_dir"), type = "character"), - make_option("--mzML_files", type = "character"), - make_option("--galaxy_names", type = "character"), - make_option("--minOffset", type = "numeric"), - make_option("--maxOffset", type = "numeric"), - make_option("--ilim", type = "numeric"), - make_option("--iwNorm", default = "none", type = "character"), - make_option("--exclude_isotopes", action = "store_true"), - make_option("--isotope_matrix", type = "character"), - make_option("--mostIntense", action = "store_true"), - make_option("--plotP", action = "store_true"), - make_option("--nearest", action = "store_true"), - make_option("--cores", default = 4), - make_option("--ppmInterp", default = 7) + make_option(c("-o", "--out_dir"), type = "character"), + make_option("--mzML_files", type = "character"), + make_option("--galaxy_names", type = "character"), + make_option("--minOffset", type = "numeric"), + make_option("--maxOffset", type = "numeric"), + make_option("--ilim", type = "numeric"), + make_option("--iwNorm", default = "none", type = "character"), + make_option("--exclude_isotopes", action = "store_true"), + make_option("--isotope_matrix", type = "character"), + make_option("--mostIntense", action = "store_true"), + make_option("--plotP", action = "store_true"), + make_option("--nearest", action = "store_true"), + make_option("--cores", default = 4), + make_option("--ppmInterp", default = 7) ) opt <- parse_args(OptionParser(option_list = option_list)) @@ -25,23 +25,27 @@ print(opt) if (opt$iwNorm == "none") { iwNorm <- FALSE iwNormFun <- NULL -}else if (opt$iwNorm == "gauss") { +} else if (opt$iwNorm == "gauss") { iwNorm <- TRUE if (is.null(opt$minOffset) || is.null(opt$maxOffset)) { - print("User has to define offsets if using Gaussian normalisation") - }else{ - iwNormFun <- msPurity::iwNormGauss(minOff = -as.numeric(opt$minOffset), - maxOff = as.numeric(opt$maxOffset)) + print("User has to define offsets if using Gaussian normalisation") + } else { + iwNormFun <- msPurity::iwNormGauss( + minOff = -as.numeric(opt$minOffset), + maxOff = as.numeric(opt$maxOffset) + ) } -}else if (opt$iwNorm == "rcosine") { +} else if (opt$iwNorm == "rcosine") { iwNorm <- TRUE if (is.null(opt$minOffset) || is.null(opt$maxOffset)) { - print("User has to define offsets if using R-cosine normalisation") - }else{ - iwNormFun <- msPurity::iwNormRcosine(minOff = -as.numeric(opt$minOffset), - maxOff = as.numeric(opt$maxOffset)) + print("User has to define offsets if using R-cosine normalisation") + } else { + iwNormFun <- msPurity::iwNormRcosine( + minOff = -as.numeric(opt$minOffset), + maxOff = as.numeric(opt$maxOffset) + ) } -}else if (opt$iwNorm == "QE5") { +} else if (opt$iwNorm == "QE5") { iwNorm <- TRUE iwNormFun <- msPurity::iwNormQE.5() } @@ -53,27 +57,27 @@ filepaths <- filepaths[filepaths != ""] if (is.null(opt$minOffset) || is.null(opt$maxOffset)) { offsets <- NA -}else{ +} else { offsets <- as.numeric(c(opt$minOffset, opt$maxOffset)) } if (is.null(opt$mostIntense)) { mostIntense <- FALSE -}else{ +} else { mostIntense <- TRUE } if (is.null(opt$nearest)) { nearest <- FALSE -}else{ +} else { nearest <- TRUE } if (is.null(opt$plotP)) { plotP <- FALSE plotdir <- NULL -}else{ +} else { plotP <- TRUE plotdir <- opt$out_dir } @@ -81,32 +85,34 @@ if (is.null(opt$plotP)) { if (is.null(opt$isotope_matrix)) { im <- NULL -}else{ +} else { im <- read.table(opt$isotope_matrix, - header = TRUE, sep = "\t", stringsAsFactors = FALSE) + header = TRUE, sep = "\t", stringsAsFactors = FALSE + ) } if (is.null(opt$exclude_isotopes)) { isotopes <- FALSE -}else{ +} else { isotopes <- TRUE } pa <- msPurity::purityA(filepaths, - cores = opt$cores, - mostIntense = mostIntense, - nearest = nearest, - offsets = offsets, - plotP = plotP, - plotdir = plotdir, - interpol = "linear", - iwNorm = iwNorm, - iwNormFun = iwNormFun, - ilim = opt$ilim, - mzRback = "pwiz", - isotopes = isotopes, - im = im, - ppmInterp = opt$ppmInterp) + cores = opt$cores, + mostIntense = mostIntense, + nearest = nearest, + offsets = offsets, + plotP = plotP, + plotdir = plotdir, + interpol = "linear", + iwNorm = iwNorm, + iwNormFun = iwNormFun, + ilim = opt$ilim, + mzRback = "pwiz", + isotopes = isotopes, + im = im, + ppmInterp = opt$ppmInterp +) if (!is.null(opt$galaxy_names)) { diff --git a/tools/msPurity/purityX.R b/tools/msPurity/purityX.R index eb2331c..dd3eed1 100644 --- a/tools/msPurity/purityX.R +++ b/tools/msPurity/purityX.R @@ -22,7 +22,7 @@ option_list <- list( make_option("--galaxy_files", type = "character"), make_option("--choose_class", type = "character"), make_option("--ignore_files", type = "character"), - make_option("--rtraw_columns", action = "store_true") + make_option("--rtraw_columns", action = "store_true") ) @@ -31,9 +31,9 @@ print(opt) if (!is.null(opt$xgroups)) { - xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]]) -}else{ - xgroups <- NULL + xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]]) +} else { + xgroups <- NULL } @@ -44,52 +44,54 @@ if (!is.null(opt$remove_nas)) { } if (is.null(opt$isotope_matrix)) { - im <- NULL -}else{ - im <- read.table(opt$isotope_matrix, - header = TRUE, sep = "\t", stringsAsFactors = FALSE) + im <- NULL +} else { + im <- read.table(opt$isotope_matrix, + header = TRUE, sep = "\t", stringsAsFactors = FALSE + ) } if (is.null(opt$exclude_isotopes)) { - isotopes <- FALSE -}else{ - isotopes <- TRUE + isotopes <- FALSE +} else { + isotopes <- TRUE } if (is.null(opt$rtraw_columns)) { - rtraw_columns <- FALSE -}else{ - rtraw_columns <- TRUE + rtraw_columns <- FALSE +} else { + rtraw_columns <- TRUE } loadRData <- function(rdata_path, xset_name) { -#loads an RData file, and returns the named xset object if it is there - load(rdata_path) - return(get(ls()[ls() == xset_name])) + # loads an RData file, and returns the named xset object if it is there + load(rdata_path) + return(get(ls()[ls() == xset_name])) } getxcmsSetObject <- function(xobject) { - # XCMS 1.x - if (class(xobject) == "xcmsSet") - return(xobject) - # XCMS 3.x - if (class(xobject) == "XCMSnExp") { - # Get the legacy xcmsSet object - suppressWarnings(xset <- as(xobject, "xcmsSet")) - sampclass(xset) <- xset@phenoData$sample_group - return(xset) - } + # XCMS 1.x + if (class(xobject) == "xcmsSet") { + return(xobject) + } + # XCMS 3.x + if (class(xobject) == "XCMSnExp") { + # Get the legacy xcmsSet object + suppressWarnings(xset <- as(xobject, "xcmsSet")) + sampclass(xset) <- xset@phenoData$sample_group + return(xset) + } } target_obj <- loadRData(opt$xset_path, opt$rdata_name) if (opt$camera_xcms == "camera") { - xset <- target_obj@xcmsSet -}else{ - xset <- target_obj + xset <- target_obj@xcmsSet +} else { + xset <- target_obj } xset <- getxcmsSetObject(xset) @@ -100,17 +102,17 @@ minOffset <- as.numeric(opt$minOffset) maxOffset <- as.numeric(opt$maxOffset) if (opt$iwNorm == "none") { - iwNorm <- FALSE - iwNormFun <- NULL -}else if (opt$iwNorm == "gauss") { - iwNorm <- TRUE - iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) -}else if (opt$iwNorm == "rcosine") { - iwNorm <- TRUE - iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) -}else if (opt$iwNorm == "QE5") { - iwNorm <- TRUE - iwNormFun <- msPurity::iwNormQE.5() + iwNorm <- FALSE + iwNormFun <- NULL +} else if (opt$iwNorm == "gauss") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) +} else if (opt$iwNorm == "rcosine") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) +} else if (opt$iwNorm == "QE5") { + iwNorm <- TRUE + iwNormFun <- msPurity::iwNormQE.5() } print(xset@filepaths) @@ -123,13 +125,13 @@ if (!is.null(opt$files)) { original_filenames <- basename(xset@filepaths) update_idx <- match(updated_filenames, original_filenames) - if (!is.null(opt$galaxy_files)) { - galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]]) - galaxy_files <- galaxy_files[galaxy_files != ""] - xset@filepaths <- galaxy_files[update_idx] - }else{ - xset@filepaths <- updated_filepaths[update_idx] - } + if (!is.null(opt$galaxy_files)) { + galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]]) + galaxy_files <- galaxy_files[galaxy_files != ""] + xset@filepaths <- galaxy_files[update_idx] + } else { + xset@filepaths <- updated_filepaths[update_idx] + } } if (!is.null(opt$choose_class)) { @@ -139,7 +141,7 @@ if (!is.null(opt$choose_class)) { print("choose class") print(ignore_files_class) -}else{ +} else { ignore_files_class <- NA } @@ -150,32 +152,33 @@ if (!is.null(opt$ignore_files)) { ignore_files <- unique(c(ignore_files, ignore_files_class)) ignore_files <- ignore_files[ignore_files != ""] -}else{ +} else { if (anyNA(ignore_files_class)) { ignore_files <- NULL - }else{ + } else { ignore_files <- ignore_files_class } - } print("ignore_files") print(ignore_files) -ppLCMS <- msPurity::purityX(xset = xset, - offsets = c(minOffset, maxOffset), - cores = opt$cores, - xgroups = xgroups, - purityType = opt$purityType, - ilim = opt$ilim, - isotopes = isotopes, - im = im, - iwNorm = iwNorm, - iwNormFun = iwNormFun, - singleFile = opt$singleFile, - fileignore = ignore_files, - rtrawColumns = rtraw_columns) +ppLCMS <- msPurity::purityX( + xset = xset, + offsets = c(minOffset, maxOffset), + cores = opt$cores, + xgroups = xgroups, + purityType = opt$purityType, + ilim = opt$ilim, + isotopes = isotopes, + im = im, + iwNorm = iwNorm, + iwNormFun = iwNormFun, + singleFile = opt$singleFile, + fileignore = ignore_files, + rtrawColumns = rtraw_columns +) dfp <- ppLCMS@predictions