Skip to content

Commit

Permalink
More capitalization corrections; tests reformatted for easier reading
Browse files (browse the repository at this point in the history)
  • Loading branch information
Ben-Drucker committed Aug 12, 2024
1 parent 495d174 commit 8961f68
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 24 deletions.
8 changes: 4 additions & 4 deletions R/PNNL_DMS_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ get_tool_output_files_for_job_number <- function(jobNumber, toolName = NULL,
{
# get job records first. This will be useful to get dataset folder
jobRecord <- get_dms_job_records(jobNumber)
datasetFolder <- dirname( as.character(jobRecord$Folder))
datasetFolder <- dirname(as.character(jobRecord$folder))

# get tool's subfolder
if( is.null(toolName) ){
Expand Down Expand Up @@ -325,7 +325,7 @@ download_datasets_by_data_package <- function(data_package_num,
}
multiproc_cl <- makeCluster(ncores)
on.exit(stopCluster(multiproc_cl))
pbwalk(X = pathToFile$Folder, FUN = file.copy, cl = multiproc_cl, to = copy_to)
pbwalk(X = pathToFile$folder, FUN = file.copy, cl = multiproc_cl, to = copy_to)
}


Expand Down Expand Up @@ -495,7 +495,7 @@ path_to_FASTA_used_by_DMS <- function(data_package_num, organism_db = NULL)
# make sure it was the same fasta used for all msgf jobs
# at this point this works only with one data package at a time
jobRecords <- get_job_records_by_dataset_package(data_package_num)
# jobRecords <- jobRecords[grepl("MSGFPlus", jobRecords$Tool),]
# jobRecords <- jobRecords[grepl("MSGFPlus", jobRecords$tool),]
# if(length(unique(jobRecords$organism_db)) != 1){
# stop("There should be exactly one FASTA file per data package!")
# }
Expand Down Expand Up @@ -556,6 +556,6 @@ path_to_FASTA_used_by_DMS <- function(data_package_num, organism_db = NULL)


# Prevent "no visible binding for global variable" note.
utils::globalVariables(c("Tool", "value"))
utils::globalVariables(c("tool", "value"))


6 changes: 3 additions & 3 deletions R/read_AScore_results_from_DMS.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ read_AScore_results_from_DMS <- function(data_package_num){
}else if(.Platform$OS.type == "windows"){
# in case Windows
ascores <- read_tsv(
file.path(job['Folder'],"Concatenated_msgfplus_syn_ascore.txt"))
file.path(job['folder'],"Concatenated_msgfplus_syn_ascore.txt"))
job_to_dataset_map <- read_tsv(
file.path(job['Folder'], "Job_to_Dataset_Map.txt"))
file.path(job['folder'], "Job_to_Dataset_Map.txt"))
}else{
stop("unknown OS")
}
Expand All @@ -82,5 +82,5 @@ read_AScore_results_from_DMS <- function(data_package_num){


# Prevent "no visible binding for global variable" note.
utils::globalVariables(c("Tool"))
utils::globalVariables(c("tool"))

8 changes: 4 additions & 4 deletions R/read_MSstats_from_MSFragger_job.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,18 @@ read_MSstats_from_MSFragger_job <- function(data_package_num,
job_records <- filter(job_records, tool == "MSFragger")

if (!is.null(param_file)) {
job_records <- filter(job_records, Parameter_File == param_file)
job_records <- filter(job_records, parameter_file == param_file)
}

if (!is.null(settings_file)) {
job_records <- filter(job_records, Settings_File == settings_file)
job_records <- filter(job_records, settings_file == !!settings_file)
}

if (!is.null(organism_db)) {
job_records <- filter(job_records, Organism_DB == organism_db)
job_records <- filter(job_records, organism_db == !!organism_db)
}

path <- unique(job_records$Folder)
path <- unique(job_records$folder)

if (length(path) == 0) {
stop("No jobs found.")
Expand Down
6 changes: 3 additions & 3 deletions R/read_msfragger_data_from_DMS.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ read_msfragger_data_from_DMS <- function(data_package_num,

path <- unique(job_records$folder)
remote_folder <- gsub("\\\\", "/", path)
local_folder <- .new_tempdir()
mount_folder <- local_folder <- .new_tempdir()
mount_cmd <- sprintf("mount -t smbfs %s %s", remote_folder, local_folder)
system(mount_cmd)

Expand All @@ -82,7 +82,7 @@ read_msfragger_data_from_DMS <- function(data_package_num,
aggregate_zip_file <- file.path(local_folder, "Dataset_PSM_tsv.zip")
exdir <- .new_tempdir()
unzip(zipfile = aggregate_zip_file, list = FALSE, exdir = exdir)
path <- exdir
local_folder <- exdir
}

fileNamePttrn <- "_psm\\.tsv"
Expand All @@ -108,7 +108,7 @@ read_msfragger_data_from_DMS <- function(data_package_num,
`Spectrum File` = sub("\\.pep\\.xml", "", `Spectrum File`),
`Spectrum File` = sub(fileNamePttrn, "", `Spectrum File`))

system(glue::glue("umount {local_folder}"))
system(glue::glue("umount {mount_folder}"))

if (!assume_inference) {
dt <- dt %>%
Expand Down
14 changes: 7 additions & 7 deletions R/read_msgf_data_from_DMS.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ read_msgf_data_from_DMS <- function(data_package_num,
jobRecords <- get_job_records_by_dataset_package(data_package_num)
}

jobRecords <- jobRecords[grepl("MSGFPlus", jobRecords$Tool),]
jobRecords <- jobRecords[grepl("MSGFPlus", jobRecords$tool),]

# THIS PARAMETER ARGUMENT CHECK CAN BE MORE ELEGANT
# Check parameter file. Is there any redundancy?
param_files <- unique(jobRecords$Parameter_File)
param_files <- unique(jobRecords$parameter_file)

# Check if param_file is NULL
if (is.null(param_file)) {
Expand All @@ -73,7 +73,7 @@ read_msgf_data_from_DMS <- function(data_package_num,
}

# FASTA files (organism_db)
organism_dbs <- unique(jobRecords$`Organism DB`)
organism_dbs <- unique(jobRecords$organism_db)

# Check if organism_db is NULL
if (is.null(organism_db)) {
Expand All @@ -95,11 +95,11 @@ read_msgf_data_from_DMS <- function(data_package_num,
}

# Subset to specific parameter file and FASTA file
jobRecords <- jobRecords[jobRecords$Parameter_File == param_file &
jobRecords$`Organism DB` == organism_db, ]
jobRecords <- jobRecords[jobRecords$parameter_file == param_file &
jobRecords$organism_db == organism_db, ]

if (use_mzIdentML) {
mzid_files <- list.files(jobRecords$Folder, pattern = ".mzid.gz",
mzid_files <- list.files(jobRecords$folder, pattern = ".mzid.gz",
full.names = TRUE)

# Read mzIdentML files into psms slot
Expand All @@ -110,7 +110,7 @@ read_msgf_data_from_DMS <- function(data_package_num,
)
} else {
results <- get_results_for_multiple_jobs.dt(jobRecords)
tool <- unique(jobRecords$Tool)
tool <- unique(jobRecords$tool)
pattern <- tool2suffix[[tool]]
results <- results[[pattern]]
msnid <- convert_msgf_output_to_msnid(results)
Expand Down
17 changes: 14 additions & 3 deletions tests/tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,34 @@ path_check <- function(path) {
expect_true(grepl("Rtmp", path, ignore.case = FALSE))
system(glue::glue("umount {path}"))
}

cat("1------\n")
expect_true(is_PNNL_DMS_connection_successful())
cat("2------\n")
expect_equal(dim(get_dms_job_records(4232)), c(1, 23))
cat("3------\n")
expect_equal(get_output_folder_for_job_and_tool(2344924, "DiaNN"), "DNN202408121049_Auto2344924")
cat("4------\n")
expect_equal(dim(get_job_records_by_dataset_package(4232)), c(2, 22))
cat("5------\n")
expect_equal(dim(get_datasets_by_data_package(4232)), c(1, 13))
cat("6------\n")
path <- path_to_FASTA_used_by_DMS(4232)
path_check(path)
expect_equal(dim(read_AScore_results_from_DMS(4136)), c(916130, 12)) # check args
expect_equal(dim(read_AScore_results_from_DMS(4136)), c(916130, 12)) # check args
cat("7------\n")
path <- path_to_study_design_from_DMS(4232)
path_check(path)
cat("8------\n")
path <- path_to_study_design_from_DMS(4232, T)
path_check(path)
expect_equal(dim(read_masic_data_from_DMS(4232)), c(20938, 22)) # check args
cat("9------\n")
expect_equal(dim(read_masic_data_from_DMS(4232)), c(20938, 22)) # check args
cat("10------\n")
expect_equal(dim(read_msfragger_data_from_DMS(
4804,
"MSFragger_Tryp_ProlineRule_ProtNTermAcet_Stat_CysAlk_TMT_6Plex_20ppmParTol.params",
"MSFragger_PepFDR_0.99_ProtFDR_0.99_IncludeDecoys.xml",
"ID_008350_56958B5A.fasta"
)), c(266203, 11))

# get_tool_output_files_for_job_number(2344924, fileNamePttrn = ".") -> res

0 comments on commit 8961f68

Please sign in to comment.