Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GEN-1148]add script to update release version variable by folder #120

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
286 changes: 286 additions & 0 deletions scripts/release/update_release_version_by_folder.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
# Description: Update release version variables in files given a folder.
# Author: Xindi Guo
# Date: 2024-02-05

# pre-setup ---------------------------

tic = as.double(Sys.time())

library(optparse)
library(glue)
library(dplyr)
library(synapser)

# constants
ALL = "all"

waitifnot <- function(cond, msg) {
if (!cond) {

for (str in msg) {
message(str)
}
message("Press control-C to exit and try again.")

while(T) {}
}
}

# user input ----------------------------

option_list <- list(
make_option(c("-i", "--synid_folder_input"), type = "character",
help="Synapse ID of folder with clinical release files (required)"),
make_option(c("-o", "--synid_folder_output"), type = "character", default = NA,
help="Synapse ID of output folder for updated release files (default: write locally)"),
make_option(c("-u", "--updated_version"), type = "character",
help=glue("Updated release version number")),
make_option(c("-v", "--verbose"), action="store_true", default = FALSE,
help="Output script messages to the user (default: FALSE)"),
make_option(c("-a", "--auth"),
type = "character",
default = NA,
help="Synapse personal access token or path to .synapseConfig (default: normal synapse login behavior)")
)
opt <- parse_args(OptionParser(option_list=option_list))
waitifnot(!is.null(opt$synid_folder_input),
msg = "Rscript update_release_version_by_folder.R -h")

synid_folder_input <- opt$synid_folder_input
synid_folder_output <- opt$synid_folder_output
updated_version <- opt$updated_version
verbose <- opt$verbose
auth <- opt$auth

# functions ----------------------------

#' Extract personal access token from .synapseConfig
#' located at a custom path.
#'
#' @param path Path to .synapseConfig
#' @return personal acccess token
get_auth_token <- function(path) {

lines <- scan(path, what = "character", sep = "\t", quiet = T)
line <- grep(pattern = "^authtoken = ", x = lines, value = T)

token <- strsplit(line, split = ' ')[[1]][3]
return(token)
}

#' Override of synapser::synLogin() function to accept
#' custom path to .synapseConfig file or personal authentication
#' token. If no arguments are supplied, performs standard synLogin().
#'
#' @param auth full path to .synapseConfig file or authentication token
#' @param silent verbosity control on login
#' @return TRUE for successful login; F otherwise
synLogin <- function(auth = NA, silent = T) {

secret <- Sys.getenv("SCHEDULED_JOB_SECRETS")
if (secret != "") {
# Synapse token stored as secret in json string
syn = synapser::synLogin(silent = T, authToken = fromJSON(secret)$SYNAPSE_AUTH_TOKEN)
} else if (auth == "~/.synapseConfig" || is.na(auth)) {
# default Synapse behavior
syn <- synapser::synLogin(silent = silent)
} else {

# in case pat passed directly
token <- auth

# extract token from custom path to .synapseConfig
if (grepl(x = auth, pattern = "\\.synapseConfig$")) {
token = get_auth_token(auth)

if (is.na(token)) {
return(F)
}
}

# login with token
syn <- tryCatch({
synapser::synLogin(authToken = token, silent = silent)
}, error = function(cond) {
return(F)
})
}

# NULL returned indicates successful login
if (is.null(syn)) {
return(T)
}
return(F)
}

#' Download and load data stored in csv or other delimited format on Synapse
#' into an R data frame.
#'
#' @param synapse_id Synapse ID
#' @version Version of the Synapse entity to download. NA will load current
#' version
#' @param set Delimiter for file
#' @param na.strings Vector of strings to be read in as NA values
#' @param header TRUE if the file contains a header row; FALSE otherwise.
#' @param check_names TRUE if column names should be modified for compatibility
#' with R upon reading; FALSE otherwise.
#' @param comment.char character designating comment lines to ignore
#' @return data frame
get_synapse_entity_data_in_csv <- function(synapse_id,
version = NA,
sep = ",",
na.strings = c("NA"),
header = T,
check_names = F,
comment.char = "#",
colClasses = "character") {

if (is.na(version)) {
entity <- synGet(synapse_id)
} else {
entity <- synGet(synapse_id, version = version)
}

data <- read.csv(entity$path, stringsAsFactors = F,
na.strings = na.strings, sep = sep, check.names = check_names,
header = header, comment.char = comment.char, colClasses = colClasses)
return(data)
}

#' Store a file on Synapse with options to define provenance.
#'
#' @param path Path to the file on the local machine.
#' @param parent_id Synapse ID of the folder or project to which to load the file.
#' @param file_name Name of the Synapse entity once loaded
#' @param prov_name Provenance short description title
#' @param prov_desc Provenance long description
#' @param prov_used Vector of Synapse IDs of data used to create the current
#' file to be loaded.
#' @param prov_exec String representing URL to script used to create the file.
#' @return Synapse ID of entity representing file
save_to_synapse <- function(path,
parent_id,
file_name = NA,
prov_name = NA,
prov_desc = NA,
prov_used = NA,
prov_exec = NA) {

if (is.na(file_name)) {
file_name = path
}
file <- File(path = path, parentId = parent_id, name = file_name)

if (!is.na(prov_name) || !is.na(prov_desc) || !is.na(prov_used) || !is.na(prov_exec)) {
act <- Activity(name = prov_name,
description = prov_desc,
used = prov_used,
executed = prov_exec)
file <- synStore(file, activity = act, forceVersion=FALSE)
} else {
file <- synStore(file, forceVersion=FALSE)
}

return(file$properties$id)
}

#' Get all child entities of a synapse folder.
#'
#' @param synapse_id Synapse ID of the folder
#' @param include_types Types of child entities to return
#' @return Vector with values as Synapse IDs and names as entity names.
get_synapse_folder_children <- function(synapse_id,
include_types=list("folder", "file", "table", "link", "entityview", "dockerrepo")) {

ent <- as.list(synGetChildren(synapse_id, includeTypes = include_types))

children <- c()

if (length(ent) > 0) {
for (i in 1:length(ent)) {
children[ent[[i]]$name] <- ent[[i]]$id
}
}

return(children)
}

#' Get the name of a Synapse entity.
#'
#' @param synapse_id Synapse ID string
#' @return String representing entity name
#' @example get_synapse_entity_name("syn12345")
get_synapse_entity_name <- function(synapse_id) {
return(synGet(synapse_id, downloadFile = F)$properties$name)
}

#' Return current time as a string.
#'
#' @param timeOnly If TRUE, return only time; otherwise return date and time
#' @param tz Time Zone
#' @return Time stamp as string
#' @example
#' now(timeOnly = T)
now <- function(timeOnly = F, tz = "US/Pacific") {

Sys.setenv(TZ=tz)

if(timeOnly) {
return(format(Sys.time(), "%H:%M:%S"))
}

return(format(Sys.time(), "%Y-%m-%d %H:%M:%S"))
}

# synapse login --------------------

status <- synLogin(auth = auth)

# read ----------------------------

synid_file_children <- get_synapse_folder_children(synapse_id = synid_folder_input,
include_types=list("file"))

# main ----------------------------

if (verbose) {
print(glue("{now()}: updating release version to {updated_version} in clinical files.."))
}

for (i in 1:length(synid_file_children)) {

file_local <- names(synid_file_children)[i]
synid_file_child <- as.character(synid_file_children[i])

if (verbose) {
print(glue("{now()}: updating release version from '{file_local}' ({synid_file_child})..."))
}

raw <- get_synapse_entity_data_in_csv(synid_file_child, na.strings = c(""))
mod <- raw
mod$release_version = updated_version
write.csv(mod, file = file_local, row.names = F, quote = T, na = "")

if (verbose) {
print(glue("{now()}: release version is updated for '{file_local}' ({synid_file_child})."))
}

if (!is.na(synid_folder_output)) {
synid_file_dest <- save_to_synapse(path = file_local,
parent_id = synid_folder_output,
prov_name = "update clinical files",
prov_desc = "update release version variable in clinical files",
prov_used = c(synid_file_child),
prov_exec = "https://github.com/Sage-Bionetworks/genie-bpc-pipeline/blob/develop/scripts/release/update_release_version_by_folder.R")
file.remove(file_local)

if (verbose) {
print(glue("{now()}: uploaded {file_local} to {synid_file_dest}"))
}
}
}

# close out ----------------------------

toc = as.double(Sys.time())
print(glue("Runtime: {round(toc - tic)} s"))