Skip to content

Commit

Permalink
Merge changes to use httr2 and xml2 packages
Browse files Browse the repository at this point in the history
Merge branch 'httr2' into devel

# Conflicts:
#	DESCRIPTION
  • Loading branch information
grimbough committed Jan 24, 2024
2 parents 29a31bf + 4eaeb7c commit 4858ec5
Show file tree
Hide file tree
Showing 45 changed files with 4,661 additions and 299 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ on:
push:
pull_request:
branches:
- master
- devel

name: R-CMD-check

Expand Down
10 changes: 7 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
Package: biomaRt
<<<<<<< HEAD
Version: 2.59.0
=======
Version: 2.59.1
>>>>>>> httr2
Title: Interface to BioMart databases (i.e. Ensembl)
License: Artistic-2.0
Description: In recent years a wealth of biological data has become available
Expand All @@ -24,17 +28,17 @@ Authors@R: c(person("Steffen", "Durinck", role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-7800-3848")))
Depends: methods
Imports: utils, XML (>= 3.99-0.7), AnnotationDbi, progress, stringr, httr,
Imports: utils, AnnotationDbi, progress, stringr, httr2,
digest, BiocFileCache, rappdirs, xml2
Suggests: BiocStyle, knitr, mockery, rmarkdown, testthat, webmockr
Suggests: BiocStyle, knitr, mockery, rmarkdown, testthat, httptest2
URL: https://github.com/grimbough/biomaRt
BugReports: https://github.com/grimbough/biomaRt/issues
VignetteBuilder: knitr
biocViews: Annotation
LazyLoad: yes
NeedsCompilation: no
Encoding: UTF-8
RoxygenNote: 7.1.1
RoxygenNote: 7.3.0
Collate: biomaRtClasses.R
methods-Mart.R
biomaRt.R
Expand Down
14 changes: 9 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import(methods)
import(XML)

importFrom(utils, edit, head, read.table)
importFrom(AnnotationDbi, keys, columns, keytypes, select)
importFrom(progress, progress_bar)
importFrom(stringr, str_extract_all, str_match, str_replace)
importFrom(httr, POST, GET, content, content_type, timeout, set_cookies,
stop_for_status, status_code, with_config, config, set_config)
importFrom(httr2, request, req_perform, req_timeout, req_options, req_body_form,
req_error, req_retry,
resp_status, resp_body_string, resp_is_error,
url_parse, url_build)
importFrom(digest, digest)
importFrom(BiocFileCache, bfcnew, bfcadd, bfcquery, bfcinfo, removebfc, bfcremove)
importFrom(BiocFileCache, bfcnew, bfcadd, bfcquery, bfcinfo, removebfc,
bfcremove, bfcupdate)
importFrom(rappdirs, user_cache_dir)
importFrom(xml2, read_html)
importFrom(xml2, read_html, xml_find_first, xml_find_all, xml_text)

export(listMarts, getGene, getSequence, exportFASTA, useMart, listDatasets,
useDataset, listAttributes, listFilters,
Expand All @@ -28,6 +30,8 @@ export(useEnsemblGenomes, listEnsemblGenomes)

export(listEnsemblArchives)

export(setEnsemblSSL)

exportClasses(Mart)

exportMethods("show")
Expand Down
15 changes: 15 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
CHANGES IN VERSION 2.60.0
-------------------------

USER VISIBLE CHANGES

o listEnsemblGenomes() and useEnsemblGenomes() now have a host argument,
allowing you to select an Ensembl Genomes archive site. (Thanks to Hervé Pagès
@hpages for the suggestion: https://github.com/grimbough/biomaRt/issues/93)

INTERNAL CHANGES

o Removed dependency on XML package and switched all functionality to xml2
o Swiched from httr to httr2 package for submitting queries to BioMart
servers.

CHANGES IN VERSION 2.58.0
-------------------------

Expand Down
111 changes: 49 additions & 62 deletions R/biomaRt.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,20 @@ martCheck = function(mart, biomart = NULL){
}


bmRequest <- function(request, httr_config, verbose = FALSE){
bmRequest <- function(request, http_config, verbose = FALSE){
if(verbose)
message("Attempting web service request:\n", request)

result <- httr::GET(request, config = httr_config,
content_type("text/plain"),
timeout(getOption("timeout", default = 60)))
stop_for_status(result)
request <- httr2::request(request) |>
req_timeout(getOption("timeout", default = 60)) |>
req_options(!!!http_config)

result2 <- content(result, encoding = "UTF-8")
if(is.na(result2)) {
result2 <- content(result, encoding = "Latin1")
}
result <- req_perform(request)

result2 <- resp_body_string(result)
# if(is.na(result2)) {
# result2 <- content(result, encoding = "Latin1")
# }
return(result2)
}

Expand All @@ -62,28 +63,28 @@ bmRequest <- function(request, httr_config, verbose = FALSE){
#######################################################

listMarts <- function( mart = NULL, host="https://www.ensembl.org", path="/biomart/martservice",
port, includeHosts = FALSE, archive = FALSE, httr_config, verbose = FALSE){
port, includeHosts = FALSE, archive = FALSE, http_config, verbose = FALSE){

if(missing(port)) {
port <- ifelse(grepl("https", host), yes = 443, no = 80)
}

if(grepl(pattern = "^https://.*ensembl.org", x = host) && missing(httr_config)) {
httr_config <- .getEnsemblSSL()
if(grepl(pattern = "^https://.*ensembl.org", x = host) && missing(http_config)) {
http_config <- .getEnsemblSSL()
}

if(missing(httr_config)) {
httr_config <- httr::config()
if(missing(http_config)) {
http_config <- list()
}

.listMarts(mart = mart, host = host, path = path, port = port, includeHosts = includeHosts,
archive = archive, verbose = verbose, httr_config = httr_config, ensemblRedirect = TRUE)
archive = archive, verbose = verbose, http_config = http_config, ensemblRedirect = TRUE)

}

.listMarts <- function( mart = NULL, host="www.ensembl.org", path="/biomart/martservice",
port=80, includeHosts = FALSE, archive = FALSE, verbose = FALSE,
httr_config, ensemblRedirect = NULL, warn = TRUE){
http_config, ensemblRedirect = NULL, warn = TRUE){

request = NULL
if(is.null(mart)){
Expand All @@ -94,12 +95,12 @@ listMarts <- function( mart = NULL, host="https://www.ensembl.org", path="/bioma
} else {
request <- paste0(host, ":", port, path, "?type=registry&requestid=biomaRt")
}
if(is(httr_config, 'list')) {
httr_config <- do.call(c, httr_config)
if(is(http_config, 'list')) {
http_config <- do.call(c, http_config)
}
} else if(is(mart, 'Mart')) {
request = paste0(martHost(mart), "?type=registry&requestid=biomaRt")
httr_config <- martHTTRConfig(mart)
http_config <- martHTTPConfig(mart)
} else{
stop(mart, " object needs to be of class Mart created with the useMart function.\n",
"If you don't have a Mart object yet, use listMarts() without arguments or only specify the host argument")
Expand All @@ -109,7 +110,7 @@ listMarts <- function( mart = NULL, host="https://www.ensembl.org", path="/bioma
request <- paste0(request, "&redirect=no")
}

registry = bmRequest(request = request, httr_config = httr_config, verbose = verbose)
registry = bmRequest(request = request, http_config = http_config, verbose = verbose)

## check this looks like the MartRegistry XML, otherwise throw an error
if(!grepl(x = registry, pattern = "^\n*<MartRegistry>")) {
Expand All @@ -127,37 +128,23 @@ listMarts <- function( mart = NULL, host="https://www.ensembl.org", path="/bioma
call. = FALSE)
}
}
registry = xmlTreeParse(registry, asText=TRUE)
registry = registry$doc$children[[1]]

marts = list(biomart = NULL, version = NULL, host = NULL, path = NULL, database = NULL)
index = 1

# if(host != "www.biomart.org" || archive){
for(i in seq(length.out=xmlSize(registry))){
if(xmlName(registry[[i]])=="MartURLLocation"){
if(xmlGetAttr(registry[[i]],"visible") == 1){
if(!is.null(xmlGetAttr(registry[[i]],"name"))) marts$biomart[index] = as.character(xmlGetAttr(registry[[i]],"name"))
if(!is.null(xmlGetAttr(registry[[i]],"database"))) marts$database[index] = as.character(xmlGetAttr(registry[[i]],"database"))
if(!is.null(xmlGetAttr(registry[[i]],"displayName"))) marts$version[index] = as.character(xmlGetAttr(registry[[i]],"displayName"))
if(!is.null(xmlGetAttr(registry[[i]],"host"))) marts$host[index] = as.character(xmlGetAttr(registry[[i]],"host"))
if(!is.null(xmlGetAttr(registry[[i]],"path"))) marts$path[index] = as.character(xmlGetAttr(registry[[i]],"path"))
if(!is.null(xmlGetAttr(registry[[i]],"port"))) marts$port[index] = as.character(xmlGetAttr(registry[[i]],"port"))
if(!is.null(xmlGetAttr(registry[[i]],"serverVirtualSchema"))){
marts$vschema[index] = as.character(xmlGetAttr(registry[[i]],"serverVirtualSchema"))
}
index=index+1
}
}
}

registry_xml2 <- xml2::read_xml(registry)
registry_xml2 <- xml2::xml_children(registry_xml2)

## create a table with the registry information
marts <- do.call('rbind', lapply(registry_xml2, FUN = xml2::xml_attrs))
marts <- as.data.frame( marts[marts[,"visible"] == "1",] )
## rename some columns
names(marts)[names(marts) == "name"] <- "biomart"
names(marts)[names(marts) == "displayName"] <- "version"
names(marts)[names(marts) == "serverVirtualSchema"] <- "vschema"

if(includeHosts){
return(marts)
return(as.list(marts))
}
else{
ret = data.frame(biomart = as.character(marts$biomart),
version = as.character(marts$version),
stringsAsFactors=FALSE)
return(ret)
return(marts[,c("biomart", "version")])
}
}

Expand All @@ -176,11 +163,11 @@ useMart <- function(biomart, dataset, host = "https://www.ensembl.org", path = "

mart <- .useMart(biomart, dataset, host = host, path = path, port = port,
archive = archive, version = version, verbose = verbose,
httr_config = list(httr::config()), ensemblRedirect = TRUE)
http_config = list(), ensemblRedirect = TRUE)
}

.useMart <- function(biomart, dataset, host = "https://www.ensembl.org", path = "/biomart/martservice", port = 443,
archive = FALSE, ensemblRedirect = NULL, version, httr_config, verbose = FALSE){
archive = FALSE, ensemblRedirect = NULL, version, http_config, verbose = FALSE){

if(missing(biomart) && missing(version))
stop("No biomart databases specified. Specify a biomart database to use using the biomart or version argument")
Expand All @@ -198,7 +185,7 @@ useMart <- function(biomart, dataset, host = "https://www.ensembl.org", path = "
host <- .cleanHostURL(host)

marts <- .listMarts(host=host, path=path, port=port, includeHosts = TRUE,
httr_config = httr_config, archive = archive,
http_config = http_config, archive = archive,
ensemblRedirect = ensemblRedirect, warn = FALSE)
mindex = NA
if(!missing(biomart)){
Expand Down Expand Up @@ -227,8 +214,8 @@ useMart <- function(biomart, dataset, host = "https://www.ensembl.org", path = "
"?redirect=no",
"")

if(missing(httr_config)) {
httr_config <- list()
if(missing(http_config)) {
http_config <- list()
}

mart <- Mart(
Expand All @@ -238,13 +225,13 @@ useMart <- function(biomart, dataset, host = "https://www.ensembl.org", path = "
port,
marts$path[mindex],
redirect),
httr_config = httr_config
http_config = http_config
)

if(length(grep("archive",martHost(mart)) > 0)){

## hack to work around redirection of most recent mirror URL
archives <- .listEnsemblArchives(https = TRUE, httr_config = httr_config)
archives <- .listEnsemblArchives(https = TRUE, http_config = http_config)
current_release <- archives[archives$current_release == "*", 'url']
if(grepl(martHost(mart), pattern = current_release)) {
martHost(mart) <- stringr::str_replace(martHost(mart), pattern = current_release, "https://www.ensembl.org")
Expand Down Expand Up @@ -281,9 +268,9 @@ listDatasets <- function(mart, verbose = FALSE) {
sep <- ifelse(grepl(x = martHost(mart), pattern = ".+\\?.+"), "&", "?")

request = paste0(martHost(mart), sep, "type=datasets&requestid=biomaRt&mart=", martBM(mart))
httr_config <- martHTTRConfig(mart)
http_config <- martHTTPConfig(mart)

bmResult = bmRequest(request = request, httr_config = httr_config, verbose = verbose)
bmResult = bmRequest(request = request, http_config = http_config, verbose = verbose)
con = textConnection(bmResult)
txt = scan(con, sep="\t", blank.lines.skip=TRUE, what="character", quiet=TRUE, quote = "\"")
close(con)
Expand Down Expand Up @@ -314,9 +301,9 @@ bmVersion <- function(mart, verbose=FALSE){
sep <- ifelse(grepl(x = martHost(mart), pattern = ".+\\?.+"), "&", "?")

request = paste0(martHost(mart), sep, "type=version", "&requestid=biomaRt&mart=", martBM(mart))
httr_config <- martHTTRConfig(mart)
http_config <- martHTTPConfig(mart)

BioMartVersion = bmRequest(request = request, httr_config = httr_config, verbose = verbose)
BioMartVersion = bmRequest(request = request, http_config = http_config, verbose = verbose)
bmv = ""
if(BioMartVersion == "\n" || BioMartVersion == ""){
bmv = NA
Expand Down Expand Up @@ -347,7 +334,7 @@ bmVersion <- function(mart, verbose=FALSE){
"&requestid=biomaRt&mart=", martBM(mart),
"&virtualSchema=", martVSchema(mart))

attrfilt <- bmRequest(request = request, httr_config = martHTTRConfig(mart), verbose = verbose)
attrfilt <- bmRequest(request = request, http_config = martHTTPConfig(mart), verbose = verbose)
attrfiltParsed <- read.table(text = attrfilt, sep="\t", header=FALSE,
quote = "", comment.char = "", as.is=TRUE)
return(attrfiltParsed)
Expand Down Expand Up @@ -589,7 +576,7 @@ getBM <- function(attributes, filters = "", values = "", mart, curl = NULL,
if(!file.exists(tf)) {
postRes <- .submitQueryXML(host = paste0(martHost(mart), sep),
query = fullXmlQuery,
httr_config = martHTTRConfig(mart))
http_config = martHTTPConfig(mart))
result <- .processResults(postRes, mart = mart, hostURLsep = sep, fullXmlQuery = fullXmlQuery,
quote = quote, numAttributes = length(attributes))
saveRDS(result, file = tf)
Expand Down Expand Up @@ -683,7 +670,7 @@ getLDS <- function(attributes, filters = "", values = "", mart,
## POST query
postRes <- .submitQueryXML(host = paste0(martHost(mart), sep),
query = xmlQuery,
httr_config = martHTTRConfig(mart))
http_config = martHTTPConfig(mart))

if(length(grep("^Query ERROR", postRes))>0L)
stop(postRes)
Expand Down
4 changes: 2 additions & 2 deletions R/biomaRtClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ Mart <- setClass("Mart",
dataset = "character",
filters = "data.frame",
attributes = "data.frame",
httr_config = "list"
http_config = "list"
),
prototype(dataset = "",
vschema="default",
version = "",
httr_config = list(httr::config())
http_config = list()
)
)
Loading

0 comments on commit 4858ec5

Please sign in to comment.