Skip to content

Commit

Permalink
Merge pull request #21 from natverse/feature/fanc_meta
Browse files Browse the repository at this point in the history
Feature/fanc meta
  • Loading branch information
jefferis authored Aug 24, 2024
2 parents be821a8 + 6d02b22 commit e1fc072
Show file tree
Hide file tree
Showing 14 changed files with 179 additions and 66 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ package-secrets.txt
^pkgdown$
^README\.Rmd$
^codecov\.yml$
^samples/$
9 changes: 8 additions & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
CLIO_TOKEN: ${{ secrets.CLIO_TOKEN }}
FLYWIRE_PRINCIPLES: IAGREETOTHEFLYWIREPRINCIPLES
CHUNKEDGRAPH_SECRET: ${{ secrets.CHUNKEDGRAPH_SECRET }}
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

steps:
- uses: actions/checkout@v3
Expand All @@ -33,7 +34,7 @@ jobs:

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck, any::devtools, any::covr, any::pkgdown, catmaid=natverse/rcatmaid, flyconnectome/malecns
extra-packages: any::rcmdcheck, any::devtools, any::covr, any::pkgdown, catmaid=natverse/rcatmaid, flyconnectome/malecns, any::reticulate
needs: check, coverage

- name: Add some R options for later steps
Expand All @@ -47,6 +48,12 @@ jobs:
fafbseg::download_flywire_release_data(version=783)
shell: Rscript {0}

- name: Install fafbseg + python
run: |
fafbseg::simple_python()
fafbseg::dr_fafbseg()
shell: Rscript {0}

- uses: r-lib/actions/check-r-package@v2

- name: Deploy package
Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ Suggests:
knitr,
rmarkdown,
dendroextras,
tidyr
tidyr,
reticulate
Enhances:
malecns (>= 0.3)
Remotes:
Expand Down
29 changes: 25 additions & 4 deletions R/ids.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,28 @@ is_key <- function(x, compound=FALSE) {
#' @param banc Pass banc ids to this argument (we only support basic metadata
#' queries for banc)
#'
#' @details all neuprint datasets (hemibrain, malevnc, opticlobe, malecns) use
#' the same query syntax although some fields may be dataset specific (see
#' examples).
#' @details You will often want to perform a query, most commonly for a cell
#' \emph{type} or cell \emph{class}, rather than specific numeric ids. The
#' most flexible way to do this is to use a regular expression (regex) query,
#' specified with an initial \code{"/"}.
#'
#' All neuprint datasets (hemibrain, malevnc, opticlobe, malecns) use the same
#' query syntax although some fields may be dataset specific (see examples).
#' The regex syntax for CAVE datasets (flywire, fanc, banc) should be the same
#' although you may find some wrinkles because the underlying data stores are
#' different. Note that we do not yet translate all the different fields
#' across datasets for queries, although this is a goal. For example the
#' neuprint/fanc/banc \code{class} field is equivalent to flywire
#' \code{super_class}. Similarly the values are not guaranteed to be the same.
#' Where flywire uses \code{super_class=="descending"} the manc uses
#' \code{class=="descending neuron"}.
#'
#' Therefore to find all DNs in these two datasets you will need to do:
#' \code{cf_ids(manc='/class:descending.*',
#' flywire='/super_class:descending.*')}
#'
#' Feel free to \href{https://github.com/natverse/coconatfly/issues}{make an
#' issue} if you find something that doesn't feel right or can suggest an improvement.
#' @return A list of ids with additional class \code{cidlist}
#' @export
#' @family ids
Expand All @@ -141,6 +159,9 @@ is_key <- function(x, compound=FALSE) {
#'
#' # now equivalent to
#' keys(c(cf_ids("/type:MBON1.+"), cf_ids(hemibrain = hbids)))
#'
#' # queries on classes respecting dataset idiosyncrasies
#' cf_ids(manc='/class:descending.*', flywire='/super_class:descending.*', expand = T)
#' }
cf_ids <- function(
query=NULL,
Expand Down Expand Up @@ -245,7 +266,7 @@ expand_ids <- function(ids, dataset) {
dataset=match_datasets(dataset)
FUN <- switch(dataset,
manc=malevnc::manc_ids,
fanc=I,
fanc=fanc_ids,
malecns=malecns::mcns_ids,
banc=banc_ids,
flywire=function(ids) fafbseg::flywire_ids(ids, version=fafbseg::flywire_connectome_data_version()),
Expand Down
113 changes: 65 additions & 48 deletions R/meta.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ npconn <- function(dataset) {
else if(dataset=='opticlobe')
return(neuprintr::neuprint_login(
server="https://neuprint.janelia.org",
dataset='optic-lobe:v1.0'))
dataset='optic-lobe:v1.0.1'))
else if(dataset=='malecns')
return(malecns::mcns_neuprint())
else if(dataset=='manc')
Expand Down Expand Up @@ -156,97 +156,114 @@ manc_meta <- function(ids, ...) {
tres
}

# Fetch metadata for FANC neurons.
#
# ids: FANC ids or a query string, normalised by fanc_ids(). Defaults to NULL;
#   when no ids result, fancorbanc_meta() returns every row it fetched.
# ...: passed on to fancorbanc_meta().
#
# Returns the data frame built by fancorbanc_meta() from the
# 'neuron_information' table (columns id, class, type, side).
fanc_meta <- function(ids = NULL, ...) {
  ids <- fanc_ids(ids)
  # with_fanc() presumably points the underlying CAVE query at the FANC
  # dataset for the duration of the call - confirm against fancr docs
  fancr::with_fanc(
    fancorbanc_meta(table = "neuron_information", ids = ids, ...)
  )
}

# Fetch metadata for BANC neurons from the 'cell_info' table.
#
# ids: BANC ids or a query string, normalised by banc_ids(); defaults to NULL.
# ...: passed on to fancorbanc_meta().
#
# Returns the data frame built by fancorbanc_meta().
banc_meta <- function(ids = NULL, ...) {
  root_ids <- banc_ids(ids)
  # Developer note - to list every tag2 category available in cell_info:
  # cell_info %>% tidyr::pivot_wider(id_cols = pt_root_id, names_from = tag2, values_from = tag, values_fn = function(x) paste(x, collapse = ';')) %>% colnames()
  fancr::with_banc(
    fancorbanc_meta(table = "cell_info", ids = root_ids, ...)
  )
}

# Shared metadata query for the FANC and BANC CAVE annotation tables.
#
# table: name of the annotation table to query (callers in this file pass
#   'neuron_information' for FANC and 'cell_info' for BANC).
# ids:   optional character vector of root ids. When supplied, the result has
#   one row per requested id, in the order given (ids without annotations get
#   NA metadata). When empty/NULL all fetched annotations are returned.
# ...:   accepted for interface compatibility; currently not used.
#
# Returns a data frame with columns id, class, type and side (side is always
# NA because these tables do not supply it here).
#
# NOTE: must be called inside fancr::with_fanc()/with_banc() so that the query
# is directed at the intended dataset.
fancorbanc_meta <- function(table, ids = NULL, ...) {
  tag_types <- c("primary class", "anterior-posterior projection pattern",
                 "neuron identity")
  fid <- list(tag2 = tag_types)
  # FIXME - think of a better workaround for the fact that ids may not be in
  # correct materialisation state
  # if(length(ids)>0) {
  #   fid[['pt_root_id']]=ids
  # }

  # filter/select specifications are lists keyed by the table name
  fid <- list(fid)
  names(fid) <- table
  selc <- list(c("id", "tag", "tag2", "pt_root_id", "pt_supervoxel_id"))
  names(selc) <- table

  cell_infos <- fafbseg::flywire_cave_query(
    table,
    filter_in_dict = fid,
    select_columns = selc,
    version = "latest",
    timetravel = TRUE,
    allow_missing_lookups = TRUE
  )

  metadf <- if (nrow(cell_infos) < 1) {
    data.frame(id = character(), class = character(), type = character(),
               side = character())
  } else {
    cell_infosw <- cell_infos %>%
      # drop the literal bot signature appended to some tags
      mutate(tag = sub("\n\n\n*banc-bot*", "", tag, fixed = TRUE)) %>%
      tidyr::pivot_wider(
        id_cols = pt_root_id,
        names_from = tag2,
        values_from = tag,
        values_fn = function(x) {
          sux <- sort(unique(x))
          # if stripping "?" merges tags (e.g. "X" and "X?") keep the
          # shorter, de-duplicated set
          sux2 <- sort(unique(sub("?", "", x, fixed = TRUE)))
          if (length(sux2) < length(sux)) sux <- sux2
          paste(sux, collapse = ";")
        }
      )
    # pivot_wider() only creates columns for tag categories present in the
    # result; add any missing ones so the rename() below cannot fail
    for (missing_col in setdiff(tag_types, colnames(cell_infosw))) {
      cell_infosw[[missing_col]] <- NA_character_
    }
    cell_infosw %>%
      rename(id = pt_root_id,
             class = `primary class`,
             apc = `anterior-posterior projection pattern`,
             type = `neuron identity`) %>%
      # fold the projection pattern (apc) into a single class label
      mutate(class = case_when(
        class == "sensory neuron" & grepl("scending", apc) ~ paste("sensory", apc),
        (is.na(class) | class == "central neuron") & apc == "ascending" ~ "ascending",
        (is.na(class) | class == "central neuron") & apc == "descending" ~ "descending",
        is.na(apc) & is.na(class) ~ "unknown",
        is.na(apc) ~ class,
        TRUE ~ paste(class, apc)
      )) %>%
      mutate(class = sub(" neuron", "", class)) %>%
      select(id, class, type) %>%
      mutate(id = as.character(id), side = NA)
  }

  if (length(ids) > 0) {
    left_join(data.frame(id = ids), metadf, by = "id")
  } else {
    metadf
  }
}

#' @importFrom dplyr pull
banc_ids <- function(ids) {
  # BANC flavour of fancorbanc_ids(): resolves ids or a query for dataset "banc"
  fancorbanc_ids(ids = ids, dataset = "banc")
}

# FANC flavour of fancorbanc_ids(): resolves ids or a query for dataset "fanc".
fanc_ids <- function(ids) {
  fancorbanc_ids(ids = ids, dataset = "fanc")
}

#' @importFrom dplyr pull
fancorbanc_ids <- function(ids, dataset = c("banc", "fanc")) {
  # Resolve ids or a metadata query to FANC/BANC root ids.
  #
  # ids:     numeric-like ids (anything extract_ids() understands) or a single
  #          query string: "all", "neurons" (everything whose class is not
  #          'glia'), or a regex query "/field:value" ("type" is assumed when
  #          no field is given).
  # dataset: which dataset's metadata and materialisation version to use.
  #
  # Returns the result of fancr::fanc_ids(..., integer64 = FALSE).
  # Errors if the query names a field that is not a metadata column.
  dataset <- match.arg(dataset)
  # extract numeric ids if possible
  ids <- extract_ids(ids)
  if (is.character(ids) && length(ids) == 1 && !fafbseg:::valid_id(ids)) {
    # a single string that is not a valid id is treated as a query
    metadf <- if (dataset == "banc") banc_meta() else fanc_meta()
    if (isTRUE(ids == "all")) {
      return(fancr::fanc_ids(metadf$id, integer64 = FALSE))
    }
    if (isTRUE(ids == "neurons")) {
      ids <- metadf %>%
        filter(is.na(.data$class) | .data$class != "glia") %>%
        pull(.data$id)
      return(fancr::fanc_ids(ids, integer64 = FALSE))
    }
    if (isTRUE(substr(ids, 1, 1) == "/")) {
      ids <- substr(ids, 2, nchar(ids))
    } else {
      warning("All FANC/BANC queries are regex queries. ",
              "Use an initial / to suppress this warning!")
    }
    if (!grepl(":", ids)) ids <- paste0("type:", ids)
    # split on the FIRST colon so that regex values which themselves contain
    # ":" (e.g. a non-capturing group "(?:a|b)") stay intact
    qsplit <- stringr::str_match(ids, pattern = "^/?([^:]+):(.+)$")
    field <- qsplit[, 2]
    value <- qsplit[, 3]
    if (!field %in% colnames(metadf)) {
      stop(paste0(dataset, " queries only work with these fields: ",
                  paste(colnames(metadf)[-1], collapse = ",")))
    }
    ids <- metadf %>%
      filter(grepl(value, .data[[field]])) %>%
      pull(.data$id)
  } else if (length(ids) > 0) {
    # check they are valid for current materialisation
    ids <- if (dataset == "banc") {
      fancr::with_banc(
        fafbseg::flywire_latestid(ids, version = banc_version())
      )
    } else {
      fancr::with_fanc(
        fafbseg::flywire_latestid(ids, version = fanc_version())
      )
    }
  }
  fancr::fanc_ids(ids, integer64 = FALSE)
}

# Materialisation version reported for BANC.
#
# Evaluates fanc_version() inside fancr::with_banc(), so the version is read
# from whichever CAVE client fancr supplies in that context (presumably the
# BANC one - confirm against fancr docs).
banc_version <- function() {
  fancr::with_banc(fanc_version())
}

# Materialisation version reported by the current fancr CAVE client
# (the `materialize$version` field of fancr::fanc_cave_client()).
fanc_version <- function() {
  fancr::fanc_cave_client()$materialize$version
}
12 changes: 9 additions & 3 deletions R/partners.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,20 @@ cf_partners <- function(ids, threshold=1L, partners=c("inputs", "outputs"),
T ~ malecns::mcns_soma_side(., method = "instance")
))
} else if (n=='fanc') {
# resolve queries / update ids for the current FANC materialisation
fids <- fanc_ids(ids[[n]])
tres <- fancr::fanc_partner_summary(
  fids,
  partners = partners,
  threshold = threshold - 1L,
  version = fanc_version()
)
partner_col <- grep("_id", colnames(tres), value = TRUE)
# annotate partners with FANC metadata; joining against banc_meta() here
# would never match FANC ids and leave class/type NA for every partner
metadf <- fanc_meta()
colnames(metadf)[[1]] <- partner_col
tres <- left_join(tres, metadf, by = partner_col)
} else if (n=='banc') {
bids=banc_ids(ids[[n]])
tres=fancr::with_banc(fancr::fanc_partner_summary(bids, partners = partners,
threshold = threshold-1L, version=banc_version()))
partner_col=grep("_id", colnames(tres), value = T)
# metadf=banc_meta(tres[[partner_col]])
metadf=banc_meta()
colnames(metadf)[[1]]=partner_col
tres=left_join(tres, metadf, by = partner_col)
Expand Down
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ check_fanc <- function() {
sub('graphene://','', u)
}, silent = T)
if(inherits(furl, 'try-error')) furl=NA_character_
ver <- if(is.na(furl)) NA_character_
if(is.na(furl)) ver=NA_character_
else {
ver=try(silent = T, {
fcc=fancr::fanc_cave_client()
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ At present the following datasets are supported (dataset names used in the packa
6. Janelia Male Optic Lobe (part of the malecns) (**opticlobe**)
7. Wei Lee and colleagues [Brain and Nerve Cord](https://github.com/jasper-tms/the-BANC-fly-connectome/wiki) (**banc**)

Datasets 1-4 and 7 are either public (hemibrain, manc, flywire, opticlobe) or
Datasets 1-4 and 6, 7 are either public (hemibrain, manc, flywire, opticlobe) or
access can be requested subject to agreeing to certain terms of use (fanc, banc).
The Male CNS dataset is currently undergoing
proofreading and annotation in a collaboration between the
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ the package in brackets):
Cord](https://github.com/jasper-tms/the-BANC-fly-connectome/wiki)
(**banc**)

Datasets 1-4 and 7 are either public (hemibrain, manc, flywire,
Datasets 1-4 and 6, 7 are either public (hemibrain, manc, flywire,
opticlobe) or access can be requested subject to agreeing to certain
terms of use (fanc, banc). The Male CNS dataset is currently undergoing
proofreading and annotation in a collaboration between the
Expand Down
28 changes: 25 additions & 3 deletions man/cf_ids.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions tests/testthat/test-datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ test_that("dataset functions work", {
expect_equal(cf_datasets(),
lengthen_datasets(abbreviate_datasets(cf_datasets())))

expect_equal(abbreviate_datasets(c("flywire", "flywire", "hemibrain")),
c("fw", "fw", "hb"))
expect_equal(
abbreviate_datasets(c("flywire", "flywire", "hemibrain", "banc", "fanc", "manc")),
c("fw", "fw", "hb", "bc", "fv", "mv"))
})
17 changes: 16 additions & 1 deletion tests/testthat/test-ids.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,24 @@ test_that("key handling works", {

expect_warning(cf_ids(hemibrain = 'rhubarb', expand = T))

expect_equal(c(
expect_equal(res <- c(
cf_ids(hemibrain = '/MBON0[12].*', flywire=1:3),
cf_ids(hemibrain = '612371421', flywire=3:5)),
cf_ids(hemibrain = c("612371421", "673509195", "424789697", "5813022341"),
flywire=as.character(1:5)))

expect_output(print(res), regexp = 'flywire.*hemibrain')
})

test_that("fanc/banc ids/metadata", {
  skip_if_not_installed("fancr")
  skip_if_not_installed("reticulate")

  # every id returned by the FANC type query must be among the current
  # versions of these two reference ids
  dna01_found <- cf_ids(fanc = "/type:DNa01", expand = TRUE)$fanc
  dna01_known <- fancr::fanc_latestid(
    c("648518346488820970", "648518346475464576"),
    version = "latest"
  )
  expect_in(dna01_found, dna01_known)

  # BANC regex query given with a leading "/" ...
  dna02keys <- cf_ids(banc = "/DNa02", keys = TRUE)
  expect_length(dna02keys, 2L)
  # ... and without it: same keys, but a warning is expected
  expect_warning(
    expect_in(cf_ids(banc = "DNa02", keys = TRUE), dna02keys)
  )
})
Loading

0 comments on commit e1fc072

Please sign in to comment.