Skip to content

Commit

Permalink
Fixed #19
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Oct 19, 2023
1 parent 9665ac8 commit 909153a
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 4 deletions.
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ include bedboss/*
include bedboss/bedstat/*
include bedboss/bedstat/tools/*
include bedboss/bedmaker/*
include bedboss/bedqc/*
include bedboss/bedqc/*
include bedboss/qdrant_index/*
7 changes: 5 additions & 2 deletions bedboss/bedboss.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from bedboss.bedstat.bedstat import bedstat
from bedboss.bedmaker.bedmaker import BedMaker
from bedboss.bedqc.bedqc import bedqc
from bedboss.qdrant_index import add_to_qdrant
from bedboss.cli import build_argparser
from bedboss.const import (
OS_HG19,
Expand Down Expand Up @@ -234,14 +235,16 @@ def main(test_args: dict = None) -> NoReturn:
)
if args_dict["command"] == "all":
run_all(pm=pm, **args_dict)
elif args_dict["command"] == "all-pep":
run_all_by_pep(args_dict["pep_config"])
elif args_dict["command"] == "make":
BedMaker(pm=pm, **args_dict)
elif args_dict["command"] == "qc":
bedqc(pm=pm, **args_dict)
elif args_dict["command"] == "stat":
bedstat(pm=pm, **args_dict)
elif args_dict["command"] == "all-pep":
run_all_by_pep(args_dict["pep_config"])
elif args_dict["command"] == "index":
add_to_qdrant(pm=pm, **args_dict)
else:
parser.print_help()
# raise Exception("Incorrect pipeline name.")
Expand Down
2 changes: 1 addition & 1 deletion bedboss/bedstat/bedstat.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def bedstat(
if not skip_qdrant:
bbc.add_bed_to_qdrant(
bed_id=bed_digest,
bed_file_path=bedfile,
bed_file=bedfile,
payload={"fileid": fileid},
)
bbc.bed.report(
Expand Down
24 changes: 24 additions & 0 deletions bedboss/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
import logmuse

from bedboss._version import __version__
from bedboss.const import DEFAULT_BEDBASE_API_URL


def build_argparser() -> ArgumentParser:
"""
BEDboss parser
:return: Tuple[pipeline, arguments]
"""
parser = VersionInHelpParser(
Expand Down Expand Up @@ -38,6 +40,11 @@ def build_argparser() -> ArgumentParser:
help="A pipeline to read a file in BED format and produce metadata "
"in JSON format.",
)

sub_index = subparser.add_parser(
"index", help="Index not indexed bed files and add them to the qdrant database "
)

sub_all.add_argument(
"--outfolder",
required=True,
Expand Down Expand Up @@ -318,4 +325,21 @@ def build_argparser() -> ArgumentParser:
help="whether just to commit the JSON to the database",
)

sub_index.add_argument(
"--bedbase-config",
dest="bedbase_config",
type=str,
required=True,
help="a path to the bedbase configuration file [Required]",
)

sub_index.add_argument(
"--bedbase-api",
dest="bedbase_api",
type=str,
required=False,
default=DEFAULT_BEDBASE_API_URL,
help=f"URL of the Bedbase API [Default: {DEFAULT_BEDBASE_API_URL}]",
)

return logmuse.add_logging_options(parser)
2 changes: 2 additions & 0 deletions bedboss/const.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
DEFAULT_BEDBASE_API_URL = "https://bedbase.org/api"

OPEN_SIGNAL_FOLDER = "./openSignalMatrix"
OPEN_SIGNAL_URL = "http://big.databio.org/open_chromatin_matrix/"

Expand Down
3 changes: 3 additions & 0 deletions bedboss/qdrant_index/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from bedboss.qdrant_index.qdrant_index import add_to_qdrant

__all__ = ["add_to_qdrant"]
64 changes: 64 additions & 0 deletions bedboss/qdrant_index/qdrant_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import logging
from typing import List
from bbconf import BedBaseConf
from geniml.bbclient import BBClient
from geniml.region2vec import Region2VecExModel

from bedboss.const import DEFAULT_BEDBASE_API_URL

_LOGGER = logging.getLogger("bedboss")


def get_unindexed_bed_files(bbc: BedBaseConf) -> List[str]:
    """
    Collect the identifiers of bed records that are not in qdrant yet

    The bed table backend is queried for rows where added_to_qdrant is false
    and the genome alias is 'hg38'; only the record_identifier column is
    requested.

    :param bbc: BedBaseConf object whose bed backend is queried
    :return: list of record_identifiers of unindexed bed files
    """
    backend = bbc.bed.backend
    rows = backend.select_txt(
        columns=["record_identifier"],
        filter_templ="""added_to_qdrant = false and (genome->>'alias') = 'hg38'""",
    )

    # Each returned row is indexable; the identifier is its first element.
    record_ids = []
    for row in rows:
        record_ids.append(row[0])
    return record_ids


def add_to_qdrant(
    bedbase_config: str,
    bedbase_api: str = DEFAULT_BEDBASE_API_URL,
    **kwargs,
) -> None:
    """
    Add unindexed bed files to qdrant

    Every record returned by get_unindexed_bed_files is loaded through
    BBClient, added to qdrant, and then reported back to the bedbase with
    added_to_qdrant set to True.

    :param bedbase_config: path to the bedbase configuration file
    :param bedbase_api: URL of the Bedbase API
    :param kwargs: extra keyword arguments; accepted so the whole CLI argument
        dict can be passed in, but not used by this function
    :return: None
    """
    # get list of bed files
    bbc = BedBaseConf(config_path=bedbase_config)
    list_of_record_ids = get_unindexed_bed_files(bbc)

    if not list_of_record_ids:
        _LOGGER.info("No unindexed bed files found")
        return None

    # The model and the client do not depend on the record being processed,
    # so they are created once instead of once per loop iteration.
    region_to_vec_obj = Region2VecExModel("databio/r2v-ChIP-atlas-hg38")
    bbclient = BBClient(cache_folder="~/bedbase_cache", bedbase_api=bedbase_api)

    for record_id in list_of_record_ids:
        bedfile_object = bbclient.load_bed(record_id)

        bbc.add_bed_to_qdrant(
            bed_id=record_id,
            bed_file=bedfile_object,
            # NOTE(review): payload looks like a placeholder value - confirm
            # what metadata should be stored alongside the vector.
            payload={"description": "test"},
            region_to_vec=region_to_vec_obj,
        )

        # The flag is written only after the add call returned, so a record
        # whose indexing raised keeps added_to_qdrant = false.
        bbc.bed.report(
            record_identifier=record_id,
            values={"added_to_qdrant": True},
            force_overwrite=True,
        )

    return None

0 comments on commit 909153a

Please sign in to comment.