Skip to content

Commit

Permalink
Fixed incorrect md5sum of bed files
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Oct 17, 2023
1 parent 3f5894b commit 9665ac8
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 7 deletions.
2 changes: 1 addition & 1 deletion bedboss/bedboss.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def run_all(
just_db_commit: bool = False,
no_db_commit: bool = False,
force_overwrite: bool = False,
skip_qdrant: bool = False,
skip_qdrant: bool = True,
pm: pypiper.PipelineManager = None,
**kwargs,
) -> NoReturn:
Expand Down
21 changes: 15 additions & 6 deletions bedboss/bedstat/bedstat.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def bedstat(
just_db_commit: bool = False,
no_db_commit: bool = False,
force_overwrite: bool = False,
skip_qdrant: bool = False,
skip_qdrant: bool = True,
pm: pypiper.PipelineManager = None,
**kwargs,
) -> NoReturn:
Expand All @@ -93,7 +93,7 @@ def bedstat(
:param bool just_db_commit: whether just to commit the JSON to the database
:param bool no_db_commit: whether the JSON commit to the database should be
skipped
:param skip_qdrant: whether to skip qdrant indexing
:param skip_qdrant: whether to skip qdrant indexing [Default: True]
:param bool force_overwrite: whether to overwrite the existing record
:param pm: pypiper object
"""
Expand All @@ -106,7 +106,7 @@ def bedstat(
pass
bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)

bed_digest = md5(open(bedfile, "rb").read()).hexdigest()
bed_digest = digest_bedfile(bedfile)
bedfile_name = os.path.split(bedfile)[1]

fileid = os.path.splitext(os.path.splitext(bedfile_name)[0])[0]
Expand Down Expand Up @@ -192,7 +192,7 @@ def bedstat(
{
"bedfile": {
"path": bed_relpath,
"size": os.path.getsize(bedfile),
"size": convert_unit(os.path.getsize(bedfile)),
"title": "Path to the BED file",
}
}
Expand All @@ -203,8 +203,8 @@ def bedstat(
{
"bigbedfile": {
"path": bigbed_relpath,
"size": os.path.getsize(
os.path.join(bigbed, fileid + ".bigBed")
"size": convert_unit(
os.path.getsize(os.path.join(bigbed, fileid + ".bigBed"))
),
"title": "Path to the big BED file",
}
Expand Down Expand Up @@ -241,6 +241,10 @@ def bedstat(

# remove md5sum from the reported values, because it is already used as the record_identifier
del data["md5sum"]

# record added_to_qdrant as False for now; a later report call sets it to True
data.update({"added_to_qdrant": False})

bbc.bed.report(
record_identifier=bed_digest,
values=data,
Expand All @@ -253,3 +257,8 @@ def bedstat(
bed_file_path=bedfile,
payload={"fileid": fileid},
)
bbc.bed.report(
record_identifier=bed_digest,
values={"added_to_qdrant": True},
force_overwrite=True,
)
5 changes: 5 additions & 0 deletions bedboss/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ def build_argparser() -> ArgumentParser:
action="store_true",
help="just commit the JSON to the database",
)
sub_all.add_argument(
"--skip-qdrant",
action="store_true",
help="whether to skip qdrant indexing",
)

# all-pep
sub_all_pep.add_argument(
Expand Down

0 comments on commit 9665ac8

Please sign in to comment.