-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
183 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import os | ||
|
||
import dolomite_base as dl | ||
import h5py | ||
from dolomite_base.read_object import registry | ||
from genomicranges import GenomicRangesList | ||
|
||
from .read_genomic_ranges import read_genomic_ranges | ||
|
||
registry["genomic_ranges_list"] = "dolomite_ranges.read_genomic_ranges_list" | ||
|
||
|
||
def read_genomic_ranges_list(path: str, metadata: dict, **kwargs) -> GenomicRangesList: | ||
"""Load genomic ranges into a | ||
:py:class:`~genomicranges.GenomicRanges.GenomicRangesList` object. | ||
This method | ||
should generally not be called directly but instead be invoked by | ||
:py:meth:`~dolomite_base.read_object.read_object`. | ||
Args: | ||
path: | ||
Path to the directory containing the object. | ||
metadata: | ||
Metadata for the object. | ||
kwargs: | ||
Further arguments, ignored. | ||
Returns: | ||
A :py:class:`~genomicranges.GenomicRangesList.GenomicRangesList` object. | ||
""" | ||
|
||
with h5py.File(os.path.join(path, "partitions.h5"), "r") as handle: | ||
ghandle = handle["genomic_ranges_list"] | ||
|
||
lengths = dl.load_vector_from_hdf5( | ||
ghandle["lengths"], expected_type=int, report_1darray=True | ||
) | ||
|
||
names = None | ||
if "names" in ghandle: | ||
names = dl.load_vector_from_hdf5( | ||
ghandle["names"], expected_type=str, report_1darray=True | ||
) | ||
|
||
_all_granges = read_genomic_ranges( | ||
path=os.path.join(path, "concatenated"), metadata=None | ||
) | ||
|
||
counter = 0 | ||
_split_granges = [] | ||
if lengths.sum() == 0: | ||
_split_granges = _all_granges | ||
else: | ||
for ilen in lengths: | ||
_frag = _all_granges[counter : (counter + ilen)] | ||
_split_granges.append(_frag) | ||
counter += ilen | ||
|
||
grl = GenomicRangesList(names=names, range_lengths=lengths, ranges=_split_granges) | ||
|
||
_elem_annotation_path = os.path.join(path, "element_annotations") | ||
if os.path.exists(_elem_annotation_path): | ||
_mcols = dl.read_object(_elem_annotation_path) | ||
grl = grl.set_mcols(_mcols) | ||
|
||
_meta_path = os.path.join(path, "other_annotations") | ||
if os.path.exists(_meta_path): | ||
_meta = dl.read_object(_meta_path) | ||
grl = grl.set_metadata(_meta.as_dict()) | ||
|
||
return grl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import os | ||
|
||
import dolomite_base as dl | ||
import h5py | ||
from biocutils import combine_sequences | ||
from genomicranges import GenomicRangesList | ||
|
||
|
||
@dl.save_object.register | ||
@dl.validate_saves | ||
def save_genomic_ranges_list(x: GenomicRangesList, path: str, **kwargs): | ||
"""Method for saving :py:class:`~genomicranges.GenomicRanges.GenomicRangesList` | ||
objects to their corresponding file representations, see | ||
:py:meth:`~dolomite_base.save_object.save_object` for details. | ||
Args: | ||
x: | ||
Object to be staged. | ||
path: | ||
Path to a directory in which to save ``x``. | ||
kwargs: | ||
Further arguments to be passed to individual methods. | ||
Returns: | ||
`x` is saved to `path`. | ||
""" | ||
os.mkdir(path) | ||
|
||
with open(os.path.join(path, "OBJECT"), "w", encoding="utf-8") as handle: | ||
handle.write( | ||
'{ "type": "genomic_ranges_list", "genomic_ranges_list": { "version": "1.0" } }' | ||
) | ||
|
||
with h5py.File(os.path.join(path, "partitions.h5"), "w") as handle: | ||
ghandle = handle.create_group("genomic_ranges_list") | ||
|
||
dl.write_integer_vector_to_hdf5( | ||
ghandle, name="lengths", h5type="u4", x=x.get_range_lengths() | ||
) | ||
|
||
if x.get_names() is not None: | ||
dl.write_string_vector_to_hdf5(ghandle, name="names", x=x.get_names()) | ||
|
||
_all_ranges = x.get_ranges() | ||
if isinstance(_all_ranges, list) and len(_all_ranges) > 1: | ||
_all_ranges = combine_sequences(*x.get_ranges()) | ||
|
||
dl.save_object(_all_ranges, path=os.path.join(path, "concatenated")) | ||
|
||
_elem_annotation = x.get_mcols() | ||
if _elem_annotation is not None and _elem_annotation.shape[1] > 0: | ||
dl.save_object(_elem_annotation, path=os.path.join(path, "element_annotations")) | ||
|
||
_meta = x.get_metadata() | ||
if _meta is not None and len(_meta) > 0: | ||
dl.save_object(_meta, path=os.path.join(path, "other_annotations")) | ||
|
||
return |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import os | ||
from tempfile import mkdtemp | ||
|
||
from biocframe import BiocFrame | ||
from dolomite_base import read_object, save_object | ||
from genomicranges import GenomicRanges, GenomicRangesList | ||
from iranges import IRanges | ||
import dolomite_ranges | ||
|
||
|
||
def test_genomic_ranges_list(): | ||
a = GenomicRanges( | ||
seqnames=["chr1", "chr2", "chr1", "chr3"], | ||
ranges=IRanges([1, 3, 2, 4], [10, 30, 50, 60]), | ||
strand=["-", "+", "*", "+"], | ||
mcols=BiocFrame({"score": [1, 2, 3, 4]}), | ||
) | ||
|
||
b = GenomicRanges( | ||
seqnames=["chr2", "chr4", "chr5"], | ||
ranges=IRanges([3, 6, 4], [30, 50, 60]), | ||
strand=["-", "+", "*"], | ||
mcols=BiocFrame({"score": [2, 3, 4]}), | ||
) | ||
|
||
grl = GenomicRangesList(ranges=[a, b], names=["a", "b"]) | ||
|
||
dir = os.path.join(mkdtemp(), "granges") | ||
save_object(grl, dir) | ||
|
||
roundtrip = read_object(dir) | ||
assert roundtrip.get_names() == grl.get_names() | ||
assert len(roundtrip.get_ranges()) == len(grl.get_ranges()) | ||
assert (roundtrip["a"].get_start() == grl["a"].get_start()).all() | ||
assert (roundtrip["a"].get_strand() == grl["a"].get_strand()).all() | ||
assert (roundtrip.get_range_lengths() == grl.get_range_lengths()).all() | ||
|
||
def test_genomic_ranges_list_empty(): | ||
grl = GenomicRangesList.empty(n=100) | ||
|
||
dir = os.path.join(mkdtemp(), "granges_empty") | ||
save_object(grl, dir) | ||
|
||
roundtrip = read_object(dir) | ||
assert roundtrip.get_names() == grl.get_names() | ||
assert len(roundtrip.get_ranges()) == len(grl.get_ranges()) | ||
assert (roundtrip.get_range_lengths() == grl.get_range_lengths()).all() |