Skip to content

Commit

Permalink
Add comments & more test
Browse files Browse the repository at this point in the history
  • Loading branch information
es94129 committed Jan 2, 2025
1 parent a3b63cb commit 3d33e23
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 0 deletions.
1 change: 1 addition & 0 deletions streaming/base/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ def __init__(self,
**kwargs,
}

# Construct a Stream instance using registry-based construction
default = construct_from_registry(
name='stream',
registry=streams_registry,
Expand Down
25 changes: 25 additions & 0 deletions tests/test_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

from streaming import Stream, StreamingDataset
from streaming.base.distributed import barrier
from streaming.base.registry_utils import construct_from_registry
from streaming.base.stream import streams_registry
from tests.common.utils import convert_to_mds


Expand Down Expand Up @@ -69,3 +71,26 @@ def test_missing_index_json_local(local_remote_dir: Any):
stream = Stream(remote=None, local=remote_dir)
with pytest.raises(RuntimeError, match='No `remote` provided, but local file.*'):
_ = StreamingDataset(streams=[stream], batch_size=1)


def test_construct_stream_from_registry():
    """Check that the streams registry builds a ``Stream`` from plain kwargs.

    The instance returned by ``construct_from_registry`` must be a ``Stream``
    whose ``remote`` and ``local`` attributes equal the values passed in.
    """
    remote = 'remote_dir'
    local = tempfile.mkdtemp()

    try:
        kwargs = {
            'remote': remote,
            'local': local,
        }

        stream_instance = construct_from_registry(
            'stream',
            streams_registry,
            partial_function=False,
            kwargs=kwargs,
        )

        assert isinstance(stream_instance, Stream)
        assert remote == stream_instance.remote
        assert local == stream_instance.local
    finally:
        # mkdtemp() leaves deletion to the caller; clean up even when
        # construction raises or an assertion above fails.
        shutil.rmtree(local, ignore_errors=True)

0 comments on commit 3d33e23

Please sign in to comment.