Skip to content

Commit

Permalink
Add missing call to parent constructor in SimulationDataset
Browse files (browse the repository at this point in the history)
The `SimulationDataset` was missing a call to the parent
`StreamingDataset` constructor, which led to errors when
accessing attributes that are set in that constructor, such as
`epoch_seed_change`:

```
AttributeError: 'SimulationDataset' object has no attribute 'epoch_seed_change'
Traceback:
File "/home/scott/projects/streaming/.venv/lib64/python3.12/site-packages/streamlit/runtime/scriptrunner/exec_code.py", line 88, in exec_func_with_error_handling
    result = func()
             ^^^^^^
File "/home/scott/projects/streaming/.venv/lib64/python3.12/site-packages/streamlit/runtime/scriptrunner/script_runner.py", line 579, in code_to_exec
    exec(code, module.__dict__)
File "/home/scott/projects/streaming/simulation/interfaces/sim_ui.py", line 409, in <module>
    submit_jobs(shuffle_quality, dataset, time_per_sample, node_internet_bandwidth,
File "/home/scott/projects/streaming/simulation/interfaces/sim_ui.py", line 110, in submit_jobs
    for output in gen_sim:
                  ^^^^^^^
File "/home/scott/projects/streaming/simulation/core/main.py", line 110, in simulate
    samples_per_node = dataset.get_samples_per_node(epoch, 0)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scott/projects/streaming/simulation/core/sim_dataset.py", line 367, in get_samples_per_node
    partition = generate_work(self.batching_method, self, self.world, epoch, sample_in_epoch)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scott/projects/streaming/streaming/base/batching/__init__.py", line 45, in generate_work
    return get(dataset, world, epoch, sample_in_epoch)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scott/projects/streaming/streaming/base/batching/random.py", line 49, in generate_work_random_batching
    shuffle_units, small_per_big = dataset.resample_streams(epoch)
                                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scott/projects/streaming/streaming/base/dataset.py", line 878, in resample_streams
    epoch, self.epoch_seed_change)
           ^^^^^^^^^^^^^^^^^^^^^^
```

Closes #831
  • Loading branch information
srstevenson committed Dec 3, 2024
1 parent 93af28a commit c78a1c7
Showing 1 changed file with 23 additions and 0 deletions.
23 changes: 23 additions & 0 deletions simulation/core/sim_dataset.py
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,29 @@ def __init__(self,
# Time how long it takes for StreamingDataset instantiation
t0 = time.time()

super().__init__(streams=streams,
remote=remote,
local=local,
split=split,
download_retry=download_retry,
download_timeout=download_timeout,
validate_hash=validate_hash,
keep_zip=keep_zip,
epoch_size=epoch_size,
predownload=predownload,
cache_limit=cache_limit,
partition_algo=partition_algo,
num_canonical_nodes=num_canonical_nodes,
batch_size=batch_size,
shuffle=shuffle,
shuffle_algo=shuffle_algo,
shuffle_seed=shuffle_seed,
shuffle_block_size=shuffle_block_size,
sampling_method=sampling_method,
sampling_granularity=sampling_granularity,
batching_method=batching_method,
allow_unsafe_types=allow_unsafe_types)

# Global arguments (which do not live in Streams).
self.nodes = nodes
self.devices = devices
Expand Down

0 comments on commit c78a1c7

Please sign in to comment.