From e68903ec6c5e6644f9e60223827e40dd61cd2d1e Mon Sep 17 00:00:00 2001 From: xiaohanzhangcmu Date: Mon, 4 Nov 2024 23:02:38 -0800 Subject: [PATCH 1/3] update --- tests/data/test_dataloader.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/data/test_dataloader.py b/tests/data/test_dataloader.py index 3acffa1f5d..d6968ee7b4 100644 --- a/tests/data/test_dataloader.py +++ b/tests/data/test_dataloader.py @@ -1519,6 +1519,7 @@ def test_ft_dataloader_with_extra_keys(): # TODO: Change this back to xfail after figuring out why it caused CI to hang @pytest.mark.skip def test_text_dataloader_with_extra_keys(): + from streaming.base.constant import BARRIER_FILELOCK, CACHE_FILELOCK max_seq_len = 1024 cfg = { 'dataset': { @@ -1549,15 +1550,23 @@ def test_text_dataloader_with_extra_keys(): device_batch_size = 2 - mock_stat = MagicMock() - mock_stat.st_size = 1024 # Mock st_size with a desired value - mock_stat.st_mode = 33188 # Regular file mode for Unix-based systems + def custom_stat_mock(path: Any): + if any([BARRIER_FILELOCK in path, CACHE_FILELOCK in path]): + return original_os_stat(path) + else: + mock_stat = MagicMock() + mock_stat.st_size = 1024 # Mock st_size with a desired value + mock_stat.st_mode = 33188 # Regular file mode for Unix-based systems + return mock_stat + + original_os_stat = os.stat + #with patch('streaming.base.stream.get_shards', return_value=None): with patch('os.makedirs'), \ patch('builtins.open', new_callable=mock_open, read_data='{"version": 2, "shards": []}'), \ patch('json.load') as mock_json_load, \ - patch('os.stat', return_value=mock_stat), \ + patch('os.stat', side_effect=custom_stat_mock), \ patch('torch.distributed.is_available', return_value=True), \ patch('torch.distributed.is_initialized', return_value=True), \ patch('torch.distributed.broadcast_object_list'), \ From 27add5835bc5aba704e75dce02523b2e0344c1c3 Mon Sep 17 00:00:00 2001 From: xiaohanzhangcmu Date: Mon, 4 Nov 2024 23:04:01 -0800 Subject: [PATCH 2/3] update --- tests/data/test_dataloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data/test_dataloader.py b/tests/data/test_dataloader.py index d6968ee7b4..0aaf95bb32 100644 --- a/tests/data/test_dataloader.py +++ b/tests/data/test_dataloader.py @@ -1517,7 +1517,7 @@ def test_ft_dataloader_with_extra_keys(): ).dataloader # TODO: Change this back to xfail after figuring out why it caused CI to hang -@pytest.mark.skip +@pytest.mark.xfail def test_text_dataloader_with_extra_keys(): from streaming.base.constant import BARRIER_FILELOCK, CACHE_FILELOCK max_seq_len = 1024 From 4922cd11413a274aafcbaa6254316bf83a7ac8cc Mon Sep 17 00:00:00 2001 From: xiaohanzhangcmu Date: Wed, 6 Nov 2024 09:23:54 -0800 Subject: [PATCH 3/3] update --- tests/data/test_dataloader.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/data/test_dataloader.py b/tests/data/test_dataloader.py index 0aaf95bb32..e42c02cff3 100644 --- a/tests/data/test_dataloader.py +++ b/tests/data/test_dataloader.py @@ -1549,19 +1549,13 @@ def test_text_dataloader_with_extra_keys(): ) device_batch_size = 2 - def custom_stat_mock(path: Any): - if any([BARRIER_FILELOCK in path, CACHE_FILELOCK in path]): + if BARRIER_FILELOCK in path or CACHE_FILELOCK in path: return original_os_stat(path) - else: - mock_stat = MagicMock() - mock_stat.st_size = 1024 # Mock st_size with a desired value - mock_stat.st_mode = 33188 # Regular file mode for Unix-based systems - return mock_stat + return MagicMock(st_size=1024, st_mode=33188) # Mock regular file attributes original_os_stat = os.stat - #with patch('streaming.base.stream.get_shards', return_value=None): with patch('os.makedirs'), \ patch('builtins.open', new_callable=mock_open, read_data='{"version": 2, "shards": []}'), \