Here's the stacktrace:
```
Downloading and preparing dataset slither-audited-smart-contracts/big-multilabel (download: 1.63 GiB, generated: 4.39 GiB, post-processed: Unknown size, total: 6.01 GiB) to C:/Users/username/.cache/huggingface/datasets/mwritescode___slither-audited-smart-contracts/big-multilabel/1.1.0/4cf503b59ce9d3157914e47f6253de773b7ab828f46642685d4b470b88ca1f13...
Downloading data files: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 498.91it/s]
Extracting data files: 100%|████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 248.87it/s]
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
File ~\anaconda3\lib\site-packages\datasets\builder.py:1597, in GeneratorBasedBuilder._prepare_split_single(self, gen_kwargs, fpath, file_format, max_shard_size, split_info, check_duplicate_keys, job_id)
   1596 try:
-> 1597     writer = writer_class(
   1598         features=self.info.features,
   1599         path=fpath.replace("SSSSS", f"{shard_id:05d}").replace("JJJJJ", f"{job_id:05d}"),
   1600         writer_batch_size=self._writer_batch_size,
   1601         hash_salt=split_info.name,
   1602         check_duplicates=check_duplicate_keys,
   1603         storage_options=self._fs.storage_options,
   1604         embed_local_files=embed_local_files,
   1605     )
   1606 try:

File ~\anaconda3\lib\site-packages\datasets\arrow_writer.py:334, in ArrowWriter.__init__(self, schema, features, path, stream, fingerprint, writer_batch_size, hash_salt, check_duplicates, disable_nullable, update_features, with_metadata, unit, embed_local_files, storage_options)
    329 self._path = (
    330     fs_token_paths[2][0]
    331     if not is_remote_filesystem(self._fs)
    332     else self._fs.unstrip_protocol(fs_token_paths[2][0])
    333 )
--> 334 self.stream = self._fs.open(fs_token_paths[2][0], "wb")
    335 self._closable_stream = True

File ~\anaconda3\lib\site-packages\fsspec\spec.py:1199, in AbstractFileSystem.open(self, path, mode, block_size, cache_options, compression, **kwargs)
   1198 ac = kwargs.pop("autocommit", not self._intrans)
-> 1199 f = self._open(
   1200     path,
   1201     mode=mode,
   1202     block_size=block_size,
   1203     autocommit=ac,
   1204     cache_options=cache_options,
   1205     **kwargs,
   1206 )
   1207 if compression is not None:

File ~\anaconda3\lib\site-packages\fsspec\implementations\local.py:183, in LocalFileSystem._open(self, path, mode, block_size, **kwargs)
    182     self.makedirs(self._parent(path), exist_ok=True)
--> 183 return LocalFileOpener(path, mode, fs=self, **kwargs)

File ~\anaconda3\lib\site-packages\fsspec\implementations\local.py:314, in LocalFileOpener.__init__(self, path, mode, autocommit, fs, compression, **kwargs)
    313 self.blocksize = io.DEFAULT_BUFFER_SIZE
--> 314 self._open()

File ~\anaconda3\lib\site-packages\fsspec\implementations\local.py:319, in LocalFileOpener._open(self)
    318 if self.autocommit or "w" not in self.mode:
--> 319     self.f = open(self.path, mode=self.mode)
    320 if self.compression:

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/username/.cache/huggingface/datasets/mwritescode___slither-audited-smart-contracts/big-multilabel/1.1.0/4cf503b59ce9d3157914e47f6253de773b7ab828f46642685d4b470b88ca1f13.incomplete/slither-audited-smart-contracts-validation-00000-00000-of-NNNNN.arrow'

The above exception was the direct cause of the following exception:

DatasetGenerationError                    Traceback (most recent call last)
Cell In[2], line 11
      6 from datasets import load_dataset
      8 # Due to a bug in the HuggingFace dataset, at the moment two file checksums do not correspond to what
      9 # is in the dataset metadata, thus we have to load the data splits with the flag ignore_verification
     10 # set to true
---> 11 train_set = load_dataset("mwritescode/slither-audited-smart-contracts", 'big-multilabel', split='train', ignore_verifications=True)
     12 test_set = load_dataset("mwritescode/slither-audited-smart-contracts", 'big-multilabel', split='test', ignore_verifications=True)
     13 val_set = load_dataset("mwritescode/slither-audited-smart-contracts", 'big-multilabel', split='validation', ignore_verifications=True)

File ~\anaconda3\lib\site-packages\datasets\load.py:1797, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)
   1794 try_from_hf_gcs = path not in _PACKAGED_DATASETS_MODULES
   1796 # Download and prepare data
-> 1797 builder_instance.download_and_prepare(
   1798     download_config=download_config,
   1799     download_mode=download_mode,
   1800     verification_mode=verification_mode,
   1801     try_from_hf_gcs=try_from_hf_gcs,
   1802     num_proc=num_proc,
   1803     storage_options=storage_options,
   1804 )
   1806 # Build dataset for splits
   1807 keep_in_memory = (
   1808     keep_in_memory if keep_in_memory is not None else is_small_dataset(builder_instance.info.dataset_size)
   1809 )

File ~\anaconda3\lib\site-packages\datasets\builder.py:890, in DatasetBuilder.download_and_prepare(self, output_dir, download_config, download_mode, verification_mode, ignore_verifications, try_from_hf_gcs, dl_manager, base_path, use_auth_token, file_format, max_shard_size, num_proc, storage_options, **download_and_prepare_kwargs)
    888 if num_proc is not None:
    889     prepare_split_kwargs["num_proc"] = num_proc
--> 890 self._download_and_prepare(
    891     dl_manager=dl_manager,
    892     verification_mode=verification_mode,
    893     **prepare_split_kwargs,
    894     **download_and_prepare_kwargs,
    895 )
    896 # Sync info
    897 self.info.dataset_size = sum(split.num_bytes for split in self.info.splits.values())

File ~\anaconda3\lib\site-packages\datasets\builder.py:1649, in GeneratorBasedBuilder._download_and_prepare(self, dl_manager, verification_mode, **prepare_splits_kwargs)
   1648 def _download_and_prepare(self, dl_manager, verification_mode, **prepare_splits_kwargs):
-> 1649     super()._download_and_prepare(
   1650         dl_manager,
   1651         verification_mode,
   1652         check_duplicate_keys=verification_mode == VerificationMode.BASIC_CHECKS
   1653         or verification_mode == VerificationMode.ALL_CHECKS,
   1654         **prepare_splits_kwargs,
   1655     )

File ~\anaconda3\lib\site-packages\datasets\builder.py:985, in DatasetBuilder._download_and_prepare(self, dl_manager, verification_mode, **prepare_split_kwargs)
    981 split_dict.add(split_generator.split_info)
    983 try:
    984     # Prepare split will record examples associated to the split
--> 985     self._prepare_split(split_generator, **prepare_split_kwargs)
    986 except OSError as e:
    987     raise OSError(
    988         "Cannot find data file. "
    989         + (self.manual_download_instructions or "")
    990         + "\nOriginal error:\n"
    991         + str(e)
    992     ) from None

File ~\anaconda3\lib\site-packages\datasets\builder.py:1487, in GeneratorBasedBuilder._prepare_split(self, split_generator, check_duplicate_keys, file_format, num_proc, max_shard_size)
   1485 job_id = 0
   1486 with pbar:
-> 1487     for job_id, done, content in self._prepare_split_single(
   1488         gen_kwargs=gen_kwargs, job_id=job_id, **_prepare_split_args
   1489     ):
   1490         if done:
   1491             result = content

File ~\anaconda3\lib\site-packages\datasets\builder.py:1644, in GeneratorBasedBuilder._prepare_split_single(self, gen_kwargs, fpath, file_format, max_shard_size, split_info, check_duplicate_keys, job_id)
   1642 if isinstance(e, SchemaInferenceError) and e.__context__ is not None:
   1643     e = e.__context__
-> 1644     raise DatasetGenerationError("An error occurred while generating the dataset") from e
   1646 yield job_id, True, (total_num_examples, total_num_bytes, writer._features, num_shards, shard_lengths)

DatasetGenerationError: An error occurred while generating the dataset
```
Is there an updated process to generate the dataset?
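In case it helps, this is the retry I was considering, not a confirmed fix: clear the partially written cache directory left by the failed run (the path is taken from the traceback above), then reload using `verification_mode` and `download_mode`, both of which appear in the `load_dataset` signature in the traceback. The `"no_checks"` and `"force_redownload"` values are my assumption about the accepted strings in this `datasets` version.

```python
import shutil
from pathlib import Path

from datasets import load_dataset

# Assumption: the leftover ".incomplete" cache from the failed run is what blocks a
# retry, so remove this dataset's cache directory first (path as in the traceback).
cache_dir = (Path.home() / ".cache" / "huggingface" / "datasets"
             / "mwritescode___slither-audited-smart-contracts")
shutil.rmtree(cache_dir, ignore_errors=True)

# ignore_verifications is deprecated in recent `datasets` releases; verification_mode
# is the replacement exposed by load_dataset in the traceback above.
train_set = load_dataset(
    "mwritescode/slither-audited-smart-contracts",
    "big-multilabel",
    split="train",
    verification_mode="no_checks",      # skip checksum/split checks (replaces ignore_verifications=True)
    download_mode="force_redownload",   # re-fetch instead of reusing the broken cache
)
```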