# NOTE(review): this patch was recovered from a copy whose newlines and
# indentation had been collapsed onto a single line. Line breaks were rebuilt
# from the hunk line counts; leading whitespace was reconstructed and is NOT
# guaranteed to match the target tree, especially the nested context in
# dataloader.py. Run `git apply --check` first, and fall back to
# `git apply --ignore-whitespace` if the context fails to match. TODO confirm.
diff --git a/llmfoundry/data/finetuning/dataloader.py b/llmfoundry/data/finetuning/dataloader.py
index 612b8d6385..3e64360a67 100644
--- a/llmfoundry/data/finetuning/dataloader.py
+++ b/llmfoundry/data/finetuning/dataloader.py
@@ -28,6 +28,7 @@
 from llmfoundry.data.text_data import build_streams
 from llmfoundry.utils.config_utils import to_dict_container
 from llmfoundry.utils.exceptions import (
+    FinetuningFileNotFoundError,
     MissingHuggingFaceURLSplitError,
     NotEnoughDatasetSamplesError,
 )
@@ -585,10 +586,8 @@ def _download_remote_hf_dataset(remote_path: str, split: str) -> str:
                         f'{name}/{split}{ext}'
                         for ext in SUPPORTED_EXTENSIONS
                     ]
-                    raise FileNotFoundError(
-                        f'Could not find a file with any of ' + \
-                        f'the supported extensions: {SUPPORTED_EXTENSIONS}\n' + \
-                        f'at {files_searched}',
+                    raise FinetuningFileNotFoundError(
+                        files_searched=files_searched,
                     ) from e
                 else:
                     log.debug(
diff --git a/llmfoundry/utils/exceptions.py b/llmfoundry/utils/exceptions.py
index 9cbea2cac8..4a4321637f 100644
--- a/llmfoundry/utils/exceptions.py
+++ b/llmfoundry/utils/exceptions.py
@@ -481,3 +481,19 @@ def __reduce__(self):
 
     def __str__(self):
         return self.message
+
+
+class FinetuningFileNotFoundError(UserError):
+    """Error thrown when a file can't be found with any supported extension."""
+
+    def __init__(self, files_searched: list[str]) -> None:
+        from llmfoundry.data.finetuning.tasks import SUPPORTED_EXTENSIONS
+        message = (
+            f'Could not find a file with any of ' + \
+            f'the supported extensions: {SUPPORTED_EXTENSIONS}\n' + \
+            f'at {files_searched}'
+        )
+        super().__init__(
+            message,
+            files_searched=files_searched,
+        )