diff --git a/tzrec/datasets/csv_dataset.py b/tzrec/datasets/csv_dataset.py index c899752..b874661 100644 --- a/tzrec/datasets/csv_dataset.py +++ b/tzrec/datasets/csv_dataset.py @@ -103,6 +103,8 @@ def __init__( self._input_files = [] for input_path in self._input_path.split(","): self._input_files.extend(glob.glob(input_path)) + if len(self._input_files) == 0: + raise RuntimeError(f"No csv files exist in {self._input_path}.") dataset = ds.dataset(self._input_files[0], format=self._csv_fmt) self.schema = [] self._ordered_cols = None diff --git a/tzrec/datasets/parquet_dataset.py b/tzrec/datasets/parquet_dataset.py index c8f9765..04f7844 100644 --- a/tzrec/datasets/parquet_dataset.py +++ b/tzrec/datasets/parquet_dataset.py @@ -76,6 +76,8 @@ def __init__( self._input_files = [] for input_path in self._input_path.split(","): self._input_files.extend(glob.glob(input_path)) + if len(self._input_files) == 0: + raise RuntimeError(f"No parquet files exist in {self._input_path}.") dataset = ds.dataset(self._input_files[0], format="parquet") if self._selected_cols: self._ordered_cols = [] diff --git a/tzrec/tools/hitrate.py b/tzrec/tools/hitrate.py index 75eb79f..eb172a4 100644 --- a/tzrec/tools/hitrate.py +++ b/tzrec/tools/hitrate.py @@ -223,6 +223,7 @@ def batch_hitrate( batch_size=args.batch_size, ivf_nlist=args.ivf_nlist, reader_type=args.reader_type, + odps_data_quota_name=args.odps_data_quota_name, ) index.nprobe = args.ivf_nprobe