From 3503355d6ebfba9b3f7244287058f6627903f4a3 Mon Sep 17 00:00:00 2001
From: Hongsheng Jin
Date: Thu, 19 Dec 2024 11:48:58 +0800
Subject: [PATCH] fix odps quota in hitrate.py & refine error info of CsvReader and ParquetReader (#67)

---
Note: line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch; verify against the upstream
commit before applying.

 tzrec/datasets/csv_dataset.py     | 2 ++
 tzrec/datasets/parquet_dataset.py | 2 ++
 tzrec/tools/hitrate.py            | 1 +
 3 files changed, 5 insertions(+)

diff --git a/tzrec/datasets/csv_dataset.py b/tzrec/datasets/csv_dataset.py
index c899752..b874661 100644
--- a/tzrec/datasets/csv_dataset.py
+++ b/tzrec/datasets/csv_dataset.py
@@ -103,6 +103,8 @@ def __init__(
         self._input_files = []
         for input_path in self._input_path.split(","):
             self._input_files.extend(glob.glob(input_path))
+        if len(self._input_files) == 0:
+            raise RuntimeError(f"No csv files exist in {self._input_path}.")
         dataset = ds.dataset(self._input_files[0], format=self._csv_fmt)
         self.schema = []
         self._ordered_cols = None
diff --git a/tzrec/datasets/parquet_dataset.py b/tzrec/datasets/parquet_dataset.py
index c8f9765..04f7844 100644
--- a/tzrec/datasets/parquet_dataset.py
+++ b/tzrec/datasets/parquet_dataset.py
@@ -76,6 +76,8 @@ def __init__(
         self._input_files = []
         for input_path in self._input_path.split(","):
             self._input_files.extend(glob.glob(input_path))
+        if len(self._input_files) == 0:
+            raise RuntimeError(f"No parquet files exist in {self._input_path}.")
         dataset = ds.dataset(self._input_files[0], format="parquet")
         if self._selected_cols:
             self._ordered_cols = []
diff --git a/tzrec/tools/hitrate.py b/tzrec/tools/hitrate.py
index 75eb79f..eb172a4 100644
--- a/tzrec/tools/hitrate.py
+++ b/tzrec/tools/hitrate.py
@@ -223,6 +223,7 @@ def batch_hitrate(
         batch_size=args.batch_size,
         ivf_nlist=args.ivf_nlist,
         reader_type=args.reader_type,
+        odps_data_quota_name=args.odps_data_quota_name,
     )

     index.nprobe = args.ivf_nprobe