From 54fb6d6ccc54c3765f6a06cea4a8dbee461b3203 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Tue, 31 Dec 2024 18:05:00 +0800 Subject: [PATCH] fix: default value is overwritten --- python/python/lance/dataset.py | 33 +++++++++++++++-------------- python/python/tests/test_dataset.py | 7 ++++++ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py index 7e7229b6a9..2274ca2a4b 100644 --- a/python/python/lance/dataset.py +++ b/python/python/lance/dataset.py @@ -507,13 +507,13 @@ def to_table( batch_size: Optional[int] = None, batch_readahead: Optional[int] = None, fragment_readahead: Optional[int] = None, - scan_in_order: bool = True, + scan_in_order: Optional[bool] = None, *, - prefilter: bool = False, - with_row_id: bool = False, - with_row_address: bool = False, - use_stats: bool = True, - fast_search: bool = False, + prefilter: Optional[bool] = None, + with_row_id: Optional[bool] = None, + with_row_address: Optional[bool] = None, + use_stats: Optional[bool] = None, + fast_search: Optional[bool] = None, full_text_query: Optional[Union[str, dict]] = None, io_buffer_size: Optional[int] = None, late_materialization: Optional[bool | List[str]] = None, @@ -558,11 +558,11 @@ def to_table( The number of batches to read ahead. fragment_readahead: int, optional The number of fragments to read ahead. - scan_in_order: bool, default True + scan_in_order: bool, optional, default True Whether to read the fragments and batches in order. If false, throughput may be higher, but batches will be returned out of order and memory use might increase. - prefilter: bool, default False + prefilter: bool, optional, default False Run filter before the vector search. late_materialization: bool or List[str], default None Allows custom control over late materialization. See @@ -570,12 +570,13 @@ def to_table( use_scalar_index: bool, default True Allows custom control over scalar index usage. 
See ``ScannerBuilder.use_scalar_index`` for more information. - with_row_id: bool, default False + with_row_id: bool, optional, default False Return row ID. - with_row_address: bool, default False + with_row_address: bool, optional, default False Return row address - use_stats: bool, default True + use_stats: bool, optional, default True Use stats pushdown during filters. + fast_search: bool, optional, default False full_text_query: str or dict, optional query string to search for, the results will be ranked by BM25. e.g. "hello world", would match documents contains "hello" or "world". @@ -687,12 +688,12 @@ def to_batches( batch_size: Optional[int] = None, batch_readahead: Optional[int] = None, fragment_readahead: Optional[int] = None, - scan_in_order: bool = True, + scan_in_order: Optional[bool] = None, *, - prefilter: bool = False, - with_row_id: bool = False, - with_row_address: bool = False, - use_stats: bool = True, + prefilter: Optional[bool] = None, + with_row_id: Optional[bool] = None, + with_row_address: Optional[bool] = None, + use_stats: Optional[bool] = None, full_text_query: Optional[Union[str, dict]] = None, io_buffer_size: Optional[int] = None, late_materialization: Optional[bool | List[str]] = None, diff --git a/python/python/tests/test_dataset.py b/python/python/tests/test_dataset.py index 587f6a8165..955702aa14 100644 --- a/python/python/tests/test_dataset.py +++ b/python/python/tests/test_dataset.py @@ -2806,3 +2806,10 @@ def test_dataset_drop(tmp_path: Path): assert Path(tmp_path).exists() lance.LanceDataset.drop(tmp_path) assert not Path(tmp_path).exists() + + +def test_dataset_schema(tmp_path: Path): + table = pa.table({"x": [0]}) + ds = lance.write_dataset(table, str(tmp_path)) + ds._default_scan_options = {"with_row_id": True} + assert ds.schema == ds.to_table().schema