lancedb · eddyxu · Dec 31, 2024 · Dec 31, 2024
diff --git a/python/python/lance/dataset.py b/python/python/lance/dataset.py
@@ -507,13 +507,13 @@ def to_table(
         batch_size: Optional[int] = None,
         batch_readahead: Optional[int] = None,
         fragment_readahead: Optional[int] = None,
-        scan_in_order: bool = True,
+        scan_in_order: Optional[bool] = None,
         *,
-        prefilter: bool = False,
-        with_row_id: bool = False,
-        with_row_address: bool = False,
-        use_stats: bool = True,
-        fast_search: bool = False,
+        prefilter: Optional[bool] = None,
+        with_row_id: Optional[bool] = None,
+        with_row_address: Optional[bool] = None,
+        use_stats: Optional[bool] = None,
+        fast_search: Optional[bool] = None,
         full_text_query: Optional[Union[str, dict]] = None,
         io_buffer_size: Optional[int] = None,
         late_materialization: Optional[bool | List[str]] = None,
@@ -558,24 +558,25 @@ def to_table(
             The number of batches to read ahead.
         fragment_readahead: int, optional
             The number of fragments to read ahead.
-        scan_in_order: bool, default True
+        scan_in_order: bool, optional, default True
             Whether to read the fragments and batches in order. If false,
             throughput may be higher, but batches will be returned out of order
             and memory use might increase.
-        prefilter: bool, default False
+        prefilter: bool, optional, default False
             Run filter before the vector search.
         late_materialization: bool or List[str], default None
             Allows custom control over late materialization.  See
             ``ScannerBuilder.late_materialization`` for more information.
         use_scalar_index: bool, default True
             Allows custom control over scalar index usage.  See
             ``ScannerBuilder.use_scalar_index`` for more information.
-        with_row_id: bool, default False
+        with_row_id: bool, optional, default False
             Return row ID.
-        with_row_address: bool, default False
+        with_row_address: bool, optional, default False
             Return row address
-        use_stats: bool, default True
+        use_stats: bool, optional, default True
             Use stats pushdown during filters.
+        fast_search: bool, optional, default False
         full_text_query: str or dict, optional
             query string to search for, the results will be ranked by BM25.
             e.g. "hello world", would match documents contains "hello" or "world".
@@ -687,12 +688,12 @@ def to_batches(
         batch_size: Optional[int] = None,
         batch_readahead: Optional[int] = None,
         fragment_readahead: Optional[int] = None,
-        scan_in_order: bool = True,
+        scan_in_order: Optional[bool] = None,
         *,
-        prefilter: bool = False,
-        with_row_id: bool = False,
-        with_row_address: bool = False,
-        use_stats: bool = True,
+        prefilter: Optional[bool] = None,
+        with_row_id: Optional[bool] = None,
+        with_row_address: Optional[bool] = None,
+        use_stats: Optional[bool] = None,
         full_text_query: Optional[Union[str, dict]] = None,
         io_buffer_size: Optional[int] = None,
         late_materialization: Optional[bool | List[str]] = None,

diff --git a/python/python/tests/test_dataset.py b/python/python/tests/test_dataset.py
@@ -2806,3 +2806,10 @@ def test_dataset_drop(tmp_path: Path):
     assert Path(tmp_path).exists()
     lance.LanceDataset.drop(tmp_path)
     assert not Path(tmp_path).exists()
+
+
+def test_dataset_schema(tmp_path: Path):
+    table = pa.table({"x": [0]})
+    ds = lance.write_dataset(table, str(tmp_path))  # noqa: F841
+    ds._default_scan_options = {"with_row_id": True}
+    assert ds.schema == ds.to_table().schema