From 8f906ad06da3f9692cca66aa5432c43dd34389e9 Mon Sep 17 00:00:00 2001 From: Harry Date: Sat, 21 Oct 2023 05:13:49 +0700 Subject: [PATCH] add bytewax GCS support --- .../contrib/bytewax/bytewax_materialization_dataflow.py | 5 +---- setup.py | 3 ++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_dataflow.py b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_dataflow.py index 7f549648c8..cb5d9d2d82 100644 --- a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_dataflow.py +++ b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_dataflow.py @@ -4,7 +4,6 @@ import pyarrow as pa import pyarrow.parquet as pq -import s3fs from bytewax.dataflow import Dataflow # type: ignore from bytewax.execution import cluster_main from bytewax.inputs import ManualInputConfig @@ -36,9 +35,7 @@ def __init__( self._run_dataflow() def process_path(self, path): - fs = s3fs.S3FileSystem() - logger.info(f"Processing path {path}") - dataset = pq.ParquetDataset(path, filesystem=fs, use_legacy_dataset=False) + dataset = pq.ParquetDataset(path, use_legacy_dataset=False) batches = [] for fragment in dataset.fragments: for batch in fragment.to_table().to_batches(): diff --git a/setup.py b/setup.py index 13c3decd53..738b6cc547 100644 --- a/setup.py +++ b/setup.py @@ -91,6 +91,7 @@ "google-cloud-datastore>=2.1.0,<3", "google-cloud-storage>=1.34.0,<3", "google-cloud-bigtable>=2.11.0,<3", + "gcsfs", ] REDIS_REQUIRED = [ @@ -158,7 +159,7 @@ "moto", "mypy>=0.981,<2", "avro==1.10.0", - "gcsfs>=0.4.0,<=2022.01.0", + "gcsfs", "urllib3>=1.25.4,<2", "psutil==5.9.0", "py>=1.11.0", # https://github.com/pytest-dev/pytest/issues/10420