From 86d62215f2338ea9d48c6e723e907c82cbe5500b Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Wed, 24 Jan 2024 11:06:54 -0500 Subject: [PATCH 01/30] fix: Correct the returning class proto type of StreamFeatureView to StreamFeatureViewProto instead of FeatureViewProto. (#3843) --- sdk/python/feast/feature_view.py | 3 ++- sdk/python/feast/stream_feature_view.py | 7 ++++++- sdk/python/tests/unit/test_feature_views.py | 22 +++++++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index e26759ba92e..67f9662d317 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -17,6 +17,7 @@ from typing import Dict, List, Optional, Tuple, Type from google.protobuf.duration_pb2 import Duration +from google.protobuf.message import Message from typeguard import typechecked from feast import utils @@ -274,7 +275,7 @@ def ensure_valid(self): raise ValueError("Feature view has no entities.") @property - def proto_class(self) -> Type[FeatureViewProto]: + def proto_class(self) -> Type[Message]: return FeatureViewProto def with_join_key_map(self, join_key_map: Dict[str, str]): diff --git a/sdk/python/feast/stream_feature_view.py b/sdk/python/feast/stream_feature_view.py index d3a2164788f..6a204d68136 100644 --- a/sdk/python/feast/stream_feature_view.py +++ b/sdk/python/feast/stream_feature_view.py @@ -3,9 +3,10 @@ import warnings from datetime import datetime, timedelta from types import FunctionType -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Type, Union import dill +from google.protobuf.message import Message from typeguard import typechecked from feast import flags_helper, utils @@ -298,6 +299,10 @@ def __copy__(self): fv.projection = copy.copy(self.projection) return fv + @property + def proto_class(self) -> Type[Message]: + return StreamFeatureViewProto + def stream_feature_view( *, diff --git 
a/sdk/python/tests/unit/test_feature_views.py b/sdk/python/tests/unit/test_feature_views.py index 379396e5c63..afef332d372 100644 --- a/sdk/python/tests/unit/test_feature_views.py +++ b/sdk/python/tests/unit/test_feature_views.py @@ -10,6 +10,9 @@ from feast.feature_view import FeatureView from feast.field import Field from feast.infra.offline_stores.file_source import FileSource +from feast.protos.feast.core.StreamFeatureView_pb2 import ( + StreamFeatureView as StreamFeatureViewProto, +) from feast.protos.feast.types.Value_pb2 import ValueType from feast.stream_feature_view import StreamFeatureView, stream_feature_view from feast.types import Float32 @@ -277,3 +280,22 @@ def test_hash(): def test_field_types(): with pytest.raises(TypeError): Field(name="name", dtype=ValueType.INT32) + + +def test_stream_feature_view_proto_type(): + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + ) + sfv = StreamFeatureView( + name="test stream featureview proto class", + entities=[], + ttl=timedelta(days=30), + source=stream_source, + aggregations=[], + ) + assert sfv.proto_class is StreamFeatureViewProto From 9a3590ea771ca3c3224f5e1a833453144e54284e Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Wed, 24 Jan 2024 16:16:15 -0500 Subject: [PATCH 02/30] fix: Verify the existence of Registry tables in snowflake before calling CREATE sql command. Allow read-only user to call feast apply. 
(#3851) Signed-off-by: Shuchu Han --- sdk/python/feast/infra/registry/snowflake.py | 71 ++++++++++-- .../registry/snowflake_registry_table.py | 104 ++++++++++++++++++ .../infra/utils/snowflake/snowflake_utils.py | 10 +- 3 files changed, 171 insertions(+), 14 deletions(-) create mode 100644 sdk/python/feast/infra/utils/snowflake/registry/snowflake_registry_table.py diff --git a/sdk/python/feast/infra/registry/snowflake.py b/sdk/python/feast/infra/registry/snowflake.py index 56c7bc1f659..40ec27e7d9c 100644 --- a/sdk/python/feast/infra/registry/snowflake.py +++ b/sdk/python/feast/infra/registry/snowflake.py @@ -124,15 +124,19 @@ def __init__( f'"{self.registry_config.database}"."{self.registry_config.schema_}"' ) - with GetSnowflakeConnection(self.registry_config) as conn: - sql_function_file = f"{os.path.dirname(feast.__file__)}/infra/utils/snowflake/registry/snowflake_table_creation.sql" - with open(sql_function_file, "r") as file: - sqlFile = file.read() - - sqlCommands = sqlFile.split(";") - for command in sqlCommands: - query = command.replace("REGISTRY_PATH", f"{self.registry_path}") - execute_snowflake_statement(conn, query) + if not self._verify_registry_database(): + # Verify the existing resitry database schema from snowflake. If any table names and column types is wrong, run table recreation SQL. 
+ with GetSnowflakeConnection(self.registry_config) as conn: + sql_function_file = f"{os.path.dirname(feast.__file__)}/infra/utils/snowflake/registry/snowflake_table_creation.sql" + with open(sql_function_file, "r") as file: + sqlFile = file.read() + + sqlCommands = sqlFile.split(";") + for command in sqlCommands: + query = command.replace( + "REGISTRY_PATH", f"{self.registry_path}" + ) + execute_snowflake_statement(conn, query) self.cached_registry_proto = self.proto() proto_registry_utils.init_project_metadata(self.cached_registry_proto, project) @@ -145,6 +149,55 @@ def __init__( ) self.project = project + def _verify_registry_database( + self, + ) -> bool: + """Verify the records in registry database. To check: + 1, the 11 tables are existed. + 2, the column types are correct. + + Example return from snowflake's cursor.describe("SELECT * FROM a_table") command: + [ResultMetadata(name='ENTITY_NAME', type_code=2, display_size=None, internal_size=16777216, precision=None, scale=None, is_nullable=False), + ResultMetadata(name='PROJECT_ID', type_code=2, display_size=None, internal_size=16777216, precision=None, scale=None, is_nullable=False), + ResultMetadata(name='LAST_UPDATED_TIMESTAMP', type_code=6, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=False), + ResultMetadata(name='ENTITY_PROTO', type_code=11, display_size=None, internal_size=8388608, precision=None, scale=None, is_nullable=False)] + + Returns: + True if the necessary 11 tables are existed in Snowflake and schema of each table is correct. + False if failure happens. 
+ """ + + from feast.infra.utils.snowflake.registry.snowflake_registry_table import ( + snowflake_registry_table_names_and_column_types as expect_tables, + ) + + res = True + + try: + with GetSnowflakeConnection(self.registry_config) as conn: + for table_name in expect_tables: + result_metadata_list = conn.cursor().describe( + f"SELECT * FROM {table_name}" + ) + for col in result_metadata_list: + if ( + expect_tables[table_name][col.name]["type_code"] + != col.type_code + ): + res = False + break + except Exception as e: + res = False # Set to False for all errors. + logger.debug( + f"Failed to verify Registry tables and columns types with exception: {e}." + ) + finally: + # The implementation in snowflake_utils.py will cache the established connection without re-connection logic. + # conn.close() + pass + + return res + def refresh(self, project: Optional[str] = None): if project: project_metadata = proto_registry_utils.get_project_metadata( diff --git a/sdk/python/feast/infra/utils/snowflake/registry/snowflake_registry_table.py b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_registry_table.py new file mode 100644 index 00000000000..d24fbc27ecb --- /dev/null +++ b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_registry_table.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- + +""" +The table names and column types are following the creation detail listed +in "snowflake_table_creation.sql". 
+ +Snowflake Reference: +1, ResultMetadata: https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#label-python-connector-resultmetadata-object +2, Type Codes: https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#label-python-connector-type-codes +---------------------------------------------- +type_code String Representation Data Type +0 FIXED NUMBER/INT +1 REAL REAL +2 TEXT VARCHAR/STRING +3 DATE DATE +4 TIMESTAMP TIMESTAMP +5 VARIANT VARIANT +6 TIMESTAMP_LTZ TIMESTAMP_LTZ +7 TIMESTAMP_TZ TIMESTAMP_TZ +8 TIMESTAMP_NTZ TIMESTAMP_TZ +9 OBJECT OBJECT +10 ARRAY ARRAY +11 BINARY BINARY +12 TIME TIME +13 BOOLEAN BOOLEAN +---------------------------------------------- + +(last update: 2023-11-30) + +""" + +snowflake_registry_table_names_and_column_types = { + "DATA_SOURCES": { + "DATA_SOURCE_NAME": {"type_code": 2, "type": "VARCHAR"}, + "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + "DATA_SOURCE_PROTO": {"type_code": 11, "type": "BINARY"}, + }, + "ENTITIES": { + "ENTITY_NAME": {"type_code": 2, "type": "VARCHAR"}, + "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + "ENTITY_PROTO": {"type_code": 11, "type": "BINARY"}, + }, + "FEAST_METADATA": { + "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, + "METADATA_KEY": {"type_code": 2, "type": "VARCHAR"}, + "METADATA_VALUE": {"type_code": 2, "type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + }, + "FEATURE_SERVICES": { + "FEATURE_SERVICE_NAME": {"type_code": 2, "type": "VARCHAR"}, + "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + "FEATURE_SERVICE_PROTO": {"type_code": 11, "type": "BINARY"}, + }, + "FEATURE_VIEWS": { + "FEATURE_VIEW_NAME": {"type_code": 2, "type": "VARCHAR"}, + "PROJECT_ID": {"type_code": 
2, "type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + "FEATURE_VIEW_PROTO": {"type_code": 11, "type": "BINARY"}, + "MATERIALIZED_INTERVALS": {"type_code": 11, "type": "BINARY"}, + "USER_METADATA": {"type_code": 11, "type": "BINARY"}, + }, + "MANAGED_INFRA": { + "INFRA_NAME": {"type_code": 2, "type": "VARCHAR"}, + "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + "INFRA_PROTO": {"type_code": 11, "type": "BINARY"}, + }, + "ON_DEMAND_FEATURE_VIEWS": { + "ON_DEMAND_FEATURE_VIEW_NAME": {"type_code": 2, "type": "VARCHAR"}, + "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + "ON_DEMAND_FEATURE_VIEW_PROTO": {"type_code": 11, "type": "BINARY"}, + "USER_METADATA": {"type_code": 11, "type": "BINARY"}, + }, + "REQUEST_FEATURE_VIEWS": { + "REQUEST_FEATURE_VIEW_NAME": {"type_code": 2, "type": "VARCHAR"}, + "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + "REQUEST_FEATURE_VIEW_PROTO": {"type_code": 11, "type": "BINARY"}, + "USER_METADATA": {"type_code": 11, "type": "BINARY"}, + }, + "SAVED_DATASETS": { + "SAVED_DATASET_NAME": {"type_code": 2, "type": "VARCHAR"}, + "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + "SAVED_DATASET_PROTO": {"type_code": 11, "type": "BINARY"}, + }, + "STREAM_FEATURE_VIEWS": { + "STREAM_FEATURE_VIEW_NAME": {"type_code": 2, "type": "VARCHAR"}, + "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + "STREAM_FEATURE_VIEW_PROTO": {"type_code": 11, "type": "BINARY"}, + "USER_METADATA": {"type_code": 11, "type": "BINARY"}, + }, + "VALIDATION_REFERENCES": { + "VALIDATION_REFERENCE_NAME": {"type_code": 2, "type": "VARCHAR"}, + "PROJECT_ID": {"type_code": 2, 
"type": "VARCHAR"}, + "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, + "VALIDATION_REFERENCE_PROTO": {"type_code": 11, "type": "BINARY"}, + }, +} diff --git a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py index a4cda89a6f6..3a56619bdb5 100644 --- a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py +++ b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py @@ -49,19 +49,19 @@ def __init__(self, config: str, autocommit=True): def __enter__(self): - assert self.config.type in [ + assert self.config.type in { "snowflake.registry", "snowflake.offline", "snowflake.engine", "snowflake.online", - ] + } if self.config.type not in _cache: if self.config.type == "snowflake.registry": config_header = "connections.feast_registry" elif self.config.type == "snowflake.offline": config_header = "connections.feast_offline_store" - if self.config.type == "snowflake.engine": + elif self.config.type == "snowflake.engine": config_header = "connections.feast_batch_engine" elif self.config.type == "snowflake.online": config_header = "connections.feast_online_store" @@ -113,11 +113,11 @@ def __exit__(self, exc_type, exc_val, exc_tb): def assert_snowflake_feature_names(feature_view: FeatureView) -> None: for feature in feature_view.features: - assert feature.name not in [ + assert feature.name not in { "entity_key", "feature_name", "feature_value", - ], f"Feature Name: {feature.name} is a protected name to ensure query stability" + }, f"Feature Name: {feature.name} is a protected name to ensure query stability" return None From c0d358a49d5f576bb9f1017d1ee0db2d6cd5f1a5 Mon Sep 17 00:00:00 2001 From: Edson Tirelli Date: Thu, 25 Jan 2024 15:25:19 -0500 Subject: [PATCH 03/30] =?UTF-8?q?revert:=20Verify=20the=20existence=20of?= =?UTF-8?q?=20Registry=20tables=20in=20snowflake=E2=80=A6=20(#3907)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert 
"fix: Verify the existence of Registry tables in snowflake before calling CREATE sql command. Allow read-only user to call feast apply. (#3851)" This reverts commit 9a3590ea771ca3c3224f5e1a833453144e54284e. Signed-off-by: Edson Tirelli --- sdk/python/feast/infra/registry/snowflake.py | 71 ++---------- .../registry/snowflake_registry_table.py | 104 ------------------ .../infra/utils/snowflake/snowflake_utils.py | 10 +- 3 files changed, 14 insertions(+), 171 deletions(-) delete mode 100644 sdk/python/feast/infra/utils/snowflake/registry/snowflake_registry_table.py diff --git a/sdk/python/feast/infra/registry/snowflake.py b/sdk/python/feast/infra/registry/snowflake.py index 40ec27e7d9c..56c7bc1f659 100644 --- a/sdk/python/feast/infra/registry/snowflake.py +++ b/sdk/python/feast/infra/registry/snowflake.py @@ -124,19 +124,15 @@ def __init__( f'"{self.registry_config.database}"."{self.registry_config.schema_}"' ) - if not self._verify_registry_database(): - # Verify the existing resitry database schema from snowflake. If any table names and column types is wrong, run table recreation SQL. 
- with GetSnowflakeConnection(self.registry_config) as conn: - sql_function_file = f"{os.path.dirname(feast.__file__)}/infra/utils/snowflake/registry/snowflake_table_creation.sql" - with open(sql_function_file, "r") as file: - sqlFile = file.read() - - sqlCommands = sqlFile.split(";") - for command in sqlCommands: - query = command.replace( - "REGISTRY_PATH", f"{self.registry_path}" - ) - execute_snowflake_statement(conn, query) + with GetSnowflakeConnection(self.registry_config) as conn: + sql_function_file = f"{os.path.dirname(feast.__file__)}/infra/utils/snowflake/registry/snowflake_table_creation.sql" + with open(sql_function_file, "r") as file: + sqlFile = file.read() + + sqlCommands = sqlFile.split(";") + for command in sqlCommands: + query = command.replace("REGISTRY_PATH", f"{self.registry_path}") + execute_snowflake_statement(conn, query) self.cached_registry_proto = self.proto() proto_registry_utils.init_project_metadata(self.cached_registry_proto, project) @@ -149,55 +145,6 @@ def __init__( ) self.project = project - def _verify_registry_database( - self, - ) -> bool: - """Verify the records in registry database. To check: - 1, the 11 tables are existed. - 2, the column types are correct. - - Example return from snowflake's cursor.describe("SELECT * FROM a_table") command: - [ResultMetadata(name='ENTITY_NAME', type_code=2, display_size=None, internal_size=16777216, precision=None, scale=None, is_nullable=False), - ResultMetadata(name='PROJECT_ID', type_code=2, display_size=None, internal_size=16777216, precision=None, scale=None, is_nullable=False), - ResultMetadata(name='LAST_UPDATED_TIMESTAMP', type_code=6, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=False), - ResultMetadata(name='ENTITY_PROTO', type_code=11, display_size=None, internal_size=8388608, precision=None, scale=None, is_nullable=False)] - - Returns: - True if the necessary 11 tables are existed in Snowflake and schema of each table is correct. 
- False if failure happens. - """ - - from feast.infra.utils.snowflake.registry.snowflake_registry_table import ( - snowflake_registry_table_names_and_column_types as expect_tables, - ) - - res = True - - try: - with GetSnowflakeConnection(self.registry_config) as conn: - for table_name in expect_tables: - result_metadata_list = conn.cursor().describe( - f"SELECT * FROM {table_name}" - ) - for col in result_metadata_list: - if ( - expect_tables[table_name][col.name]["type_code"] - != col.type_code - ): - res = False - break - except Exception as e: - res = False # Set to False for all errors. - logger.debug( - f"Failed to verify Registry tables and columns types with exception: {e}." - ) - finally: - # The implementation in snowflake_utils.py will cache the established connection without re-connection logic. - # conn.close() - pass - - return res - def refresh(self, project: Optional[str] = None): if project: project_metadata = proto_registry_utils.get_project_metadata( diff --git a/sdk/python/feast/infra/utils/snowflake/registry/snowflake_registry_table.py b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_registry_table.py deleted file mode 100644 index d24fbc27ecb..00000000000 --- a/sdk/python/feast/infra/utils/snowflake/registry/snowflake_registry_table.py +++ /dev/null @@ -1,104 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -The table names and column types are following the creation detail listed -in "snowflake_table_creation.sql". 
- -Snowflake Reference: -1, ResultMetadata: https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#label-python-connector-resultmetadata-object -2, Type Codes: https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#label-python-connector-type-codes ----------------------------------------------- -type_code String Representation Data Type -0 FIXED NUMBER/INT -1 REAL REAL -2 TEXT VARCHAR/STRING -3 DATE DATE -4 TIMESTAMP TIMESTAMP -5 VARIANT VARIANT -6 TIMESTAMP_LTZ TIMESTAMP_LTZ -7 TIMESTAMP_TZ TIMESTAMP_TZ -8 TIMESTAMP_NTZ TIMESTAMP_TZ -9 OBJECT OBJECT -10 ARRAY ARRAY -11 BINARY BINARY -12 TIME TIME -13 BOOLEAN BOOLEAN ----------------------------------------------- - -(last update: 2023-11-30) - -""" - -snowflake_registry_table_names_and_column_types = { - "DATA_SOURCES": { - "DATA_SOURCE_NAME": {"type_code": 2, "type": "VARCHAR"}, - "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - "DATA_SOURCE_PROTO": {"type_code": 11, "type": "BINARY"}, - }, - "ENTITIES": { - "ENTITY_NAME": {"type_code": 2, "type": "VARCHAR"}, - "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - "ENTITY_PROTO": {"type_code": 11, "type": "BINARY"}, - }, - "FEAST_METADATA": { - "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, - "METADATA_KEY": {"type_code": 2, "type": "VARCHAR"}, - "METADATA_VALUE": {"type_code": 2, "type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - }, - "FEATURE_SERVICES": { - "FEATURE_SERVICE_NAME": {"type_code": 2, "type": "VARCHAR"}, - "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - "FEATURE_SERVICE_PROTO": {"type_code": 11, "type": "BINARY"}, - }, - "FEATURE_VIEWS": { - "FEATURE_VIEW_NAME": {"type_code": 2, "type": "VARCHAR"}, - "PROJECT_ID": {"type_code": 
2, "type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - "FEATURE_VIEW_PROTO": {"type_code": 11, "type": "BINARY"}, - "MATERIALIZED_INTERVALS": {"type_code": 11, "type": "BINARY"}, - "USER_METADATA": {"type_code": 11, "type": "BINARY"}, - }, - "MANAGED_INFRA": { - "INFRA_NAME": {"type_code": 2, "type": "VARCHAR"}, - "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - "INFRA_PROTO": {"type_code": 11, "type": "BINARY"}, - }, - "ON_DEMAND_FEATURE_VIEWS": { - "ON_DEMAND_FEATURE_VIEW_NAME": {"type_code": 2, "type": "VARCHAR"}, - "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - "ON_DEMAND_FEATURE_VIEW_PROTO": {"type_code": 11, "type": "BINARY"}, - "USER_METADATA": {"type_code": 11, "type": "BINARY"}, - }, - "REQUEST_FEATURE_VIEWS": { - "REQUEST_FEATURE_VIEW_NAME": {"type_code": 2, "type": "VARCHAR"}, - "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - "REQUEST_FEATURE_VIEW_PROTO": {"type_code": 11, "type": "BINARY"}, - "USER_METADATA": {"type_code": 11, "type": "BINARY"}, - }, - "SAVED_DATASETS": { - "SAVED_DATASET_NAME": {"type_code": 2, "type": "VARCHAR"}, - "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - "SAVED_DATASET_PROTO": {"type_code": 11, "type": "BINARY"}, - }, - "STREAM_FEATURE_VIEWS": { - "STREAM_FEATURE_VIEW_NAME": {"type_code": 2, "type": "VARCHAR"}, - "PROJECT_ID": {"type_code": 2, "type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - "STREAM_FEATURE_VIEW_PROTO": {"type_code": 11, "type": "BINARY"}, - "USER_METADATA": {"type_code": 11, "type": "BINARY"}, - }, - "VALIDATION_REFERENCES": { - "VALIDATION_REFERENCE_NAME": {"type_code": 2, "type": "VARCHAR"}, - "PROJECT_ID": {"type_code": 2, 
"type": "VARCHAR"}, - "LAST_UPDATED_TIMESTAMP": {"type_code": 6, "type": "TIMESTAMP_LTZ"}, - "VALIDATION_REFERENCE_PROTO": {"type_code": 11, "type": "BINARY"}, - }, -} diff --git a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py index 3a56619bdb5..a4cda89a6f6 100644 --- a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py +++ b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py @@ -49,19 +49,19 @@ def __init__(self, config: str, autocommit=True): def __enter__(self): - assert self.config.type in { + assert self.config.type in [ "snowflake.registry", "snowflake.offline", "snowflake.engine", "snowflake.online", - } + ] if self.config.type not in _cache: if self.config.type == "snowflake.registry": config_header = "connections.feast_registry" elif self.config.type == "snowflake.offline": config_header = "connections.feast_offline_store" - elif self.config.type == "snowflake.engine": + if self.config.type == "snowflake.engine": config_header = "connections.feast_batch_engine" elif self.config.type == "snowflake.online": config_header = "connections.feast_online_store" @@ -113,11 +113,11 @@ def __exit__(self, exc_type, exc_val, exc_tb): def assert_snowflake_feature_names(feature_view: FeatureView) -> None: for feature in feature_view.features: - assert feature.name not in { + assert feature.name not in [ "entity_key", "feature_name", "feature_value", - }, f"Feature Name: {feature.name} is a protected name to ensure query stability" + ], f"Feature Name: {feature.name} is a protected name to ensure query stability" return None From 8d6bec8fc47986c84f366ce3edfe7d03fa6b2e9f Mon Sep 17 00:00:00 2001 From: John Lemmon <137814163+JohnLemmonMedely@users.noreply.github.com> Date: Thu, 25 Jan 2024 14:27:04 -0600 Subject: [PATCH 04/30] feat: Add support for arrays in snowflake (#3769) Adds support for arrays in snowflake Signed-off-by: john.lemmon --- .../feast/infra/offline_stores/snowflake.py | 
31 ++++ .../infra/offline_stores/snowflake_source.py | 6 +- .../snowflake_python_udfs_creation.sql | 56 ++++++ .../snowflake/snowpark/snowflake_udfs.py | 175 ++++++++++++++++++ sdk/python/feast/type_map.py | 8 + sdk/python/tests/data/data_creator.py | 1 + .../feature_repos/repo_configuration.py | 4 +- .../universal/data_sources/snowflake.py | 4 +- .../materialization/test_snowflake.py | 84 +++++++-- 9 files changed, 350 insertions(+), 19 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index 38568ce79b2..4f11b1ac42c 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -1,4 +1,5 @@ import contextlib +import json import os import uuid import warnings @@ -51,6 +52,17 @@ ) from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage +from feast.types import ( + Array, + Bool, + Bytes, + Float32, + Float64, + Int32, + Int64, + String, + UnixTimestamp, +) from feast.usage import log_exceptions_and_usage try: @@ -320,6 +332,7 @@ def query_generator() -> Iterator[str]: on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs( feature_refs, project, registry ), + feature_views=feature_views, metadata=RetrievalMetadata( features=feature_refs, keys=list(entity_schema.keys() - {entity_df_event_timestamp_col}), @@ -398,9 +411,12 @@ def __init__( config: RepoConfig, full_feature_names: bool, on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, + feature_views: Optional[List[FeatureView]] = None, metadata: Optional[RetrievalMetadata] = None, ): + if feature_views is None: + feature_views = [] if not isinstance(query, str): self._query_generator = query else: @@ -416,6 +432,7 @@ def query_generator() -> Iterator[str]: self.config = config self._full_feature_names = full_feature_names self._on_demand_feature_views = on_demand_feature_views or [] + 
self._feature_views = feature_views self._metadata = metadata self.export_path: Optional[str] if self.config.offline_store.blob_export_location: @@ -436,6 +453,20 @@ def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: self.snowflake_conn, self.to_sql() ).fetch_pandas_all() + for feature_view in self._feature_views: + for feature in feature_view.features: + if feature.dtype in [ + Array(String), + Array(Bytes), + Array(Int32), + Array(Int64), + Array(UnixTimestamp), + Array(Float64), + Array(Float32), + Array(Bool), + ]: + df[feature.name] = [json.loads(x) for x in df[feature.name]] + return df def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: diff --git a/sdk/python/feast/infra/offline_stores/snowflake_source.py b/sdk/python/feast/infra/offline_stores/snowflake_source.py index 95bd46f1ec1..0cbf82dd1c1 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake_source.py +++ b/sdk/python/feast/infra/offline_stores/snowflake_source.py @@ -279,12 +279,12 @@ def get_table_column_names_and_types( else: row["snowflake_type"] = "NUMBERwSCALE" - elif row["type_code"] in [5, 9, 10, 12]: + elif row["type_code"] in [5, 9, 12]: error = snowflake_unsupported_map[row["type_code"]] raise NotImplementedError( f"The following Snowflake Data Type is not supported: {error}" ) - elif row["type_code"] in [1, 2, 3, 4, 6, 7, 8, 11, 13]: + elif row["type_code"] in [1, 2, 3, 4, 6, 7, 8, 10, 11, 13]: row["snowflake_type"] = snowflake_type_code_map[row["type_code"]] else: raise NotImplementedError( @@ -305,6 +305,7 @@ def get_table_column_names_and_types( 6: "TIMESTAMP_LTZ", 7: "TIMESTAMP_TZ", 8: "TIMESTAMP_NTZ", + 10: "ARRAY", 11: "BINARY", 13: "BOOLEAN", } @@ -312,7 +313,6 @@ def get_table_column_names_and_types( snowflake_unsupported_map = { 5: "VARIANT -- Try converting to VARCHAR", 9: "OBJECT -- Try converting to VARCHAR", - 10: "ARRAY -- Try converting to VARCHAR", 12: "TIME -- Try converting to VARCHAR", } diff --git 
a/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_python_udfs_creation.sql b/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_python_udfs_creation.sql index a197a3ee4cd..a444c0b7c5c 100644 --- a/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_python_udfs_creation.sql +++ b/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_python_udfs_creation.sql @@ -14,6 +14,62 @@ CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_varchar_to_string_pro HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_varchar_to_string_proto' IMPORTS = ('@STAGE_HOLDER/feast.zip'); +CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_bytes_to_list_bytes_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_bytes_to_list_bytes_proto' + IMPORTS = ('@STAGE_HOLDER/feast.zip'); + +CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_varchar_to_list_string_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_varchar_to_list_string_proto' + IMPORTS = ('@STAGE_HOLDER/feast.zip'); + +CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_number_to_list_int32_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_number_to_list_int32_proto' + IMPORTS = ('@STAGE_HOLDER/feast.zip'); + +CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_number_to_list_int64_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 
'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_number_to_list_int64_proto' + IMPORTS = ('@STAGE_HOLDER/feast.zip'); + +CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_float_to_list_double_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_float_to_list_double_proto' + IMPORTS = ('@STAGE_HOLDER/feast.zip'); + +CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_boolean_to_list_bool_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_boolean_to_list_bool_proto' + IMPORTS = ('@STAGE_HOLDER/feast.zip'); + +CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_timestamp_to_list_unix_timestamp_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_timestamp_to_list_unix_timestamp_proto' + IMPORTS = ('@STAGE_HOLDER/feast.zip'); + CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_number_to_int32_proto(df NUMBER) RETURNS BINARY LANGUAGE PYTHON diff --git a/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_udfs.py b/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_udfs.py index 02311ca55d6..f5d5f10631f 100644 --- a/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_udfs.py +++ b/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_udfs.py @@ -1,6 +1,7 @@ import sys from binascii import unhexlify +import numpy as np import pandas from _snowflake import vectorized @@ -59,6 +60,180 @@ def feast_snowflake_varchar_to_string_proto(df): return df +""" +CREATE OR REPLACE FUNCTION feast_snowflake_array_bytes_to_list_bytes_proto(df ARRAY) + 
RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_bytes_to_list_bytes_proto' + IMPORTS = ('@feast_stage/feast.zip'); +""" +# ValueType.BYTES_LIST = 11 +@vectorized(input=pandas.DataFrame) +def feast_snowflake_array_bytes_to_list_bytes_proto(df): + sys._xoptions["snowflake_partner_attribution"].append("feast") + + # Sometimes bytes come in as strings so we need to convert back to bytes + numpy_arrays = np.asarray(df[0].to_list()).astype(bytes) + + df = list( + map( + ValueProto.SerializeToString, + python_values_to_proto_values(numpy_arrays, ValueType.BYTES_LIST), + ) + ) + return df + + +""" +CREATE OR REPLACE FUNCTION feast_snowflake_array_varchar_to_list_string_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_varchar_to_list_string_proto' + IMPORTS = ('@feast_stage/feast.zip'); +""" + + +@vectorized(input=pandas.DataFrame) +def feast_snowflake_array_varchar_to_list_string_proto(df): + sys._xoptions["snowflake_partner_attribution"].append("feast") + + df = list( + map( + ValueProto.SerializeToString, + python_values_to_proto_values(df[0].to_numpy(), ValueType.STRING_LIST), + ) + ) + return df + + +""" +CREATE OR REPLACE FUNCTION feast_snowflake_array_number_to_list_int32_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_number_to_list_int32_proto' + IMPORTS = ('@feast_stage/feast.zip'); +""" + + +@vectorized(input=pandas.DataFrame) +def feast_snowflake_array_number_to_list_int32_proto(df): + sys._xoptions["snowflake_partner_attribution"].append("feast") + + df = list( + map( + ValueProto.SerializeToString, +
python_values_to_proto_values(df[0].to_numpy(), ValueType.INT32_LIST), + ) + ) + return df + + +""" +CREATE OR REPLACE FUNCTION feast_snowflake_array_number_to_list_int64_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_number_to_list_int64_proto' + IMPORTS = ('@feast_stage/feast.zip'); +""" + + +@vectorized(input=pandas.DataFrame) +def feast_snowflake_array_number_to_list_int64_proto(df): + sys._xoptions["snowflake_partner_attribution"].append("feast") + + df = list( + map( + ValueProto.SerializeToString, + python_values_to_proto_values(df[0].to_numpy(), ValueType.INT64_LIST), + ) + ) + return df + + +""" +CREATE OR REPLACE FUNCTION feast_snowflake_array_float_to_list_double_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_float_to_list_double_proto' + IMPORTS = ('@feast_stage/feast.zip'); +""" + + +@vectorized(input=pandas.DataFrame) +def feast_snowflake_array_float_to_list_double_proto(df): + sys._xoptions["snowflake_partner_attribution"].append("feast") + + numpy_arrays = np.asarray(df[0].to_list()).astype(float) + + df = list( + map( + ValueProto.SerializeToString, + python_values_to_proto_values(numpy_arrays, ValueType.DOUBLE_LIST), + ) + ) + return df + + +""" +CREATE OR REPLACE FUNCTION feast_snowflake_array_boolean_to_list_bool_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_boolean_to_list_bool_proto' + IMPORTS = ('@feast_stage/feast.zip'); +""" + + +@vectorized(input=pandas.DataFrame) +def feast_snowflake_array_boolean_to_list_bool_proto(df): + sys._xoptions["snowflake_partner_attribution"].append("feast") + 
+ df = list( + map( + ValueProto.SerializeToString, + python_values_to_proto_values(df[0].to_numpy(), ValueType.BOOL_LIST), + ) + ) + return df + + +""" +CREATE OR REPLACE FUNCTION feast_snowflake_array_timestamp_to_list_unix_timestamp_proto(df ARRAY) + RETURNS BINARY + LANGUAGE PYTHON + RUNTIME_VERSION = '3.8' + PACKAGES = ('protobuf', 'pandas') + HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_timestamp_to_list_unix_timestamp_proto' + IMPORTS = ('@feast_stage/feast.zip'); +""" + + +@vectorized(input=pandas.DataFrame) +def feast_snowflake_array_timestamp_to_list_unix_timestamp_proto(df): + sys._xoptions["snowflake_partner_attribution"].append("feast") + + numpy_arrays = np.asarray(df[0].to_list()).astype(np.datetime64) + + df = list( + map( + ValueProto.SerializeToString, + python_values_to_proto_values(numpy_arrays, ValueType.UNIX_TIMESTAMP_LIST), + ) + ) + return df + + """ CREATE OR REPLACE FUNCTION feast_snowflake_number_to_int32_proto(df NUMBER) RETURNS BINARY diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 9dbbb5a64ce..e51e1e743bb 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -680,6 +680,14 @@ def _convert_value_name_to_snowflake_udf(value_name: str, project_name: str) -> "FLOAT": f"feast_{project_name}_snowflake_float_to_double_proto", "BOOL": f"feast_{project_name}_snowflake_boolean_to_bool_proto", "UNIX_TIMESTAMP": f"feast_{project_name}_snowflake_timestamp_to_unix_timestamp_proto", + "BYTES_LIST": f"feast_{project_name}_snowflake_array_bytes_to_list_bytes_proto", + "STRING_LIST": f"feast_{project_name}_snowflake_array_varchar_to_list_string_proto", + "INT32_LIST": f"feast_{project_name}_snowflake_array_number_to_list_int32_proto", + "INT64_LIST": f"feast_{project_name}_snowflake_array_number_to_list_int64_proto", + "DOUBLE_LIST": f"feast_{project_name}_snowflake_array_float_to_list_double_proto", + "FLOAT_LIST": 
f"feast_{project_name}_snowflake_array_float_to_list_double_proto", + "BOOL_LIST": f"feast_{project_name}_snowflake_array_boolean_to_list_bool_proto", + "UNIX_TIMESTAMP_LIST": f"feast_{project_name}_snowflake_array_timestamp_to_list_unix_timestamp_proto", } return name_map[value_name].upper() diff --git a/sdk/python/tests/data/data_creator.py b/sdk/python/tests/data/data_creator.py index 2155468445a..8d5b1979fa3 100644 --- a/sdk/python/tests/data/data_creator.py +++ b/sdk/python/tests/data/data_creator.py @@ -59,6 +59,7 @@ def get_feature_values_for_dtype( "int64": [1, 2, 3, 4, 5], "float": [1.0, None, 3.0, 4.0, 5.0], "string": ["1", None, "3", "4", "5"], + "bytes": [b"1", None, b"3", b"4", b"5"], "bool": [True, None, False, True, False], "datetime": [ datetime(1980, 1, 1), diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index fda5b3c11de..027dea2c582 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -83,8 +83,8 @@ "password": os.getenv("SNOWFLAKE_CI_PASSWORD", ""), "role": os.getenv("SNOWFLAKE_CI_ROLE", ""), "warehouse": os.getenv("SNOWFLAKE_CI_WAREHOUSE", ""), - "database": "FEAST", - "schema": "ONLINE", + "database": os.getenv("SNOWFLAKE_CI_DATABASE", "FEAST"), + "schema": os.getenv("SNOWFLAKE_CI_SCHEMA_ONLINE", "ONLINE"), } BIGTABLE_CONFIG = { diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py index c7e5961a88a..c14780da97d 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py @@ -36,8 +36,8 @@ def __init__(self, project_name: str, *args, **kwargs): password=os.environ["SNOWFLAKE_CI_PASSWORD"], 
role=os.environ["SNOWFLAKE_CI_ROLE"], warehouse=os.environ["SNOWFLAKE_CI_WAREHOUSE"], - database="FEAST", - schema="OFFLINE", + database=os.environ.get("SNOWFLAKE_CI_DATABASE", "FEAST"), + schema=os.environ.get("SNOWFLAKE_CI_SCHEMA_OFFLINE", "OFFLINE"), storage_integration_name=os.getenv("BLOB_EXPORT_STORAGE_NAME", "FEAST_S3"), blob_export_location=os.getenv( "BLOB_EXPORT_URI", "s3://feast-snowflake-offload/export" diff --git a/sdk/python/tests/integration/materialization/test_snowflake.py b/sdk/python/tests/integration/materialization/test_snowflake.py index 0cf1471dfeb..daa96a87c97 100644 --- a/sdk/python/tests/integration/materialization/test_snowflake.py +++ b/sdk/python/tests/integration/materialization/test_snowflake.py @@ -1,10 +1,13 @@ import os -from datetime import timedelta +from datetime import datetime, timedelta import pytest +from pytz import utc +from feast import Field from feast.entity import Entity from feast.feature_view import FeatureView +from feast.types import Array, Bool, Bytes, Float64, Int32, Int64, String, UnixTimestamp from tests.data.data_creator import create_basic_driver_dataset from tests.integration.feature_repos.integration_test_repo_config import ( IntegrationTestRepoConfig, @@ -24,8 +27,8 @@ "password": os.getenv("SNOWFLAKE_CI_PASSWORD", ""), "role": os.getenv("SNOWFLAKE_CI_ROLE", ""), "warehouse": os.getenv("SNOWFLAKE_CI_WAREHOUSE", ""), - "database": "FEAST", - "schema": "MATERIALIZATION", + "database": os.getenv("SNOWFLAKE_CI_DATABASE", "FEAST"), + "schema": os.getenv("SNOWFLAKE_CI_SCHEMA_MATERIALIZATION", "MATERIALIZATION"), } SNOWFLAKE_ONLINE_CONFIG = { @@ -35,15 +38,16 @@ "password": os.getenv("SNOWFLAKE_CI_PASSWORD", ""), "role": os.getenv("SNOWFLAKE_CI_ROLE", ""), "warehouse": os.getenv("SNOWFLAKE_CI_WAREHOUSE", ""), - "database": "FEAST", - "schema": "ONLINE", + "database": os.getenv("SNOWFLAKE_CI_DATABASE", "FEAST"), + "schema": os.getenv("SNOWFLAKE_CI_SCHEMA_ONLINE", "ONLINE"), } 
+@pytest.mark.parametrize("online_store", [SNOWFLAKE_ONLINE_CONFIG, "sqlite"]) @pytest.mark.integration -def test_snowflake_materialization_consistency_internal(): +def test_snowflake_materialization_consistency(online_store): snowflake_config = IntegrationTestRepoConfig( - online_store=SNOWFLAKE_ONLINE_CONFIG, + online_store=online_store, offline_store_creator=SnowflakeDataSourceCreator, batch_engine=SNOWFLAKE_ENGINE_CONFIG, ) @@ -84,15 +88,32 @@ def test_snowflake_materialization_consistency_internal(): snowflake_environment.data_source_creator.teardown() +@pytest.mark.parametrize( + "feature_dtype, feast_dtype", + [ + ("string", Array(String)), + ("bytes", Array(Bytes)), + ("int32", Array(Int32)), + ("int64", Array(Int64)), + ("float", Array(Float64)), + ("bool", Array(Bool)), + ("datetime", Array(UnixTimestamp)), + ], +) +@pytest.mark.parametrize("feature_is_empty_list", [False]) +@pytest.mark.parametrize("online_store", [SNOWFLAKE_ONLINE_CONFIG, "sqlite"]) @pytest.mark.integration -def test_snowflake_materialization_consistency_external(): +def test_snowflake_materialization_consistency_internal_with_lists( + feature_dtype, feast_dtype, feature_is_empty_list, online_store +): snowflake_config = IntegrationTestRepoConfig( + online_store=online_store, offline_store_creator=SnowflakeDataSourceCreator, batch_engine=SNOWFLAKE_ENGINE_CONFIG, ) snowflake_environment = construct_test_environment(snowflake_config, None) - df = create_basic_driver_dataset() + df = create_basic_driver_dataset(Int32, feature_dtype, True, feature_is_empty_list) ds = snowflake_environment.data_source_creator.create_data_source( df, snowflake_environment.feature_store.project, @@ -105,23 +126,62 @@ def test_snowflake_materialization_consistency_external(): join_keys=["driver_id"], ) + schema = [ + Field(name="driver_id", dtype=Int32), + Field(name="value", dtype=feast_dtype), + ] driver_stats_fv = FeatureView( name="driver_hourly_stats", entities=[driver], ttl=timedelta(weeks=52), + 
schema=schema, source=ds, ) try: fs.apply([driver, driver_stats_fv]) - # materialization is run in two steps and - # we use timestamp from generated dataframe as a split point split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1) print(f"Split datetime: {split_dt}") + now = datetime.utcnow() + + full_feature_names = True + start_date = (now - timedelta(hours=5)).replace(tzinfo=utc) + end_date = split_dt + fs.materialize( + feature_views=[driver_stats_fv.name], + start_date=start_date, + end_date=end_date, + ) + + expected_values = { + "int32": [3] * 2, + "int64": [3] * 2, + "float": [3.0] * 2, + "string": ["3"] * 2, + "bytes": [b"3"] * 2, + "bool": [False] * 2, + "datetime": [datetime(1981, 1, 1, tzinfo=utc)] * 2, + } + expected_value = [] if feature_is_empty_list else expected_values[feature_dtype] + + response_dict = fs.get_online_features( + [f"{driver_stats_fv.name}:value"], + [{"driver_id": 1}], + full_feature_names=full_feature_names, + ).to_dict() + + actual_value = response_dict[f"{driver_stats_fv.name}__value"][0] + assert actual_value is not None, f"Response: {response_dict}" + if feature_dtype == "float": + for actual_num, expected_num in zip(actual_value, expected_value): + assert ( + abs(actual_num - expected_num) < 1e-6 + ), f"Response: {response_dict}, Expected: {expected_value}" + else: + assert actual_value == expected_value - validate_offline_online_store_consistency(fs, driver_stats_fv, split_dt) finally: fs.teardown() snowflake_environment.data_source_creator.teardown() From ea8ad1731a5ebe798b11181fc0027f7cac0e1526 Mon Sep 17 00:00:00 2001 From: Alex Vinnik <33845028+alex-vinnik-sp@users.noreply.github.com> Date: Sat, 27 Jan 2024 04:53:31 -0600 Subject: [PATCH 05/30] feat: Support s3gov schema by snowflake offline store during materialization (#3891) --- .../feast/infra/offline_stores/snowflake.py | 7 ++- .../infra/offline_stores/test_snowflake.py | 57 +++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) create mode 
100644 sdk/python/tests/unit/infra/offline_stores/test_snowflake.py diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index 4f11b1ac42c..dd13ffc96c7 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -615,12 +615,17 @@ def to_remote_storage(self) -> List[str]: HEADER = TRUE """ cursor = execute_snowflake_statement(self.snowflake_conn, query) + # s3gov schema is used by Snowflake in AWS govcloud regions + # remove gov portion from schema and pass it to online store upload + native_export_path = self.export_path.replace("s3gov://", "s3://") + return self._get_file_names_from_copy_into(cursor, native_export_path) + def _get_file_names_from_copy_into(self, cursor, native_export_path) -> List[str]: file_name_column_index = [ idx for idx, rm in enumerate(cursor.description) if rm.name == "FILE_NAME" ][0] return [ - f"{self.export_path}/{row[file_name_column_index]}" + f"{native_export_path}/{row[file_name_column_index]}" for row in cursor.fetchall() ] diff --git a/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py b/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py new file mode 100644 index 00000000000..afc3ae97aef --- /dev/null +++ b/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py @@ -0,0 +1,57 @@ +import re +from unittest.mock import ANY, MagicMock, patch + +import pytest + +from feast.infra.offline_stores.snowflake import ( + SnowflakeOfflineStoreConfig, + SnowflakeRetrievalJob, +) +from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig +from feast.repo_config import RepoConfig + + +@pytest.fixture(params=["s3", "s3gov"]) +def retrieval_job(request): + offline_store_config = SnowflakeOfflineStoreConfig( + type="snowflake.offline", + account="snow", + user="snow", + password="snow", + role="snow", + warehouse="snow", + database="FEAST", + schema="OFFLINE", + 
storage_integration_name="FEAST_S3", + blob_export_location=f"{request.param}://feast-snowflake-offload/export", + ) + retrieval_job = SnowflakeRetrievalJob( + query="SELECT * FROM snowflake", + snowflake_conn=MagicMock(), + config=RepoConfig( + registry="s3://ml-test/repo/registry.db", + project="test", + provider="snowflake.offline", + online_store=SqliteOnlineStoreConfig(type="sqlite"), + offline_store=offline_store_config, + ), + full_feature_names=True, + on_demand_feature_views=[], + ) + return retrieval_job + + +def test_to_remote_storage(retrieval_job): + stored_files = ["just a path", "maybe another"] + with patch.object( + retrieval_job, "to_snowflake", return_value=None + ) as mock_to_snowflake, patch.object( + retrieval_job, "_get_file_names_from_copy_into", return_value=stored_files + ) as mock_get_file_names_from_copy: + assert ( + retrieval_job.to_remote_storage() == stored_files + ), "should return the list of files" + mock_to_snowflake.assert_called_once() + mock_get_file_names_from_copy.assert_called_once_with(ANY, ANY) + native_path = mock_get_file_names_from_copy.call_args[0][1] + assert re.match("^s3://.*", native_path), "path should be s3://*" From 2f99a617b6a5d8eae1e27c780bbfa94594f54441 Mon Sep 17 00:00:00 2001 From: senbong Date: Wed, 31 Jan 2024 01:50:13 +0800 Subject: [PATCH 06/30] fix: Create index only if not exists during MySQL online store update (#3905) Update mysql.py to create index only if not exists during update Signed-off-by: senbong --- .../contrib/mysql_online_store/mysql.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/contrib/mysql_online_store/mysql.py b/sdk/python/feast/infra/online_stores/contrib/mysql_online_store/mysql.py index fa7dd2c2a49..c09cb126f0c 100644 --- a/sdk/python/feast/infra/online_stores/contrib/mysql_online_store/mysql.py +++ b/sdk/python/feast/infra/online_stores/contrib/mysql_online_store/mysql.py @@ -178,8 +178,11 @@ def 
update( # We don't create any special state for the entities in this implementation. for table in tables_to_keep: + + table_name = _table_id(project, table) + index_name = f"{table_name}_ek" cur.execute( - f"""CREATE TABLE IF NOT EXISTS {_table_id(project, table)} (entity_key VARCHAR(512), + f"""CREATE TABLE IF NOT EXISTS {table_name} (entity_key VARCHAR(512), feature_name VARCHAR(256), value BLOB, event_ts timestamp NULL DEFAULT NULL, @@ -187,9 +190,16 @@ def update( PRIMARY KEY(entity_key, feature_name))""" ) - cur.execute( - f"ALTER TABLE {_table_id(project, table)} ADD INDEX {_table_id(project, table)}_ek (entity_key);" + index_exists = cur.execute( + f""" + SELECT 1 FROM information_schema.statistics + WHERE table_schema = DATABASE() AND table_name = '{table_name}' AND index_name = '{index_name}' + """ ) + if not index_exists: + cur.execute( + f"ALTER TABLE {table_name} ADD INDEX {index_name} (entity_key);" + ) for table in tables_to_delete: _drop_table_and_index(cur, project, table) From 987f0fdc99df1ef4507baff75e3df0e02bf42034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=AD=20Jord=C3=A0=20Roca?= <108732053+marti-jorda-roca@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:50:28 +0100 Subject: [PATCH 07/30] fix: Bytewax materialization engine fails when loading feature_store.yaml (#3912) * bytewax materialization loads yaml config correctly Signed-off-by: marti-jorda-roca * added postgres dependency for SQL registries Signed-off-by: marti-jorda-roca --------- Signed-off-by: marti-jorda-roca --- .../feast/infra/materialization/contrib/bytewax/Dockerfile | 2 +- .../feast/infra/materialization/contrib/bytewax/dataflow.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/infra/materialization/contrib/bytewax/Dockerfile b/sdk/python/feast/infra/materialization/contrib/bytewax/Dockerfile index a26661ead35..a7d0af9b416 100644 --- a/sdk/python/feast/infra/materialization/contrib/bytewax/Dockerfile +++ 
b/sdk/python/feast/infra/materialization/contrib/bytewax/Dockerfile @@ -25,5 +25,5 @@ COPY README.md README.md # git dir to infer the version of feast we're installing. # https://github.com/pypa/setuptools_scm#usage-from-docker # I think it also assumes that this dockerfile is being built from the root of the directory. -RUN --mount=source=.git,target=.git,type=bind pip3 install --no-cache-dir '.[aws,gcp,bytewax,snowflake]' +RUN --mount=source=.git,target=.git,type=bind pip3 install --no-cache-dir '.[aws,gcp,bytewax,snowflake,postgres]' diff --git a/sdk/python/feast/infra/materialization/contrib/bytewax/dataflow.py b/sdk/python/feast/infra/materialization/contrib/bytewax/dataflow.py index 9d9b328c0e9..bbc32cc1651 100644 --- a/sdk/python/feast/infra/materialization/contrib/bytewax/dataflow.py +++ b/sdk/python/feast/infra/materialization/contrib/bytewax/dataflow.py @@ -12,10 +12,10 @@ logging.basicConfig(level=logging.INFO) with open("/var/feast/feature_store.yaml") as f: - feast_config = yaml.safe_load(f) + feast_config = yaml.load(f, Loader=yaml.Loader) with open("/var/feast/bytewax_materialization_config.yaml") as b: - bytewax_config = yaml.safe_load(b) + bytewax_config = yaml.load(b, Loader=yaml.Loader) config = RepoConfig(**feast_config) store = FeatureStore(config=config) From fa8cfd478ebd0aa24d48589b116f517eb9cc6c83 Mon Sep 17 00:00:00 2001 From: Willem Pienaar <6728866+woop@users.noreply.github.com> Date: Tue, 30 Jan 2024 11:24:51 -0800 Subject: [PATCH 08/30] Update maintainers.md (#3918) --- community/maintainers.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/community/maintainers.md b/community/maintainers.md index 8aca48fd0db..9f8d349a5da 100644 --- a/community/maintainers.md +++ b/community/maintainers.md @@ -9,7 +9,12 @@ In alphabetical order | Name | GitHub Username | Email | Organization | | -------------- | ---------------- |-----------------------------| ------------------ | | Achal Shah | `achals` | achals@gmail.com | 
Tecton | -| Willem Pienaar | `woop` | will.pienaar@gmail.com | Tecton | +| Edson Tirelli | `etirelli` | ed.tirelli@gmail.com | Red Hat | +| Francisco Javier Arceo | `franciscojavierarceo` | arceofrancisco@gmail.com | Affirm | +| Hao Xu | `hao-affirm` | sduxuhao@gmail.com | JPMorgan | +| Jeremy Ary | `jeremyary` | jeremy.ary@gmail.com | Red Hat | +| Shuchu Han | `shuchu` | shuchu.han@gmail.com | Independent | +| Willem Pienaar | `woop` | will.pienaar@gmail.com | Cleric | | Zhiling Chen | `zhilingc` | chnzhlng@gmail.com | GetGround | ## Emeritus Maintainers From 936ecfdaf8419191c8c32a44890f20d4b1eba9f8 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Tue, 30 Jan 2024 16:22:51 -0500 Subject: [PATCH 09/30] docs: Updating maintainers.md (#3919) --- community/maintainers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community/maintainers.md b/community/maintainers.md index 9f8d349a5da..0b3d4ab6480 100644 --- a/community/maintainers.md +++ b/community/maintainers.md @@ -11,7 +11,7 @@ In alphabetical order | Achal Shah | `achals` | achals@gmail.com | Tecton | | Edson Tirelli | `etirelli` | ed.tirelli@gmail.com | Red Hat | | Francisco Javier Arceo | `franciscojavierarceo` | arceofrancisco@gmail.com | Affirm | -| Hao Xu | `hao-affirm` | sduxuhao@gmail.com | JPMorgan | +| Hao Xu | `HaoXuAI` | sduxuhao@gmail.com | JPMorgan | | Jeremy Ary | `jeremyary` | jeremy.ary@gmail.com | Red Hat | | Shuchu Han | `shuchu` | shuchu.han@gmail.com | Independent | | Willem Pienaar | `woop` | will.pienaar@gmail.com | Cleric | From 8bce6dc143837b1dc88f59336994148894d5ccbe Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Wed, 31 Jan 2024 17:23:14 -0500 Subject: [PATCH 10/30] docs: Updated development guide to include compiling the protos (#3896) * [docs] updated development guide to include compiling the protos Signed-off-by: franciscojavierarceo * adding note on spinning up the docker image Signed-off-by: franciscojavierarceo --------- Signed-off-by: 
franciscojavierarceo --- docs/project/development-guide.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/project/development-guide.md b/docs/project/development-guide.md index 931d0243d2b..43dae1d678d 100644 --- a/docs/project/development-guide.md +++ b/docs/project/development-guide.md @@ -154,6 +154,16 @@ pip install -e ".[dev]" This will allow the installed feast version to automatically reflect changes to your local development version of Feast without needing to reinstall everytime you make code changes. +10. Compile the protobufs +```sh +make compile-protos-python +``` + +11. Spin up Docker Image +```sh +docker build -t docker-whale -f ./sdk/python/feast/infra/feature_servers/multicloud/Dockerfile . +``` + ### Code Style & Linting Feast Python SDK / CLI codebase: - Conforms to [Black code style](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html) From 7153cad6082edfded96999c49ee1bdc9329e11c3 Mon Sep 17 00:00:00 2001 From: Harry Date: Fri, 2 Feb 2024 20:17:46 +0700 Subject: [PATCH 11/30] fix: Prevent spamming pull busybox from dockerhub (#3923) Signed-off-by: Hai Nguyen --- .../contrib/bytewax/bytewax_materialization_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py index 5c7a719532d..060a47ce585 100644 --- a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py +++ b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py @@ -421,7 +421,7 @@ def _create_job_definition(self, job_id, namespace, pods, env, index_offset=0): } ], "image": "busybox", - "imagePullPolicy": "Always", + "imagePullPolicy": "IfNotPresent", "name": "init-hostfile", "resources": {}, "securityContext": { From bf026a018c16081669170ee79e7b191fcd328d8b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 3 Feb 2024 20:00:49 -0500 Subject: [PATCH 12/30] chore: Bump jupyterlab from 4.0.8 to 4.0.11 in /sdk/python/requirements (#3895) --- sdk/python/requirements/py3.10-ci-requirements.txt | 2 +- sdk/python/requirements/py3.8-ci-requirements.txt | 2 +- sdk/python/requirements/py3.9-ci-requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 5e407c1a991..094418cda6b 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -409,7 +409,7 @@ jupyter-server==2.11.2 # notebook-shim jupyter-server-terminals==0.4.4 # via jupyter-server -jupyterlab==4.0.8 +jupyterlab==4.0.11 # via notebook jupyterlab-pygments==0.2.2 # via nbconvert diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 02eaf6dc307..c477c208355 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -425,7 +425,7 @@ jupyter-server==2.11.2 # notebook-shim jupyter-server-terminals==0.4.4 # via jupyter-server -jupyterlab==4.0.8 +jupyterlab==4.0.11 # via notebook jupyterlab-pygments==0.2.2 # via nbconvert diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 43c49a49527..6f400fe2408 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -416,7 +416,7 @@ jupyter-server==2.11.2 # notebook-shim jupyter-server-terminals==0.4.4 # via jupyter-server -jupyterlab==4.0.8 +jupyterlab==4.0.11 # via notebook jupyterlab-pygments==0.2.2 # via nbconvert From f494f02e1254b91b56b0b69f4a15edafe8d7291a Mon Sep 17 00:00:00 2001 From: Tornike Gurgenidze Date: Sun, 4 Feb 2024 21:26:12 +0400 
Subject: [PATCH 13/30] fix: Remove unnecessary dependency on mysqlclient (#3925) --- sdk/python/feast/infra/feature_servers/multicloud/Dockerfile | 1 - .../feast/infra/feature_servers/multicloud/Dockerfile.dev | 1 - sdk/python/requirements/py3.10-ci-requirements.txt | 2 -- sdk/python/requirements/py3.8-ci-requirements.txt | 2 -- sdk/python/requirements/py3.9-ci-requirements.txt | 2 -- sdk/python/tests/unit/test_sql_registry.py | 2 +- setup.py | 2 +- 7 files changed, 2 insertions(+), 10 deletions(-) diff --git a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile index c95c515fb4b..fdd8e3ac51d 100644 --- a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile +++ b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile @@ -4,7 +4,6 @@ RUN apt update && \ apt install -y \ jq \ python3-dev \ - default-libmysqlclient-dev \ build-essential RUN pip install pip --upgrade diff --git a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev index ecbc199a5b9..3fc1355d7a8 100644 --- a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev +++ b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev @@ -4,7 +4,6 @@ RUN apt update && \ apt install -y \ jq \ python3-dev \ - default-libmysqlclient-dev \ build-essential RUN pip install pip --upgrade diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 094418cda6b..740356907d8 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -472,8 +472,6 @@ mypy-extensions==1.0.0 # mypy mypy-protobuf==3.1.0 # via feast (setup.py) -mysqlclient==2.2.0 - # via feast (setup.py) nbclient==0.9.0 # via nbconvert nbconvert==7.11.0 diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt 
b/sdk/python/requirements/py3.8-ci-requirements.txt index c477c208355..3bda9e72f9f 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -488,8 +488,6 @@ mypy-extensions==1.0.0 # mypy mypy-protobuf==3.1.0 # via feast (setup.py) -mysqlclient==2.2.0 - # via feast (setup.py) nbclient==0.9.0 # via nbconvert nbconvert==7.11.0 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 6f400fe2408..6989d5b4ccf 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -479,8 +479,6 @@ mypy-extensions==1.0.0 # mypy mypy-protobuf==3.1.0 # via feast (setup.py) -mysqlclient==2.2.0 - # via feast (setup.py) nbclient==0.9.0 # via nbconvert nbconvert==7.11.0 diff --git a/sdk/python/tests/unit/test_sql_registry.py b/sdk/python/tests/unit/test_sql_registry.py index 39896d3a9dd..b96dc6fe770 100644 --- a/sdk/python/tests/unit/test_sql_registry.py +++ b/sdk/python/tests/unit/test_sql_registry.py @@ -103,7 +103,7 @@ def mysql_registry(): registry_config = RegistryConfig( registry_type="sql", - path=f"mysql+mysqldb://{POSTGRES_USER}:{POSTGRES_PASSWORD}@127.0.0.1:{container_port}/{POSTGRES_DB}", + path=f"mysql+pymysql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@127.0.0.1:{container_port}/{POSTGRES_DB}", ) yield SqlRegistry(registry_config, "project", None) diff --git a/setup.py b/setup.py index 33bf76e1819..4905a7697d7 100644 --- a/setup.py +++ b/setup.py @@ -116,7 +116,7 @@ "psycopg2-binary>=2.8.3,<3", ] -MYSQL_REQUIRED = ["mysqlclient", "pymysql", "types-PyMySQL"] +MYSQL_REQUIRED = ["pymysql", "types-PyMySQL"] HBASE_REQUIRED = [ "happybase>=1.2.0,<3", From 1f3cab825c927d8a9337de5cd340d0bb4ea70558 Mon Sep 17 00:00:00 2001 From: Harry Date: Tue, 6 Feb 2024 12:28:25 +0700 Subject: [PATCH 14/30] ci: Extend python base version for test cases (#3929) Signed-off-by: Hai Nguyen --- 
.github/workflows/pr_integration_tests.yml | 2 +- .github/workflows/pr_local_integration_tests.yml | 2 +- .github/workflows/unit_tests.yml | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml index 73344ec2ddd..26c85b0126e 100644 --- a/.github/workflows/pr_integration_tests.yml +++ b/.github/workflows/pr_integration_tests.yml @@ -86,7 +86,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8" ] + python-version: [ "3.8", "3.10" ] os: [ ubuntu-latest ] env: OS: ${{ matrix.os }} diff --git a/.github/workflows/pr_local_integration_tests.yml b/.github/workflows/pr_local_integration_tests.yml index 111a9b51a9c..aeb4100dc85 100644 --- a/.github/workflows/pr_local_integration_tests.yml +++ b/.github/workflows/pr_local_integration_tests.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8" ] + python-version: [ "3.8", "3.10" ] os: [ ubuntu-latest ] env: OS: ${{ matrix.os }} diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index f03cd33346c..31e6d08c743 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -12,8 +12,6 @@ jobs: exclude: - os: macOS-latest python-version: "3.9" - - os: macOS-latest - python-version: "3.10" env: OS: ${{ matrix.os }} PYTHON: ${{ matrix.python-version }} From 373e624abb8779b8a60d30aa08d25414d987bb1b Mon Sep 17 00:00:00 2001 From: Tornike Gurgenidze Date: Wed, 7 Feb 2024 03:26:32 +0400 Subject: [PATCH 15/30] feat: Add gRPC Registry Server (#3924) --- protos/feast/registry/RegistryServer.proto | 230 ++++++++++++++++++ sdk/python/feast/cli.py | 21 +- sdk/python/feast/constants.py | 3 + sdk/python/feast/feature_store.py | 7 + .../feast/infra/registry/base_registry.py | 4 +- sdk/python/feast/infra/registry/registry.py | 8 +- sdk/python/feast/registry_server.py | 202 +++++++++++++++ sdk/python/tests/unit/test_registry_server.py | 60 
+++++ setup.py | 2 +- 9 files changed, 532 insertions(+), 5 deletions(-) create mode 100644 protos/feast/registry/RegistryServer.proto create mode 100644 sdk/python/feast/registry_server.py create mode 100644 sdk/python/tests/unit/test_registry_server.py diff --git a/protos/feast/registry/RegistryServer.proto b/protos/feast/registry/RegistryServer.proto new file mode 100644 index 00000000000..3e7773e89a4 --- /dev/null +++ b/protos/feast/registry/RegistryServer.proto @@ -0,0 +1,230 @@ +syntax = "proto3"; + +package feast.registry; + +import "google/protobuf/timestamp.proto"; +import "google/protobuf/empty.proto"; +import "feast/core/Registry.proto"; +import "feast/core/Entity.proto"; +import "feast/core/DataSource.proto"; +import "feast/core/FeatureView.proto"; +import "feast/core/RequestFeatureView.proto"; +import "feast/core/StreamFeatureView.proto"; +import "feast/core/OnDemandFeatureView.proto"; +import "feast/core/FeatureService.proto"; +import "feast/core/SavedDataset.proto"; +import "feast/core/ValidationProfile.proto"; +import "feast/core/InfraObject.proto"; + +service RegistryServer{ + // Entity RPCs + rpc GetEntity (GetEntityRequest) returns (feast.core.Entity) {} + rpc ListEntities (ListEntitiesRequest) returns (ListEntitiesResponse) {} + + // DataSource RPCs + rpc GetDataSource (GetDataSourceRequest) returns (feast.core.DataSource) {} + rpc ListDataSources (ListDataSourcesRequest) returns (ListDataSourcesResponse) {} + + // FeatureView RPCs + rpc GetFeatureView (GetFeatureViewRequest) returns (feast.core.FeatureView) {} + rpc ListFeatureViews (ListFeatureViewsRequest) returns (ListFeatureViewsResponse) {} + + // RequestFeatureView RPCs + rpc GetRequestFeatureView (GetRequestFeatureViewRequest) returns (feast.core.RequestFeatureView) {} + rpc ListRequestFeatureViews (ListRequestFeatureViewsRequest) returns (ListRequestFeatureViewsResponse) {} + + // StreamFeatureView RPCs + rpc GetStreamFeatureView (GetStreamFeatureViewRequest) returns 
(feast.core.StreamFeatureView) {} + rpc ListStreamFeatureViews (ListStreamFeatureViewsRequest) returns (ListStreamFeatureViewsResponse) {} + + // OnDemandFeatureView RPCs + rpc GetOnDemandFeatureView (GetOnDemandFeatureViewRequest) returns (feast.core.OnDemandFeatureView) {} + rpc ListOnDemandFeatureViews (ListOnDemandFeatureViewsRequest) returns (ListOnDemandFeatureViewsResponse) {} + + // FeatureService RPCs + rpc GetFeatureService (GetFeatureServiceRequest) returns (feast.core.FeatureService) {} + rpc ListFeatureServices (ListFeatureServicesRequest) returns (ListFeatureServicesResponse) {} + + // SavedDataset RPCs + rpc GetSavedDataset (GetSavedDatasetRequest) returns (feast.core.SavedDataset) {} + rpc ListSavedDatasets (ListSavedDatasetsRequest) returns (ListSavedDatasetsResponse) {} + + // ValidationReference RPCs + rpc GetValidationReference (GetValidationReferenceRequest) returns (feast.core.ValidationReference) {} + rpc ListValidationReferences (ListValidationReferencesRequest) returns (ListValidationReferencesResponse) {} + + rpc ListProjectMetadata (ListProjectMetadataRequest) returns (ListProjectMetadataResponse) {} + rpc GetInfra (GetInfraRequest) returns (feast.core.Infra) {} + rpc Refresh (RefreshRequest) returns (google.protobuf.Empty) {} + rpc Proto (google.protobuf.Empty) returns (feast.core.Registry) {} + +} + +message RefreshRequest { + string project = 1; +} + +message GetInfraRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListProjectMetadataRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListProjectMetadataResponse { + repeated feast.core.ProjectMetadata project_metadata = 1; +} + +message GetEntityRequest { + string name = 1; + string project = 2; + bool allow_cache = 3; +} + +message ListEntitiesRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListEntitiesResponse { + repeated feast.core.Entity entities = 1; +} + +// DataSources + +message GetDataSourceRequest { + string name = 
1; + string project = 2; + bool allow_cache = 3; +} + +message ListDataSourcesRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListDataSourcesResponse { + repeated feast.core.DataSource data_sources = 1; +} + +// FeatureViews + +message GetFeatureViewRequest { + string name = 1; + string project = 2; + bool allow_cache = 3; +} + +message ListFeatureViewsRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListFeatureViewsResponse { + repeated feast.core.FeatureView feature_views = 1; +} + +// RequestFeatureView + +message GetRequestFeatureViewRequest { + string name = 1; + string project = 2; + bool allow_cache = 3; +} + +message ListRequestFeatureViewsRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListRequestFeatureViewsResponse { + repeated feast.core.RequestFeatureView request_feature_views = 1; +} + +// StreamFeatureView + +message GetStreamFeatureViewRequest { + string name = 1; + string project = 2; + bool allow_cache = 3; +} + +message ListStreamFeatureViewsRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListStreamFeatureViewsResponse { + repeated feast.core.StreamFeatureView stream_feature_views = 1; +} + +// OnDemandFeatureView + +message GetOnDemandFeatureViewRequest { + string name = 1; + string project = 2; + bool allow_cache = 3; +} + +message ListOnDemandFeatureViewsRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListOnDemandFeatureViewsResponse { + repeated feast.core.OnDemandFeatureView on_demand_feature_views = 1; +} + +// FeatureServices + +message GetFeatureServiceRequest { + string name = 1; + string project = 2; + bool allow_cache = 3; +} + +message ListFeatureServicesRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListFeatureServicesResponse { + repeated feast.core.FeatureService feature_services = 1; +} + +// SavedDataset + +message GetSavedDatasetRequest { + string name = 1; + string project = 2; + bool allow_cache = 
3; +} + +message ListSavedDatasetsRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListSavedDatasetsResponse { + repeated feast.core.SavedDataset saved_datasets = 1; +} + +// ValidationReference + +message GetValidationReferenceRequest { + string name = 1; + string project = 2; + bool allow_cache = 3; +} + +message ListValidationReferencesRequest { + string project = 1; + bool allow_cache = 2; +} + +message ListValidationReferencesResponse { + repeated feast.core.ValidationReference validation_references = 1; +} diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 2eb2c27bcb7..985c44b821f 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -25,7 +25,10 @@ from pygments import formatters, highlight, lexers from feast import utils -from feast.constants import DEFAULT_FEATURE_TRANSFORMATION_SERVER_PORT +from feast.constants import ( + DEFAULT_FEATURE_TRANSFORMATION_SERVER_PORT, + DEFAULT_REGISTRY_SERVER_PORT, +) from feast.errors import FeastObjectNotFoundException, FeastProviderLoginError from feast.feature_view import FeatureView from feast.infra.contrib.grpc_server import get_grpc_server @@ -753,6 +756,22 @@ def serve_transformations_command(ctx: click.Context, port: int): store.serve_transformations(port) +@cli.command("serve_registry") +@click.option( + "--port", + "-p", + type=click.INT, + default=DEFAULT_REGISTRY_SERVER_PORT, + help="Specify a port for the server", +) +@click.pass_context +def serve_registry_command(ctx: click.Context, port: int): + """Start a registry server locally on a given port.""" + store = create_feature_store(ctx) + + store.serve_registry(port) + + @cli.command("validate") @click.option( "--feature-service", diff --git a/sdk/python/feast/constants.py b/sdk/python/feast/constants.py index 574d79f4167..c022ecba557 100644 --- a/sdk/python/feast/constants.py +++ b/sdk/python/feast/constants.py @@ -44,5 +44,8 @@ # Default FTS port DEFAULT_FEATURE_TRANSFORMATION_SERVER_PORT = 6569 +# 
Default registry server port +DEFAULT_REGISTRY_SERVER_PORT = 6570 + # Environment variable for feature server docker image tag DOCKER_IMAGE_TAG_ENV_NAME: str = "FEAST_SERVER_DOCKER_IMAGE_TAG" diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index d3f98f80323..4a53672b2e2 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -2278,6 +2278,13 @@ def serve_ui( root_path=root_path, ) + @log_exceptions_and_usage + def serve_registry(self, port: int) -> None: + """Start registry server locally on a given port.""" + from feast import registry_server + + registry_server.start_server(self, port) + @log_exceptions_and_usage def serve_transformations(self, port: int) -> None: """Start the feature transformation server locally on a given port.""" diff --git a/sdk/python/feast/infra/registry/base_registry.py b/sdk/python/feast/infra/registry/base_registry.py index 14b098bb123..8928a5800dd 100644 --- a/sdk/python/feast/infra/registry/base_registry.py +++ b/sdk/python/feast/infra/registry/base_registry.py @@ -329,7 +329,9 @@ def list_feature_views( # request feature view operations @abstractmethod - def get_request_feature_view(self, name: str, project: str) -> RequestFeatureView: + def get_request_feature_view( + self, name: str, project: str, allow_cache: bool = False + ) -> RequestFeatureView: """ Retrieves a request feature view. 
diff --git a/sdk/python/feast/infra/registry/registry.py b/sdk/python/feast/infra/registry/registry.py index 1a72cbb4a58..fc7be75e0d3 100644 --- a/sdk/python/feast/infra/registry/registry.py +++ b/sdk/python/feast/infra/registry/registry.py @@ -528,8 +528,12 @@ def list_feature_views( ) return proto_registry_utils.list_feature_views(registry_proto, project) - def get_request_feature_view(self, name: str, project: str): - registry_proto = self._get_registry_proto(project=project, allow_cache=False) + def get_request_feature_view( + self, name: str, project: str, allow_cache: bool = False + ): + registry_proto = self._get_registry_proto( + project=project, allow_cache=allow_cache + ) return proto_registry_utils.get_request_feature_view( registry_proto, name, project ) diff --git a/sdk/python/feast/registry_server.py b/sdk/python/feast/registry_server.py new file mode 100644 index 00000000000..221715480e5 --- /dev/null +++ b/sdk/python/feast/registry_server.py @@ -0,0 +1,202 @@ +from concurrent import futures + +import grpc +from google.protobuf.empty_pb2 import Empty + +from feast import FeatureStore +from feast.protos.feast.registry import RegistryServer_pb2, RegistryServer_pb2_grpc + + +class RegistryServer(RegistryServer_pb2_grpc.RegistryServerServicer): + def __init__(self, store: FeatureStore) -> None: + super().__init__() + self.proxied_registry = store.registry + + def GetEntity(self, request: RegistryServer_pb2.GetEntityRequest, context): + return self.proxied_registry.get_entity( + name=request.name, project=request.project, allow_cache=request.allow_cache + ).to_proto() + + def ListEntities(self, request, context): + return RegistryServer_pb2.ListEntitiesResponse( + entities=[ + entity.to_proto() + for entity in self.proxied_registry.list_entities( + project=request.project, allow_cache=request.allow_cache + ) + ] + ) + + def GetDataSource(self, request: RegistryServer_pb2.GetDataSourceRequest, context): + return self.proxied_registry.get_data_source( + 
name=request.name, project=request.project, allow_cache=request.allow_cache + ).to_proto() + + def ListDataSources(self, request, context): + return RegistryServer_pb2.ListDataSourcesResponse( + data_sources=[ + data_source.to_proto() + for data_source in self.proxied_registry.list_data_sources( + project=request.project, allow_cache=request.allow_cache + ) + ] + ) + + def GetFeatureView( + self, request: RegistryServer_pb2.GetFeatureViewRequest, context + ): + return self.proxied_registry.get_feature_view( + name=request.name, project=request.project, allow_cache=request.allow_cache + ).to_proto() + + def ListFeatureViews(self, request, context): + return RegistryServer_pb2.ListFeatureViewsResponse( + feature_views=[ + feature_view.to_proto() + for feature_view in self.proxied_registry.list_feature_views( + project=request.project, allow_cache=request.allow_cache + ) + ] + ) + + def GetRequestFeatureView( + self, request: RegistryServer_pb2.GetRequestFeatureViewRequest, context + ): + return self.proxied_registry.get_request_feature_view( + name=request.name, project=request.project, allow_cache=request.allow_cache + ).to_proto() + + def ListRequestFeatureViews(self, request, context): + return RegistryServer_pb2.ListRequestFeatureViewsResponse( + request_feature_views=[ + request_feature_view.to_proto() + for request_feature_view in self.proxied_registry.list_request_feature_views( + project=request.project, allow_cache=request.allow_cache + ) + ] + ) + + def GetStreamFeatureView( + self, request: RegistryServer_pb2.GetStreamFeatureViewRequest, context + ): + return self.proxied_registry.get_stream_feature_view( + name=request.name, project=request.project, allow_cache=request.allow_cache + ).to_proto() + + def ListStreamFeatureViews(self, request, context): + return RegistryServer_pb2.ListStreamFeatureViewsResponse( + stream_feature_views=[ + stream_feature_view.to_proto() + for stream_feature_view in self.proxied_registry.list_stream_feature_views( + 
project=request.project, allow_cache=request.allow_cache + ) + ] + ) + + def GetOnDemandFeatureView( + self, request: RegistryServer_pb2.GetOnDemandFeatureViewRequest, context + ): + return self.proxied_registry.get_on_demand_feature_view( + name=request.name, project=request.project, allow_cache=request.allow_cache + ).to_proto() + + def ListOnDemandFeatureViews(self, request, context): + return RegistryServer_pb2.ListOnDemandFeatureViewsResponse( + on_demand_feature_views=[ + on_demand_feature_view.to_proto() + for on_demand_feature_view in self.proxied_registry.list_on_demand_feature_views( + project=request.project, allow_cache=request.allow_cache + ) + ] + ) + + def GetFeatureService( + self, request: RegistryServer_pb2.GetFeatureServiceRequest, context + ): + return self.proxied_registry.get_feature_service( + name=request.name, project=request.project, allow_cache=request.allow_cache + ).to_proto() + + def ListFeatureServices( + self, request: RegistryServer_pb2.ListFeatureServicesRequest, context + ): + return RegistryServer_pb2.ListFeatureServicesResponse( + feature_services=[ + feature_service.to_proto() + for feature_service in self.proxied_registry.list_feature_services( + project=request.project, allow_cache=request.allow_cache + ) + ] + ) + + def GetSavedDataset( + self, request: RegistryServer_pb2.GetSavedDatasetRequest, context + ): + return self.proxied_registry.get_saved_dataset( + name=request.name, project=request.project, allow_cache=request.allow_cache + ).to_proto() + + def ListSavedDatasets( + self, request: RegistryServer_pb2.ListSavedDatasetsRequest, context + ): + return RegistryServer_pb2.ListSavedDatasetsResponse( + saved_datasets=[ + saved_dataset.to_proto() + for saved_dataset in self.proxied_registry.list_saved_datasets( + project=request.project, allow_cache=request.allow_cache + ) + ] + ) + + def GetValidationReference( + self, request: RegistryServer_pb2.GetValidationReferenceRequest, context + ): + return 
self.proxied_registry.get_validation_reference( + name=request.name, project=request.project, allow_cache=request.allow_cache + ).to_proto() + + def ListValidationReferences( + self, request: RegistryServer_pb2.ListValidationReferencesRequest, context + ): + return RegistryServer_pb2.ListValidationReferencesResponse( + validation_references=[ + validation_reference.to_proto() + for validation_reference in self.proxied_registry.list_validation_references( + project=request.project, allow_cache=request.allow_cache + ) + ] + ) + + def ListProjectMetadata( + self, request: RegistryServer_pb2.ListProjectMetadataRequest, context + ): + return RegistryServer_pb2.ListProjectMetadataResponse( + project_metadata=[ + project_metadata.to_proto() + for project_metadata in self.proxied_registry.list_project_metadata( + project=request.project, allow_cache=request.allow_cache + ) + ] + ) + + def GetInfra(self, request: RegistryServer_pb2.GetInfraRequest, context): + return self.proxied_registry.get_infra( + project=request.project, allow_cache=request.allow_cache + ).to_proto() + + def Refresh(self, request, context): + self.proxied_registry.refresh(request.project) + return Empty() + + def Proto(self, request, context): + return self.proxied_registry.proto() + + +def start_server(store: FeatureStore, port: int): + server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + RegistryServer_pb2_grpc.add_RegistryServerServicer_to_server( + RegistryServer(store), server + ) + server.add_insecure_port(f"[::]:{port}") + server.start() + server.wait_for_termination() diff --git a/sdk/python/tests/unit/test_registry_server.py b/sdk/python/tests/unit/test_registry_server.py new file mode 100644 index 00000000000..734bbfe19b8 --- /dev/null +++ b/sdk/python/tests/unit/test_registry_server.py @@ -0,0 +1,60 @@ +import assertpy +import grpc_testing +import pytest +from google.protobuf.empty_pb2 import Empty + +from feast import Entity, FeatureStore +from feast.protos.feast.registry 
import RegistryServer_pb2 +from feast.registry_server import RegistryServer + + +def call_registry_server(server, method: str, request=None): + service = RegistryServer_pb2.DESCRIPTOR.services_by_name["RegistryServer"] + rpc = server.invoke_unary_unary( + service.methods_by_name[method], (), request if request else Empty(), None + ) + + return rpc.termination() + + +@pytest.fixture +def registry_server(environment): + store: FeatureStore = environment.feature_store + + servicer = RegistryServer(store=store) + + return grpc_testing.server_from_dictionary( + {RegistryServer_pb2.DESCRIPTOR.services_by_name["RegistryServer"]: servicer}, + grpc_testing.strict_real_time(), + ) + + +def test_registry_server_get_entity(environment, registry_server): + store: FeatureStore = environment.feature_store + entity = Entity(name="driver", join_keys=["driver_id"]) + store.apply(entity) + + expected = store.get_entity(entity.name) + + get_entity_request = RegistryServer_pb2.GetEntityRequest( + name=entity.name, project=store.project, allow_cache=False + ) + response, trailing_metadata, code, details = call_registry_server( + registry_server, "GetEntity", get_entity_request + ) + response_entity = Entity.from_proto(response) + + assertpy.assert_that(response_entity).is_equal_to(expected) + + +def test_registry_server_proto(environment, registry_server): + store: FeatureStore = environment.feature_store + entity = Entity(name="driver", join_keys=["driver_id"]) + store.apply(entity) + + expected = store.registry.proto() + response, trailing_metadata, code, details = call_registry_server( + registry_server, "Proto" + ) + + assertpy.assert_that(response).is_equal_to(expected) diff --git a/setup.py b/setup.py index 4905a7697d7..29b8dc5a685 100644 --- a/setup.py +++ b/setup.py @@ -234,7 +234,7 @@ else: use_scm_version = None -PROTO_SUBDIRS = ["core", "serving", "types", "storage"] +PROTO_SUBDIRS = ["core", "registry", "serving", "types", "storage"] PYTHON_CODE_PREFIX = "sdk/python" From 
b4aed657bf830502344ede5c98841d0d77ebf4ef Mon Sep 17 00:00:00 2001 From: Harry Date: Wed, 7 Feb 2024 13:50:12 +0700 Subject: [PATCH 16/30] chore: Set upper bound for moto package (#3937) chore: set upper bound for moto package Signed-off-by: Hai Nguyen --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 29b8dc5a685..4fb80871b22 100644 --- a/setup.py +++ b/setup.py @@ -155,7 +155,7 @@ "grpcio-testing>=1.56.2,<2", "minio==7.1.0", "mock==2.0.0", - "moto", + "moto<5", "mypy>=0.981,<0.990", "avro==1.10.0", "fsspec<2023.10.0", From 49d2988a562c66b3949cf2368fe44ed41e767eab Mon Sep 17 00:00:00 2001 From: Dongwoo Park <40623259+Woo-Dong@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:39:29 +0900 Subject: [PATCH 17/30] fix: Trino as an OfflineStore Access Denied when BasicAuthenticaion (#3898) --- .../infra/offline_stores/contrib/trino_offline_store/trino.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py index f662cda9130..d4cfdb66329 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py +++ b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py @@ -40,7 +40,7 @@ class BasicAuthModel(FeastConfigBaseModel): username: StrictStr - password: SecretStr + password: StrictStr class KerberosAuthModel(FeastConfigBaseModel): From c16e5afcc5273b0c26b79dd4e233a28618ac490a Mon Sep 17 00:00:00 2001 From: TS <67011812+tsisodia10@users.noreply.github.com> Date: Thu, 8 Feb 2024 09:54:41 -0500 Subject: [PATCH 18/30] fix: Typo Correction in Feast UI Readme (#3939) Modify the README to point to correct project list Signed-off-by: Twinkll Sisodia --- ui/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/README.md b/ui/README.md index e91a8741ec5..a9ce5d3ec73 100644 --- a/ui/README.md +++ 
b/ui/README.md @@ -46,7 +46,7 @@ ReactDOM.render( ); ``` -When you start the React app, it will look for `project-list.json` to find a list of your projects. The JSON should looks something like this. +When you start the React app, it will look for `projects-list.json` to find a list of your projects. The JSON should looks something like this. ```json { From bdce99d8e4581b7c59558b91840f019a16194b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20T=C3=B3th?= Date: Fri, 9 Feb 2024 23:44:47 +0100 Subject: [PATCH 19/30] docs: Add ScyllaDB as online store alternative (fixed DCO) (#3944) --- docs/SUMMARY.md | 1 + docs/reference/online-stores/README.md | 4 +- docs/reference/online-stores/scylladb.md | 94 ++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 docs/reference/online-stores/scylladb.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index c80ded2adf0..8affea898ef 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -99,6 +99,7 @@ * [MySQL (contrib)](reference/online-stores/mysql.md) * [Rockset (contrib)](reference/online-stores/rockset.md) * [Hazelcast (contrib)](reference/online-stores/hazelcast.md) + * [ScyllaDB (contrib)](reference/online-stores/scylladb.md) * [Providers](reference/providers/README.md) * [Local](reference/providers/local.md) * [Google Cloud Platform](reference/providers/google-cloud-platform.md) diff --git a/docs/reference/online-stores/README.md b/docs/reference/online-stores/README.md index f86e6f6a1df..d90bfcf1632 100644 --- a/docs/reference/online-stores/README.md +++ b/docs/reference/online-stores/README.md @@ -54,4 +54,6 @@ Please see [Online Store](../../getting-started/architecture-and-components/onli [hazelcast.md](hazelcast.md) {% endcontent-ref %} - +{% content-ref url="scylladb.md" %} +[scylladb.md](scylladb.md) +{% endcontent-ref %} diff --git a/docs/reference/online-stores/scylladb.md b/docs/reference/online-stores/scylladb.md new file mode 100644 index 00000000000..e28e810e214 --- 
/dev/null +++ b/docs/reference/online-stores/scylladb.md @@ -0,0 +1,94 @@ +# ScyllaDB Cloud online store + +## Description + +ScyllaDB is a low-latency and high-performance Cassandra-compatible (uses CQL) database. You can use the existing Cassandra connector to use ScyllaDB as an online store in Feast. + +The [ScyllaDB](https://www.scylladb.com/) online store provides support for materializing feature values into a ScyllaDB or [ScyllaDB Cloud](https://www.scylladb.com/product/scylla-cloud/) cluster for serving online features real-time. + +## Getting started + +Install Feast with Cassandra support: +```bash +pip install "feast[cassandra]" +``` + +Create a new Feast project: +```bash +feast init REPO_NAME -t cassandra +``` + +### Example (ScyllaDB) + +{% code title="feature_store.yaml" %} +```yaml +project: scylla_feature_repo +registry: data/registry.db +provider: local +online_store: + type: cassandra + hosts: + - 172.17.0.2 + keyspace: feast + username: scylla + password: password +``` +{% endcode %} + +### Example (ScyllaDB Cloud) + +{% code title="feature_store.yaml" %} +```yaml +project: scylla_feature_repo +registry: data/registry.db +provider: local +online_store: + type: cassandra + hosts: + - node-0.aws_us_east_1.xxxxxxxx.clusters.scylla.cloud + - node-1.aws_us_east_1.xxxxxxxx.clusters.scylla.cloud + - node-2.aws_us_east_1.xxxxxxxx.clusters.scylla.cloud + keyspace: feast + username: scylla + password: password +``` +{% endcode %} + + +The full set of configuration options is available in [CassandraOnlineStoreConfig](https://rtd.feast.dev/en/master/#feast.infra.online_stores.contrib.cassandra_online_store.cassandra_online_store.CassandraOnlineStoreConfig). +For a full explanation of configuration options please look at file +`sdk/python/feast/infra/online_stores/contrib/cassandra_online_store/README.md`. + +Storage specifications can be found at `docs/specs/online_store_format.md`. 
+ +## Functionality Matrix + +The set of functionality supported by online stores is described in detail [here](overview.md#functionality). +Below is a matrix indicating which functionality is supported by the Cassandra plugin. + +| | Cassandra | +| :-------------------------------------------------------- | :-------- | +| write feature values to the online store | yes | +| read feature values from the online store | yes | +| update infrastructure (e.g. tables) in the online store | yes | +| teardown infrastructure (e.g. tables) in the online store | yes | +| generate a plan of infrastructure changes | yes | +| support for on-demand transforms | yes | +| readable by Python SDK | yes | +| readable by Java | no | +| readable by Go | no | +| support for entityless feature views | yes | +| support for concurrent writing to the same key | no | +| support for ttl (time to live) at retrieval | no | +| support for deleting expired data | no | +| collocated by feature view | yes | +| collocated by feature service | no | +| collocated by entity key | no | + +To compare this set of functionality against other online stores, please see the full [functionality matrix](overview.md#functionality-matrix). 
+ +## Resources + +* [Sample application with ScyllaDB](https://feature-store.scylladb.com/stable/) +* [ScyllaDB website](https://www.scylladb.com/) +* [ScyllaDB Cloud documentation](https://cloud.docs.scylladb.com/stable/) From 7d75fc525a7f2f46811d168ce71f91b5736ad788 Mon Sep 17 00:00:00 2001 From: Job Almekinders <55230856+job-almekinders@users.noreply.github.com> Date: Fri, 9 Feb 2024 23:45:22 +0100 Subject: [PATCH 20/30] fix: Add conn.commit() to Postgresonline_write_batch.online_write_batch (#3904) --- sdk/python/feast/infra/online_stores/contrib/postgres.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/python/feast/infra/online_stores/contrib/postgres.py b/sdk/python/feast/infra/online_stores/contrib/postgres.py index a12e66f1090..49f87ddb0ae 100644 --- a/sdk/python/feast/infra/online_stores/contrib/postgres.py +++ b/sdk/python/feast/infra/online_stores/contrib/postgres.py @@ -99,6 +99,7 @@ def online_write_batch( cur_batch, page_size=batch_size, ) + conn.commit() if progress: progress(len(cur_batch)) From d3a2a45d9bc2b690a7aa784ec7b0411e91244dab Mon Sep 17 00:00:00 2001 From: Tornike Gurgenidze Date: Sat, 10 Feb 2024 02:45:38 +0400 Subject: [PATCH 21/30] fix: Transformation server doesn't generate files from proto (#3902) --- sdk/python/feast/infra/transformation_servers/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/transformation_servers/Dockerfile b/sdk/python/feast/infra/transformation_servers/Dockerfile index c072ed01604..41f272c757c 100644 --- a/sdk/python/feast/infra/transformation_servers/Dockerfile +++ b/sdk/python/feast/infra/transformation_servers/Dockerfile @@ -15,7 +15,7 @@ COPY README.md README.md # Install dependencies -RUN --mount=source=.git,target=.git,type=bind pip3 install --no-cache-dir -e '.[gcp,aws]' +RUN --mount=source=.git,target=.git,type=bind pip3 install --no-cache-dir '.[gcp,aws]' # Start feature transformation server CMD [ "python", "app.py" ] From 
4e450ad3b1b6d2f66fd87e07805bb57772390142 Mon Sep 17 00:00:00 2001 From: Chester Date: Sat, 10 Feb 2024 21:00:09 +0800 Subject: [PATCH 22/30] chore: Bumping fastapi + starlette (#3938) --- Makefile | 2 +- sdk/python/feast/data_source.py | 19 +++++----- sdk/python/feast/feature_service.py | 2 +- sdk/python/feast/feature_view.py | 2 +- sdk/python/feast/importer.py | 3 +- .../infra/contrib/spark_kafka_processor.py | 11 +++++- .../feast/infra/contrib/stream_processor.py | 11 +++--- .../athena_offline_store/athena_source.py | 6 +-- .../athena_offline_store/tests/data_source.py | 2 +- .../mssql_offline_store/tests/data_source.py | 8 ++-- .../tests/data_source.py | 2 +- .../spark_offline_store/tests/data_source.py | 4 +- .../feast/infra/offline_stores/file_source.py | 2 +- .../infra/offline_stores/offline_store.py | 37 +++++++------------ .../feast/infra/offline_stores/redshift.py | 6 +-- .../infra/offline_stores/snowflake_source.py | 4 +- .../feast/infra/online_stores/dynamodb.py | 4 +- .../feast/infra/passthrough_provider.py | 2 +- sdk/python/feast/infra/provider.py | 2 +- .../feast/infra/registry/base_registry.py | 2 + .../feast/infra/registry/registry_store.py | 4 +- sdk/python/feast/infra/registry/snowflake.py | 2 +- sdk/python/feast/infra/utils/aws_utils.py | 2 +- sdk/python/feast/infra/utils/hbase_utils.py | 8 ++-- .../infra/utils/snowflake/snowflake_utils.py | 6 ++- sdk/python/feast/type_map.py | 9 +++-- .../requirements/py3.10-ci-requirements.txt | 26 ++++--------- .../requirements/py3.10-requirements.txt | 12 ++---- .../requirements/py3.8-ci-requirements.txt | 26 ++++--------- .../requirements/py3.8-requirements.txt | 12 ++---- .../requirements/py3.9-ci-requirements.txt | 26 ++++--------- .../requirements/py3.9-requirements.txt | 12 ++---- sdk/python/tests/data/data_creator.py | 2 +- sdk/python/tests/foo_provider.py | 8 ++-- .../universal/data_source_creator.py | 4 +- .../universal/data_sources/bigquery.py | 2 +- .../universal/data_sources/file.py | 6 +-- 
.../universal/data_sources/redshift.py | 2 +- .../universal/data_sources/snowflake.py | 2 +- .../universal/online_store_creator.py | 4 +- .../offline_stores/test_offline_store.py | 15 +++++--- setup.py | 4 +- 42 files changed, 147 insertions(+), 178 deletions(-) diff --git a/Makefile b/Makefile index 4b85c0e4483..6736e64078f 100644 --- a/Makefile +++ b/Makefile @@ -310,7 +310,7 @@ format-python: cd ${ROOT_DIR}/sdk/python; python -m black --target-version py38 feast tests lint-python: - cd ${ROOT_DIR}/sdk/python; python -m mypy + cd ${ROOT_DIR}/sdk/python; python -m mypy --exclude=/tests/ --follow-imports=skip feast cd ${ROOT_DIR}/sdk/python; python -m isort feast/ tests/ --check-only cd ${ROOT_DIR}/sdk/python; python -m flake8 feast/ tests/ cd ${ROOT_DIR}/sdk/python; python -m black --check feast tests diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index b7ce19aad9b..3421fd5d309 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import enum import warnings from abc import ABC, abstractmethod @@ -485,12 +484,12 @@ def to_proto(self) -> DataSourceProto: return data_source_proto def validate(self, config: RepoConfig): - pass + raise NotImplementedError def get_table_column_names_and_types( self, config: RepoConfig ) -> Iterable[Tuple[str, str]]: - pass + raise NotImplementedError @staticmethod def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: @@ -534,12 +533,12 @@ def __init__( self.schema = schema def validate(self, config: RepoConfig): - pass + raise NotImplementedError def get_table_column_names_and_types( self, config: RepoConfig ) -> Iterable[Tuple[str, str]]: - pass + raise NotImplementedError def __eq__(self, other): if not isinstance(other, RequestSource): @@ -610,12 +609,12 @@ def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: @typechecked class KinesisSource(DataSource): def validate(self, config: RepoConfig): - pass + raise NotImplementedError def get_table_column_names_and_types( self, config: RepoConfig ) -> Iterable[Tuple[str, str]]: - pass + raise NotImplementedError @staticmethod def from_proto(data_source: DataSourceProto): @@ -639,7 +638,7 @@ def from_proto(data_source: DataSourceProto): @staticmethod def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: - pass + raise NotImplementedError def get_table_query_string(self) -> str: raise NotImplementedError @@ -772,12 +771,12 @@ def __hash__(self): return super().__hash__() def validate(self, config: RepoConfig): - pass + raise NotImplementedError def get_table_column_names_and_types( self, config: RepoConfig ) -> Iterable[Tuple[str, str]]: - pass + raise NotImplementedError @staticmethod def from_proto(data_source: DataSourceProto): diff --git a/sdk/python/feast/feature_service.py b/sdk/python/feast/feature_service.py index c3037a55da2..7ec923205a3 100644 --- a/sdk/python/feast/feature_service.py +++ b/sdk/python/feast/feature_service.py @@ -56,7 +56,7 @@ def 
__init__( *, name: str, features: List[Union[FeatureView, OnDemandFeatureView]], - tags: Dict[str, str] = None, + tags: Optional[Dict[str, str]] = None, description: str = "", owner: str = "", logging_config: Optional[LoggingConfig] = None, diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 67f9662d317..f87ae7ab132 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -101,7 +101,7 @@ def __init__( name: str, source: DataSource, schema: Optional[List[Field]] = None, - entities: List[Entity] = None, + entities: Optional[List[Entity]] = None, ttl: Optional[timedelta] = timedelta(days=0), online: bool = True, description: str = "", diff --git a/sdk/python/feast/importer.py b/sdk/python/feast/importer.py index bbd592101a6..d1d7d629010 100644 --- a/sdk/python/feast/importer.py +++ b/sdk/python/feast/importer.py @@ -1,4 +1,5 @@ import importlib +from typing import Optional from feast.errors import ( FeastClassImportError, @@ -7,7 +8,7 @@ ) -def import_class(module_name: str, class_name: str, class_type: str = None): +def import_class(module_name: str, class_name: str, class_type: Optional[str] = None): """ Dynamically loads and returns a class from a module. 
diff --git a/sdk/python/feast/infra/contrib/spark_kafka_processor.py b/sdk/python/feast/infra/contrib/spark_kafka_processor.py index ea55d89988a..bac1c28b064 100644 --- a/sdk/python/feast/infra/contrib/spark_kafka_processor.py +++ b/sdk/python/feast/infra/contrib/spark_kafka_processor.py @@ -5,6 +5,7 @@ from pyspark.sql import DataFrame, SparkSession from pyspark.sql.avro.functions import from_avro from pyspark.sql.functions import col, from_json +from pyspark.sql.streaming import StreamingQuery from feast.data_format import AvroFormat, JsonFormat from feast.data_source import KafkaSource, PushMode @@ -63,7 +64,13 @@ def __init__( self.join_keys = [fs.get_entity(entity).join_key for entity in sfv.entities] super().__init__(fs=fs, sfv=sfv, data_source=sfv.stream_source) - def ingest_stream_feature_view(self, to: PushMode = PushMode.ONLINE) -> None: + # Type hinting for data_source type. + # data_source type has been checked to be an instance of KafkaSource. + self.data_source: KafkaSource = self.data_source # type: ignore + + def ingest_stream_feature_view( + self, to: PushMode = PushMode.ONLINE + ) -> StreamingQuery: ingested_stream_df = self._ingest_stream_data() transformed_df = self._construct_transformation_plan(ingested_stream_df) online_store_query = self._write_stream_data(transformed_df, to) @@ -122,7 +129,7 @@ def _ingest_stream_data(self) -> StreamTable: def _construct_transformation_plan(self, df: StreamTable) -> StreamTable: return self.sfv.udf.__call__(df) if self.sfv.udf else df - def _write_stream_data(self, df: StreamTable, to: PushMode): + def _write_stream_data(self, df: StreamTable, to: PushMode) -> StreamingQuery: # Validation occurs at the fs.write_to_online_store() phase against the stream feature view schema. 
def batch_write(row: DataFrame, batch_id: int): rows: pd.DataFrame = row.toPandas() diff --git a/sdk/python/feast/infra/contrib/stream_processor.py b/sdk/python/feast/infra/contrib/stream_processor.py index 24817c82eaa..df4e144f8c6 100644 --- a/sdk/python/feast/infra/contrib/stream_processor.py +++ b/sdk/python/feast/infra/contrib/stream_processor.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Optional from pyspark.sql import DataFrame +from typing_extensions import TypeAlias from feast.data_source import DataSource, PushMode from feast.importer import import_class @@ -17,7 +18,7 @@ } # TODO: support more types other than just Spark. -StreamTable = DataFrame +StreamTable: TypeAlias = DataFrame class ProcessorConfig(FeastConfigBaseModel): @@ -54,13 +55,13 @@ def ingest_stream_feature_view(self, to: PushMode = PushMode.ONLINE) -> None: Ingests data from the stream source attached to the stream feature view; transforms the data and then persists it to the online store and/or offline store, depending on the 'to' parameter. """ - pass + raise NotImplementedError def _ingest_stream_data(self) -> StreamTable: """ Ingests data into a StreamTable. """ - pass + raise NotImplementedError def _construct_transformation_plan(self, table: StreamTable) -> StreamTable: """ @@ -68,14 +69,14 @@ def _construct_transformation_plan(self, table: StreamTable) -> StreamTable: evaluation, the StreamTable will not be materialized until it is actually evaluated. For example: df.collect() in spark or tbl.execute() in Flink. """ - pass + raise NotImplementedError def _write_stream_data(self, table: StreamTable, to: PushMode) -> None: """ Launches a job to persist stream data to the online store and/or offline store, depending on the 'to' parameter, and returns a handle for the job. 
""" - pass + raise NotImplementedError def get_stream_processor_object( diff --git a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena_source.py b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena_source.py index 8e9e3893f3a..0aca42cd682 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena_source.py @@ -297,9 +297,9 @@ class SavedDatasetAthenaStorage(SavedDatasetStorage): def __init__( self, table_ref: str, - query: str = None, - database: str = None, - data_source: str = None, + query: Optional[str] = None, + database: Optional[str] = None, + data_source: Optional[str] = None, ): self.athena_options = AthenaOptions( table=table_ref, query=query, database=database, data_source=data_source diff --git a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py index 384ab69e81f..f68e109d6c1 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py @@ -51,7 +51,7 @@ def create_data_source( suffix: Optional[str] = None, timestamp_field="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, ) -> DataSource: table_name = destination_name diff --git a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py index 9b751d98efe..2604cf7c18b 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py @@ -1,4 +1,4 @@ -from typing 
import Dict, List +from typing import Dict, List, Optional import pandas as pd import pytest @@ -66,7 +66,7 @@ def create_data_source( destination_name: str, timestamp_field="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, **kwargs, ) -> DataSource: # Make sure the field mapping is correct and convert the datetime datasources. @@ -99,10 +99,10 @@ def create_data_source( ) def create_saved_dataset_destination(self) -> SavedDatasetStorage: - pass + raise NotImplementedError def get_prefixed_table_name(self, destination_name: str) -> str: return f"{self.project_name}_{destination_name}" def teardown(self): - pass + raise NotImplementedError diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py index f4479501323..224fcea30f9 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py @@ -85,7 +85,7 @@ def create_data_source( suffix: Optional[str] = None, timestamp_field="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py index 71c07b20c27..7b4fda3b5f5 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py @@ -2,7 +2,7 @@ import shutil import tempfile import uuid -from typing import Dict, List +from typing import Dict, List, 
Optional import pandas as pd from pyspark import SparkConf @@ -70,7 +70,7 @@ def create_data_source( destination_name: str, timestamp_field="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, **kwargs, ) -> DataSource: if timestamp_field in df: diff --git a/sdk/python/feast/infra/offline_stores/file_source.py b/sdk/python/feast/infra/offline_stores/file_source.py index ac824b359f4..887b4100796 100644 --- a/sdk/python/feast/infra/offline_stores/file_source.py +++ b/sdk/python/feast/infra/offline_stores/file_source.py @@ -183,7 +183,7 @@ def create_filesystem_and_path( return None, path def get_table_query_string(self) -> str: - pass + raise NotImplementedError class FileOptions: diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index 6141e3c435b..30135feccb3 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import warnings -from abc import ABC, abstractmethod +from abc import ABC from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, List, Optional, Union @@ -150,9 +150,8 @@ def to_sql(self) -> str: """ Return RetrievalJob generated SQL statement if applicable. """ - pass + raise NotImplementedError - @abstractmethod def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: """ Synchronously executes the underlying query and returns the result as a pandas dataframe. @@ -162,9 +161,8 @@ def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: Does not handle on demand transformations or dataset validation. For either of those, `to_df` should be used. 
""" - pass + raise NotImplementedError - @abstractmethod def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: """ Synchronously executes the underlying query and returns the result as an arrow table. @@ -174,21 +172,18 @@ def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: Does not handle on demand transformations or dataset validation. For either of those, `to_arrow` should be used. """ - pass + raise NotImplementedError @property - @abstractmethod def full_feature_names(self) -> bool: """Returns True if full feature names should be applied to the results of the query.""" - pass + raise NotImplementedError @property - @abstractmethod def on_demand_feature_views(self) -> List[OnDemandFeatureView]: """Returns a list containing all the on demand feature views to be handled.""" - pass + raise NotImplementedError - @abstractmethod def persist( self, storage: SavedDatasetStorage, @@ -204,13 +199,12 @@ def persist( allow_overwrite: If True, a pre-existing location (e.g. table or file) can be overwritten. Currently not all individual offline store implementations make use of this parameter. """ - pass + raise NotImplementedError @property - @abstractmethod def metadata(self) -> Optional[RetrievalMetadata]: """Returns metadata about the retrieval job.""" - pass + raise NotImplementedError def supports_remote_storage_export(self) -> bool: """Returns True if the RetrievalJob supports `to_remote_storage`.""" @@ -226,7 +220,7 @@ def to_remote_storage(self) -> List[str]: Returns: A list of parquet file paths in remote storage. """ - raise NotImplementedError() + raise NotImplementedError class OfflineStore(ABC): @@ -239,7 +233,6 @@ class OfflineStore(ABC): """ @staticmethod - @abstractmethod def pull_latest_from_table_or_query( config: RepoConfig, data_source: DataSource, @@ -270,10 +263,9 @@ def pull_latest_from_table_or_query( Returns: A RetrievalJob that can be executed to get the entity rows. 
""" - pass + raise NotImplementedError @staticmethod - @abstractmethod def get_historical_features( config: RepoConfig, feature_views: List[FeatureView], @@ -302,10 +294,9 @@ def get_historical_features( Returns: A RetrievalJob that can be executed to get the features. """ - pass + raise NotImplementedError @staticmethod - @abstractmethod def pull_all_from_table_or_query( config: RepoConfig, data_source: DataSource, @@ -334,7 +325,7 @@ def pull_all_from_table_or_query( Returns: A RetrievalJob that can be executed to get the entity rows. """ - pass + raise NotImplementedError @staticmethod def write_logged_features( @@ -358,7 +349,7 @@ def write_logged_features( logging_config: A LoggingConfig object that determines where the logs will be written. registry: The registry for the current feature store. """ - raise NotImplementedError() + raise NotImplementedError @staticmethod def offline_write_batch( @@ -377,4 +368,4 @@ def offline_write_batch( progress: Function to be called once a portion of the data has been written, used to show progress. 
""" - raise NotImplementedError() + raise NotImplementedError diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index 837cf49655d..6034bf5ac7b 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -51,13 +51,13 @@ class RedshiftOfflineStoreConfig(FeastConfigBaseModel): type: Literal["redshift"] = "redshift" """ Offline store type selector""" - cluster_id: Optional[StrictStr] + cluster_id: Optional[StrictStr] = None """ Redshift cluster identifier, for provisioned clusters """ - user: Optional[StrictStr] + user: Optional[StrictStr] = None """ Redshift user name, only required for provisioned clusters """ - workgroup: Optional[StrictStr] + workgroup: Optional[StrictStr] = None """ Redshift workgroup identifier, for serverless """ region: StrictStr diff --git a/sdk/python/feast/infra/offline_stores/snowflake_source.py b/sdk/python/feast/infra/offline_stores/snowflake_source.py index 0cbf82dd1c1..e29197c68d4 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake_source.py +++ b/sdk/python/feast/infra/offline_stores/snowflake_source.py @@ -1,5 +1,5 @@ import warnings -from typing import Callable, Dict, Iterable, Optional, Tuple +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple from typeguard import typechecked @@ -223,7 +223,7 @@ def get_table_column_names_and_types( query = f"SELECT * FROM {self.get_table_query_string()} LIMIT 5" cursor = execute_snowflake_statement(conn, query) - metadata = [ + metadata: List[Dict[str, Any]] = [ { "column_name": column.name, "type_code": column.type_code, diff --git a/sdk/python/feast/infra/online_stores/dynamodb.py b/sdk/python/feast/infra/online_stores/dynamodb.py index 525978e736b..a1eef16f40d 100644 --- a/sdk/python/feast/infra/online_stores/dynamodb.py +++ b/sdk/python/feast/infra/online_stores/dynamodb.py @@ -288,12 +288,12 @@ def _get_dynamodb_resource(self, region: 
str, endpoint_url: Optional[str] = None ) return self._dynamodb_resource - def _sort_dynamodb_response(self, responses: list, order: list): + def _sort_dynamodb_response(self, responses: list, order: list) -> Any: """DynamoDB Batch Get Item doesn't return items in a particular order.""" # Assign an index to order order_with_index = {value: idx for idx, value in enumerate(order)} # Sort table responses by index - table_responses_ordered = [ + table_responses_ordered: Any = [ (order_with_index[tbl_res["entity_id"]], tbl_res) for tbl_res in responses ] table_responses_ordered = sorted( diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py index 28b10c12595..811abe106c3 100644 --- a/sdk/python/feast/infra/passthrough_provider.py +++ b/sdk/python/feast/infra/passthrough_provider.py @@ -180,7 +180,7 @@ def online_read( config: RepoConfig, table: FeatureView, entity_keys: List[EntityKeyProto], - requested_features: List[str] = None, + requested_features: Optional[List[str]] = None, ) -> List: set_usage_attribute("provider", self.__class__.__name__) result = [] diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index 82879b264af..2a9670cacef 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -211,7 +211,7 @@ def online_read( config: RepoConfig, table: FeatureView, entity_keys: List[EntityKeyProto], - requested_features: List[str] = None, + requested_features: Optional[List[str]] = None, ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: """ Reads features values for the given entity keys. 
diff --git a/sdk/python/feast/infra/registry/base_registry.py b/sdk/python/feast/infra/registry/base_registry.py index 8928a5800dd..f89b0794788 100644 --- a/sdk/python/feast/infra/registry/base_registry.py +++ b/sdk/python/feast/infra/registry/base_registry.py @@ -503,7 +503,9 @@ def list_validation_references( Returns: List of request feature views """ + raise NotImplementedError + @abstractmethod def list_project_metadata( self, project: str, allow_cache: bool = False ) -> List[ProjectMetadata]: diff --git a/sdk/python/feast/infra/registry/registry_store.py b/sdk/python/feast/infra/registry/registry_store.py index c42a55cd9d2..5151fd74b27 100644 --- a/sdk/python/feast/infra/registry/registry_store.py +++ b/sdk/python/feast/infra/registry/registry_store.py @@ -17,7 +17,7 @@ def get_registry_proto(self) -> RegistryProto: Returns: Returns either the registry proto stored at the registry path, or an empty registry proto. """ - pass + raise NotImplementedError @abstractmethod def update_registry_proto(self, registry_proto: RegistryProto): @@ -40,7 +40,7 @@ def teardown(self): class NoopRegistryStore(RegistryStore): def get_registry_proto(self) -> RegistryProto: - pass + return RegistryProto() def update_registry_proto(self, registry_proto: RegistryProto): pass diff --git a/sdk/python/feast/infra/registry/snowflake.py b/sdk/python/feast/infra/registry/snowflake.py index 56c7bc1f659..c1ebf13d6b8 100644 --- a/sdk/python/feast/infra/registry/snowflake.py +++ b/sdk/python/feast/infra/registry/snowflake.py @@ -418,7 +418,7 @@ def _delete_object( """ cursor = execute_snowflake_statement(conn, query) - if cursor.rowcount < 1 and not_found_exception: + if cursor.rowcount < 1 and not_found_exception: # type: ignore raise not_found_exception(name, project) self._set_last_updated_metadata(datetime.utcnow(), project) diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index ef83c6d1c62..c3604ee41f0 100644 --- 
a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -816,7 +816,7 @@ def execute_athena_query( database: str, workgroup: str, query: str, - temp_table: str = None, + temp_table: Optional[str] = None, ) -> str: """Execute athena statement synchronously. Waits for the query to finish. diff --git a/sdk/python/feast/infra/utils/hbase_utils.py b/sdk/python/feast/infra/utils/hbase_utils.py index d44f93f1619..72afda2ef3d 100644 --- a/sdk/python/feast/infra/utils/hbase_utils.py +++ b/sdk/python/feast/infra/utils/hbase_utils.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional from happybase import ConnectionPool @@ -38,9 +38,9 @@ class HBaseConnector: def __init__( self, - pool: ConnectionPool = None, - host: str = None, - port: int = None, + pool: Optional[ConnectionPool] = None, + host: Optional[str] = None, + port: Optional[int] = None, connection_pool_size: int = 4, ): if pool is None: diff --git a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py index a4cda89a6f6..8eb5177ac23 100644 --- a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py +++ b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py @@ -43,7 +43,11 @@ class GetSnowflakeConnection: - def __init__(self, config: str, autocommit=True): + def __init__( + self, + config: str, + autocommit=True, + ): self.config = config self.autocommit = autocommit diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index e51e1e743bb..ad3e273d37b 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -51,7 +51,7 @@ import pyarrow # null timestamps get converted to -9223372036854775808 -NULL_TIMESTAMP_INT_VALUE = np.datetime64("NaT").astype(int) +NULL_TIMESTAMP_INT_VALUE: int = np.datetime64("NaT").astype(int) def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any: @@ -114,7 +114,10 @@ def 
feast_value_type_to_pandas_type(value_type: ValueType) -> Any: def python_type_to_feast_value_type( - name: str, value: Any = None, recurse: bool = True, type_name: Optional[str] = None + name: str, + value: Optional[Any] = None, + recurse: bool = True, + type_name: Optional[str] = None, ) -> ValueType: """ Finds the equivalent Feast Value Type for a Python value. Both native @@ -321,7 +324,7 @@ def _python_datetime_to_int_timestamp( elif isinstance(value, Timestamp): int_timestamps.append(int(value.ToSeconds())) elif isinstance(value, np.datetime64): - int_timestamps.append(value.astype("datetime64[s]").astype(np.int_)) + int_timestamps.append(value.astype("datetime64[s]").astype(np.int_)) # type: ignore[attr-defined] elif isinstance(value, type(np.nan)): int_timestamps.append(NULL_TIMESTAMP_INT_VALUE) else: diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 740356907d8..9435a68deb7 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -121,9 +121,7 @@ comm==0.2.0 # ipykernel # ipywidgets coverage[toml]==7.3.2 - # via - # coverage - # pytest-cov + # via pytest-cov cryptography==41.0.6 # via # azure-identity @@ -173,7 +171,7 @@ execnet==2.0.2 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.99.1 +fastapi==0.109.1 # via feast (setup.py) fastavro==1.9.0 # via @@ -226,9 +224,7 @@ google-auth==2.23.4 google-auth-httplib2==0.1.1 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via - # feast (setup.py) - # google-cloud-bigquery + # via feast (setup.py) google-cloud-bigquery-storage==2.22.0 # via feast (setup.py) google-cloud-bigtable==2.21.0 @@ -462,7 +458,7 @@ msgpack==1.0.7 # via cachecontrol multiprocess==0.70.15 # via bytewax -mypy==0.982 +mypy==1.8.0 # via # feast (setup.py) # sqlalchemy @@ -801,9 +797,7 @@ sniffio==1.3.0 snowballstemmer==2.2.0 # via sphinx 
snowflake-connector-python[pandas]==3.5.0 - # via - # feast (setup.py) - # snowflake-connector-python + # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 @@ -829,14 +823,12 @@ sphinxcontrib-qthelp==1.0.6 sphinxcontrib-serializinghtml==1.1.9 # via sphinx sqlalchemy[mypy]==1.4.50 - # via - # feast (setup.py) - # sqlalchemy + # via feast (setup.py) sqlalchemy2-stubs==0.0.2a37 # via sqlalchemy stack-data==0.6.3 # via ipython -starlette==0.27.0 +starlette==0.35.1 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -961,9 +953,7 @@ urllib3==1.26.18 # rockset # snowflake-connector-python uvicorn[standard]==0.24.0.post1 - # via - # feast (setup.py) - # uvicorn + # via feast (setup.py) uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 18486d7fa9a..5d5d451e148 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -42,7 +42,7 @@ dill==0.3.7 # via feast (setup.py) exceptiongroup==1.1.3 # via anyio -fastapi==0.99.1 +fastapi==0.109.1 # via feast (setup.py) fastavro==1.9.0 # via @@ -175,12 +175,10 @@ sniffio==1.3.0 # anyio # httpx sqlalchemy[mypy]==1.4.50 - # via - # feast (setup.py) - # sqlalchemy + # via feast (setup.py) sqlalchemy2-stubs==0.0.2a37 # via sqlalchemy -starlette==0.27.0 +starlette==0.35.1 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -210,9 +208,7 @@ typing-extensions==4.8.0 urllib3==2.1.0 # via requests uvicorn[standard]==0.24.0.post1 - # via - # feast (setup.py) - # uvicorn + # via feast (setup.py) uvloop==0.19.0 # via uvicorn volatile==2.1.0 diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 3bda9e72f9f..808a58e11be 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -127,9 +127,7 @@ 
comm==0.2.0 # ipykernel # ipywidgets coverage[toml]==7.3.2 - # via - # coverage - # pytest-cov + # via pytest-cov cryptography==41.0.6 # via # azure-identity @@ -178,7 +176,7 @@ execnet==2.0.2 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.99.1 +fastapi==0.109.1 # via feast (setup.py) fastavro==1.9.0 # via @@ -231,9 +229,7 @@ google-auth==2.23.4 google-auth-httplib2==0.1.1 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via - # feast (setup.py) - # google-cloud-bigquery + # via feast (setup.py) google-cloud-bigquery-storage==2.22.0 # via feast (setup.py) google-cloud-bigtable==2.21.0 @@ -478,7 +474,7 @@ msgpack==1.0.7 # via cachecontrol multiprocess==0.70.15 # via bytewax -mypy==0.982 +mypy==1.8.0 # via # feast (setup.py) # sqlalchemy @@ -824,9 +820,7 @@ sniffio==1.3.0 snowballstemmer==2.2.0 # via sphinx snowflake-connector-python[pandas]==3.5.0 - # via - # feast (setup.py) - # snowflake-connector-python + # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 @@ -846,14 +840,12 @@ sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 # via sphinx sqlalchemy[mypy]==1.4.50 - # via - # feast (setup.py) - # sqlalchemy + # via feast (setup.py) sqlalchemy2-stubs==0.0.2a37 # via sqlalchemy stack-data==0.6.3 # via ipython -starlette==0.27.0 +starlette==0.35.1 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -981,9 +973,7 @@ urllib3==1.26.18 # rockset # snowflake-connector-python uvicorn[standard]==0.24.0.post1 - # via - # feast (setup.py) - # uvicorn + # via feast (setup.py) uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index c180c50c81e..163fa4c9a87 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -42,7 +42,7 @@ dill==0.3.7 # via feast (setup.py) exceptiongroup==1.1.3 # via anyio -fastapi==0.99.1 
+fastapi==0.109.1 # via feast (setup.py) fastavro==1.9.0 # via @@ -180,12 +180,10 @@ sniffio==1.3.0 # anyio # httpx sqlalchemy[mypy]==1.4.50 - # via - # feast (setup.py) - # sqlalchemy + # via feast (setup.py) sqlalchemy2-stubs==0.0.2a37 # via sqlalchemy -starlette==0.27.0 +starlette==0.35.1 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -216,9 +214,7 @@ typing-extensions==4.8.0 urllib3==2.1.0 # via requests uvicorn[standard]==0.24.0.post1 - # via - # feast (setup.py) - # uvicorn + # via feast (setup.py) uvloop==0.19.0 # via uvicorn volatile==2.1.0 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 6989d5b4ccf..f9d7ac3fb9f 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -121,9 +121,7 @@ comm==0.2.0 # ipykernel # ipywidgets coverage[toml]==7.3.2 - # via - # coverage - # pytest-cov + # via pytest-cov cryptography==41.0.6 # via # azure-identity @@ -173,7 +171,7 @@ execnet==2.0.2 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.99.1 +fastapi==0.109.1 # via feast (setup.py) fastavro==1.9.0 # via @@ -226,9 +224,7 @@ google-auth==2.23.4 google-auth-httplib2==0.1.1 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 - # via - # feast (setup.py) - # google-cloud-bigquery + # via feast (setup.py) google-cloud-bigquery-storage==2.22.0 # via feast (setup.py) google-cloud-bigtable==2.21.0 @@ -469,7 +465,7 @@ msgpack==1.0.7 # via cachecontrol multiprocess==0.70.15 # via bytewax -mypy==0.982 +mypy==1.8.0 # via # feast (setup.py) # sqlalchemy @@ -810,9 +806,7 @@ sniffio==1.3.0 snowballstemmer==2.2.0 # via sphinx snowflake-connector-python[pandas]==3.5.0 - # via - # feast (setup.py) - # snowflake-connector-python + # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 @@ -838,14 +832,12 @@ sphinxcontrib-qthelp==1.0.6 sphinxcontrib-serializinghtml==1.1.9 # via 
sphinx sqlalchemy[mypy]==1.4.50 - # via - # feast (setup.py) - # sqlalchemy + # via feast (setup.py) sqlalchemy2-stubs==0.0.2a37 # via sqlalchemy stack-data==0.6.3 # via ipython -starlette==0.27.0 +starlette==0.35.1 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -973,9 +965,7 @@ urllib3==1.26.18 # rockset # snowflake-connector-python uvicorn[standard]==0.24.0.post1 - # via - # feast (setup.py) - # uvicorn + # via feast (setup.py) uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 3b6f88b4e2a..4d9b8f107de 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -42,7 +42,7 @@ dill==0.3.7 # via feast (setup.py) exceptiongroup==1.1.3 # via anyio -fastapi==0.99.1 +fastapi==0.109.1 # via feast (setup.py) fastavro==1.9.0 # via @@ -175,12 +175,10 @@ sniffio==1.3.0 # anyio # httpx sqlalchemy[mypy]==1.4.50 - # via - # feast (setup.py) - # sqlalchemy + # via feast (setup.py) sqlalchemy2-stubs==0.0.2a37 # via sqlalchemy -starlette==0.27.0 +starlette==0.35.1 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -211,9 +209,7 @@ typing-extensions==4.8.0 urllib3==2.1.0 # via requests uvicorn[standard]==0.24.0.post1 - # via - # feast (setup.py) - # uvicorn + # via feast (setup.py) uvloop==0.19.0 # via uvicorn volatile==2.1.0 diff --git a/sdk/python/tests/data/data_creator.py b/sdk/python/tests/data/data_creator.py index 8d5b1979fa3..1fc66aee845 100644 --- a/sdk/python/tests/data/data_creator.py +++ b/sdk/python/tests/data/data_creator.py @@ -9,7 +9,7 @@ def create_basic_driver_dataset( entity_type: FeastType = Int32, - feature_dtype: str = None, + feature_dtype: Optional[str] = None, feature_is_list: bool = False, list_has_empty_list: bool = False, ) -> pd.DataFrame: diff --git a/sdk/python/tests/foo_provider.py b/sdk/python/tests/foo_provider.py index d27e2645d4e..ba256a3813c 100644 --- 
a/sdk/python/tests/foo_provider.py +++ b/sdk/python/tests/foo_provider.py @@ -71,16 +71,16 @@ def get_historical_features( project: str, full_feature_names: bool = False, ) -> RetrievalJob: - pass + return RetrievalJob() def online_read( self, config: RepoConfig, table: FeatureView, entity_keys: List[EntityKeyProto], - requested_features: List[str] = None, + requested_features: Optional[List[str]] = None, ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: - pass + return [] def retrieve_saved_dataset(self, config: RepoConfig, dataset: SavedDataset): pass @@ -102,4 +102,4 @@ def retrieve_feature_service_logs( config: RepoConfig, registry: BaseRegistry, ) -> RetrievalJob: - pass + return RetrievalJob() diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py index b36af0db472..d64463606ff 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py @@ -20,7 +20,7 @@ def create_data_source( destination_name: str, event_timestamp_column="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, timestamp_field: Optional[str] = None, ) -> DataSource: """ @@ -53,7 +53,7 @@ def create_saved_dataset_destination(self) -> SavedDatasetStorage: ... 
def create_logged_features_destination(self) -> LoggingDestination: - pass + raise NotImplementedError @abstractmethod def teardown(self): diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py index 384037eef14..215d19ba7f3 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py @@ -66,7 +66,7 @@ def create_data_source( destination_name: str, timestamp_field="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, **kwargs, ) -> DataSource: diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py index 124dd4c88d6..3263785683e 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py @@ -41,7 +41,7 @@ def create_data_source( destination_name: str, timestamp_field="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) @@ -96,7 +96,7 @@ def create_data_source( destination_name: str, timestamp_field="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) @@ -171,7 +171,7 @@ def create_data_source( suffix: Optional[str] = None, timestamp_field="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, ) -> DataSource: filename = 
f"{destination_name}.parquet" port = self.minio.get_exposed_port("9000") diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py index dfe8e3d33bf..e6f20d6125b 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py @@ -51,7 +51,7 @@ def create_data_source( suffix: Optional[str] = None, timestamp_field="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py index c14780da97d..1414291a18d 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py @@ -51,7 +51,7 @@ def create_data_source( suffix: Optional[str] = None, timestamp_field="ts", created_timestamp_column="created_ts", - field_mapping: Dict[str, str] = None, + field_mapping: Optional[Dict[str, str]] = None, ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py b/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py index c3872ea697f..10a81437395 100644 --- a/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py +++ b/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py @@ -8,7 +8,7 @@ def __init__(self, project_name: str, **kwargs): self.project_name = project_name def create_online_store(self) -> FeastConfigBaseModel: - ... 
+ raise NotImplementedError def teardown(self): - ... + raise NotImplementedError diff --git a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py index ef0cce04707..220bdba0dae 100644 --- a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py +++ b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py @@ -39,6 +39,9 @@ class MockRetrievalJob(RetrievalJob): + def to_sql(self) -> str: + return "" + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: """ Synchronously executes the underlying query and returns the result as a pandas dataframe. @@ -46,7 +49,7 @@ def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: Does not handle on demand transformations or dataset validation. For either of those, `to_df` should be used. """ - pass + return pd.DataFrame() def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: """ @@ -55,17 +58,17 @@ def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: Does not handle on demand transformations or dataset validation. For either of those, `to_arrow` should be used. """ - pass + return pyarrow.Table() @property def full_feature_names(self) -> bool: """Returns True if full feature names should be applied to the results of the query.""" - pass + return False @property def on_demand_feature_views(self) -> List[OnDemandFeatureView]: """Returns a list containing all the on demand feature views to be handled.""" - pass + return [] def persist( self, @@ -87,7 +90,7 @@ def persist( @property def metadata(self) -> Optional[RetrievalMetadata]: """Returns metadata about the retrieval job.""" - pass + raise NotImplementedError # Since RetreivalJob are not really tested for subclasses we add some tests here. 
@@ -208,7 +211,7 @@ def retrieval_job(request, environment): def test_to_sql(): - assert MockRetrievalJob().to_sql() is None + assert MockRetrievalJob().to_sql() == "" @pytest.mark.parametrize("timeout", (None, 30)) diff --git a/setup.py b/setup.py index 4fb80871b22..81ae63a7a44 100644 --- a/setup.py +++ b/setup.py @@ -71,7 +71,7 @@ "toml>=0.10.0,<1", "tqdm>=4,<5", "typeguard==2.13.3", - "fastapi>=0.68.0,<0.100", + "fastapi>=0.68.0", "uvicorn[standard]>=0.14.0,<1", "gunicorn", "dask>=2021.1.0", @@ -156,7 +156,7 @@ "minio==7.1.0", "mock==2.0.0", "moto<5", - "mypy>=0.981,<0.990", + "mypy>=1.4.1", "avro==1.10.0", "fsspec<2023.10.0", "urllib3>=1.25.4,<3", From dbb59ba0932e5962b34b14e7218a1ddae86a9686 Mon Sep 17 00:00:00 2001 From: Tornike Gurgenidze Date: Tue, 13 Feb 2024 23:45:28 +0400 Subject: [PATCH 23/30] fix: Rewrite Spark materialization engine to use mapInPandas (#3936) rewrite spark materilization engine to use mapInPandas Signed-off-by: tokoko --- .../spark/spark_materialization_engine.py | 67 ++++++++++--------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/sdk/python/feast/infra/materialization/contrib/spark/spark_materialization_engine.py b/sdk/python/feast/infra/materialization/contrib/spark/spark_materialization_engine.py index ed4388aeb31..798d3a8e6f5 100644 --- a/sdk/python/feast/infra/materialization/contrib/spark/spark_materialization_engine.py +++ b/sdk/python/feast/infra/materialization/contrib/spark/spark_materialization_engine.py @@ -3,6 +3,7 @@ from typing import Callable, List, Literal, Optional, Sequence, Union, cast import dill +import pandas import pandas as pd import pyarrow from tqdm import tqdm @@ -178,9 +179,9 @@ def _materialize_one( self.repo_config.batch_engine.partitions ) - spark_df.foreachPartition( - lambda x: _process_by_partition(x, spark_serialized_artifacts) - ) + spark_df.mapInPandas( + lambda x: _map_by_partition(x, spark_serialized_artifacts), "status int" + ).count() # dummy action to force evaluation 
return SparkMaterializationJob( job_id=job_id, status=MaterializationJobStatus.SUCCEEDED @@ -225,38 +226,40 @@ def unserialize(self): return feature_view, online_store, repo_config -def _process_by_partition(rows, spark_serialized_artifacts: _SparkSerializedArtifacts): - """Load pandas df to online store""" - - # convert to pyarrow table - dicts = [] - for row in rows: - dicts.append(row.asDict()) +def _map_by_partition(iterator, spark_serialized_artifacts: _SparkSerializedArtifacts): + for pdf in iterator: + if pdf.shape[0] == 0: + print("Skipping") + return - df = pd.DataFrame.from_records(dicts) - if df.shape[0] == 0: - print("Skipping") - return + table = pyarrow.Table.from_pandas(pdf) - table = pyarrow.Table.from_pandas(df) + ( + feature_view, + online_store, + repo_config, + ) = spark_serialized_artifacts.unserialize() + + if feature_view.batch_source.field_mapping is not None: + table = _run_pyarrow_field_mapping( + table, feature_view.batch_source.field_mapping + ) - # unserialize artifacts - feature_view, online_store, repo_config = spark_serialized_artifacts.unserialize() + join_key_to_value_type = { + entity.name: entity.dtype.to_value_type() + for entity in feature_view.entity_columns + } - if feature_view.batch_source.field_mapping is not None: - table = _run_pyarrow_field_mapping( - table, feature_view.batch_source.field_mapping + rows_to_write = _convert_arrow_to_proto( + table, feature_view, join_key_to_value_type + ) + online_store.online_write_batch( + repo_config, + feature_view, + rows_to_write, + lambda x: None, ) - join_key_to_value_type = { - entity.name: entity.dtype.to_value_type() - for entity in feature_view.entity_columns - } - - rows_to_write = _convert_arrow_to_proto(table, feature_view, join_key_to_value_type) - online_store.online_write_batch( - repo_config, - feature_view, - rows_to_write, - lambda x: None, - ) + yield pd.DataFrame( + [pd.Series(range(1, 2))] + ) # dummy result because mapInPandas needs to return something From 
5c9f592890da7c4b857191050c1dacd0b39f78a0 Mon Sep 17 00:00:00 2001 From: cburroughs Date: Wed, 14 Feb 2024 17:52:47 -0500 Subject: [PATCH 24/30] chore: Loosen fsspec requirements to allow recent releases (#3922) * chore: Loosen fsspec requirements to allow recent releases (I'm not sure the project has a super consistent pattern for when to specify a maximum version, but was going for the smallest possible change.) Signed-off-by: Chris Burroughs * drop redundant fsspec now that this is in another extra Signed-off-by: Chris Burroughs * post rebase regen Signed-off-by: Chris Burroughs --------- Signed-off-by: Chris Burroughs --- .../requirements/py3.10-ci-requirements.txt | 266 +++++++++--------- .../requirements/py3.10-requirements.txt | 82 +++--- .../requirements/py3.8-ci-requirements.txt | 239 ++++++++-------- .../requirements/py3.8-requirements.txt | 80 +++--- .../requirements/py3.9-ci-requirements.txt | 265 +++++++++-------- .../requirements/py3.9-requirements.txt | 82 +++--- setup.py | 5 +- 7 files changed, 510 insertions(+), 509 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 9435a68deb7..ffb4662eb15 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -4,11 +4,12 @@ # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.10-ci-requirements.txt # -alabaster==0.7.13 + +alabaster==0.7.16 # via sphinx altair==4.2.0 # via great-expectations -anyio==4.0.0 +anyio==4.2.0 # via # httpx # jupyter-server @@ -32,14 +33,14 @@ async-lru==2.0.4 # via jupyterlab async-timeout==4.0.3 # via redis -attrs==23.1.0 +attrs==23.2.0 # via # bowler # jsonschema # referencing avro==1.10.0 # via feast (setup.py) -azure-core==1.29.5 +azure-core==1.30.0 # via # azure-identity # azure-storage-blob @@ -47,21 +48,21 @@ azure-identity==1.15.0 # via feast (setup.py) azure-storage-blob==12.19.0 # via feast (setup.py) 
-babel==2.13.1 +babel==2.14.0 # via # jupyterlab-server # sphinx -beautifulsoup4==4.12.2 +beautifulsoup4==4.12.3 # via nbconvert black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.29.2 +boto3==1.34.42 # via # feast (setup.py) # moto -botocore==1.32.2 +botocore==1.34.42 # via # boto3 # moto @@ -74,13 +75,13 @@ build==1.0.3 # pip-tools bytewax==0.15.1 # via feast (setup.py) -cachecontrol==0.13.1 +cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.2 # via google-auth -cassandra-driver==3.28.0 +cassandra-driver==3.29.0 # via feast (setup.py) -certifi==2023.7.22 +certifi==2024.2.2 # via # httpcore # httpx @@ -116,13 +117,13 @@ colorama==0.4.6 # via # feast (setup.py) # great-expectations -comm==0.2.0 +comm==0.2.1 # via # ipykernel # ipywidgets -coverage[toml]==7.3.2 +coverage[toml]==7.4.1 # via pytest-cov -cryptography==41.0.6 +cryptography==41.0.7 # via # azure-identity # azure-storage-blob @@ -135,11 +136,11 @@ cryptography==41.0.6 # snowflake-connector-python # types-pyopenssl # types-redis -dask==2023.11.0 +dask==2024.2.0 # via feast (setup.py) -db-dtypes==1.1.1 +db-dtypes==1.2.0 # via google-cloud-bigquery -debugpy==1.8.0 +debugpy==1.8.1 # via ipykernel decorator==5.1.1 # via ipython @@ -147,14 +148,14 @@ defusedxml==0.7.1 # via nbconvert deprecation==2.1.0 # via testcontainers -dill==0.3.7 +dill==0.3.8 # via # bytewax # feast (setup.py) # multiprocess -distlib==0.3.7 +distlib==0.3.8 # via virtualenv -docker==6.1.3 +docker==7.0.0 # via # feast (setup.py) # testcontainers @@ -162,7 +163,7 @@ docutils==0.19 # via sphinx entrypoints==0.4 # via altair -exceptiongroup==1.1.3 +exceptiongroup==1.2.0 # via # anyio # ipython @@ -171,13 +172,13 @@ execnet==2.0.2 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.109.1 +fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.0 +fastavro==1.9.4 # via # feast (setup.py) # pandavro -fastjsonschema==2.19.0 +fastjsonschema==2.19.1 # via nbformat filelock==3.13.1 # via @@ -191,7 +192,7 
@@ flake8==6.0.0 # via feast (setup.py) fqdn==1.5.1 # via jsonschema -fsspec==2023.9.2 +fsspec==2023.12.2 # via # dask # feast (setup.py) @@ -199,7 +200,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.14.0 +google-api-core[grpc]==2.17.1 # via # feast (setup.py) # firebase-admin @@ -211,9 +212,9 @@ google-api-core[grpc]==2.14.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.108.0 +google-api-python-client==2.118.0 # via firebase-admin -google-auth==2.23.4 +google-auth==2.27.0 # via # google-api-core # google-api-python-client @@ -221,26 +222,26 @@ google-auth==2.23.4 # google-cloud-core # google-cloud-storage # kubernetes -google-auth-httplib2==0.1.1 +google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.22.0 +google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) -google-cloud-bigtable==2.21.0 +google-cloud-bigtable==2.23.0 # via feast (setup.py) -google-cloud-core==2.3.3 +google-cloud-core==2.4.1 # via # google-cloud-bigquery # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.18.0 +google-cloud-datastore==2.19.0 # via feast (setup.py) -google-cloud-firestore==2.13.1 +google-cloud-firestore==2.14.0 # via firebase-admin -google-cloud-storage==2.13.0 +google-cloud-storage==2.14.0 # via # feast (setup.py) # firebase-admin @@ -248,11 +249,11 @@ google-crc32c==1.5.0 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.6.0 +google-resumable-media==2.7.0 # via # google-cloud-bigquery # google-cloud-storage -googleapis-common-protos[grpc]==1.61.0 +googleapis-common-protos[grpc]==1.62.0 # via # feast (setup.py) # google-api-core @@ -260,11 +261,11 @@ googleapis-common-protos[grpc]==1.61.0 # grpcio-status great-expectations==0.15.50 # via feast (setup.py) 
-greenlet==3.0.1 +greenlet==3.0.3 # via sqlalchemy -grpc-google-iam-v1==0.12.7 +grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.59.2 +grpcio==1.60.1 # via # feast (setup.py) # google-api-core @@ -276,15 +277,15 @@ grpcio==1.59.2 # grpcio-status # grpcio-testing # grpcio-tools -grpcio-health-checking==1.59.2 +grpcio-health-checking==1.60.1 # via feast (setup.py) -grpcio-reflection==1.59.2 +grpcio-reflection==1.60.1 # via feast (setup.py) -grpcio-status==1.59.2 +grpcio-status==1.60.1 # via google-api-core -grpcio-testing==1.59.2 +grpcio-testing==1.60.1 # via feast (setup.py) -grpcio-tools==1.59.2 +grpcio-tools==1.60.1 # via feast (setup.py) gunicorn==21.2.0 # via feast (setup.py) @@ -296,9 +297,9 @@ happybase==1.2.0 # via feast (setup.py) hazelcast-python-client==5.3.0 # via feast (setup.py) -hiredis==2.2.3 +hiredis==2.3.2 # via feast (setup.py) -httpcore==1.0.2 +httpcore==1.0.3 # via httpx httplib2==0.22.0 # via @@ -306,11 +307,13 @@ httplib2==0.22.0 # google-auth-httplib2 httptools==0.6.1 # via uvicorn -httpx==0.25.1 - # via feast (setup.py) -identify==2.5.31 +httpx==0.26.0 + # via + # feast (setup.py) + # jupyterlab +identify==2.5.34 # via pre-commit -idna==3.4 +idna==3.6 # via # anyio # httpx @@ -319,7 +322,7 @@ idna==3.4 # snowflake-connector-python imagesize==1.4.1 # via sphinx -importlib-metadata==6.8.0 +importlib-metadata==6.11.0 # via # dask # feast (setup.py) @@ -328,20 +331,20 @@ importlib-resources==6.1.1 # via feast (setup.py) iniconfig==2.0.0 # via pytest -ipykernel==6.26.0 +ipykernel==6.29.2 # via jupyterlab -ipython==8.17.2 +ipython==8.21.0 # via # great-expectations # ipykernel # ipywidgets -ipywidgets==8.1.1 +ipywidgets==8.1.2 # via great-expectations isodate==0.6.1 # via azure-storage-blob isoduration==20.11.0 # via jsonschema -isort==5.12.0 +isort==5.13.2 # via feast (setup.py) jedi==0.19.1 # via ipython @@ -368,7 +371,7 @@ jsonpointer==2.4 # via # jsonpatch # jsonschema -jsonschema[format-nongpl]==4.20.0 
+jsonschema[format-nongpl]==4.21.1 # via # altair # feast (setup.py) @@ -376,14 +379,14 @@ jsonschema[format-nongpl]==4.20.0 # jupyter-events # jupyterlab-server # nbformat -jsonschema-specifications==2023.11.1 +jsonschema-specifications==2023.12.1 # via jsonschema jupyter-client==8.6.0 # via # ipykernel # jupyter-server # nbclient -jupyter-core==5.5.0 +jupyter-core==5.7.1 # via # ipykernel # jupyter-client @@ -396,24 +399,24 @@ jupyter-events==0.9.0 # via jupyter-server jupyter-lsp==2.2.2 # via jupyterlab -jupyter-server==2.11.2 +jupyter-server==2.12.5 # via # jupyter-lsp # jupyterlab # jupyterlab-server # notebook # notebook-shim -jupyter-server-terminals==0.4.4 +jupyter-server-terminals==0.5.2 # via jupyter-server -jupyterlab==4.0.11 +jupyterlab==4.1.1 # via notebook -jupyterlab-pygments==0.2.2 +jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.25.1 +jupyterlab-server==2.25.3 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.9 +jupyterlab-widgets==3.0.10 # via ipywidgets kubernetes==20.13.0 # via feast (setup.py) @@ -421,12 +424,12 @@ locket==1.0.0 # via partd makefun==1.15.2 # via great-expectations -markupsafe==2.1.3 +markupsafe==2.1.5 # via # jinja2 # nbconvert # werkzeug -marshmallow==3.20.1 +marshmallow==3.20.2 # via great-expectations matplotlib-inline==0.1.6 # via @@ -440,23 +443,23 @@ mistune==3.0.2 # via # great-expectations # nbconvert -mmh3==4.0.1 +mmh3==4.1.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==4.2.9 +moto==4.2.14 # via feast (setup.py) -msal==1.25.0 +msal==1.26.0 # via # azure-identity # msal-extensions -msal-extensions==1.0.0 +msal-extensions==1.1.0 # via azure-identity msgpack==1.0.7 # via cachecontrol -multiprocess==0.70.15 +multiprocess==0.70.16 # via bytewax mypy==1.8.0 # via @@ -470,7 +473,7 @@ mypy-protobuf==3.1.0 # via feast (setup.py) nbclient==0.9.0 # via nbconvert -nbconvert==7.11.0 +nbconvert==7.16.0 # via jupyter-server nbformat==5.9.2 # via @@ -478,11 
+481,11 @@ nbformat==5.9.2 # jupyter-server # nbclient # nbconvert -nest-asyncio==1.5.8 +nest-asyncio==1.6.0 # via ipykernel nodeenv==1.8.0 # via pre-commit -notebook==7.0.6 +notebook==7.1.0 # via great-expectations notebook-shim==0.2.3 # via @@ -500,7 +503,7 @@ numpy==1.24.4 # scipy oauthlib==3.2.2 # via requests-oauthlib -overrides==7.4.0 +overrides==7.7.0 # via jupyter-server packaging==23.2 # via @@ -517,6 +520,7 @@ packaging==23.2 # jupyterlab # jupyterlab-server # marshmallow + # msal-extensions # nbconvert # pytest # snowflake-connector-python @@ -532,17 +536,17 @@ pandas==1.5.3 # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) -pandocfilters==1.5.0 +pandocfilters==1.5.1 # via nbconvert parso==0.8.3 # via jedi partd==1.4.1 # via dask -pathspec==0.11.2 +pathspec==0.12.1 # via black pbr==6.0.0 # via mock -pexpect==4.8.0 +pexpect==4.9.0 # via ipython pip-tools==7.3.0 # via feast (setup.py) @@ -552,7 +556,7 @@ platformdirs==3.11.0 # jupyter-core # snowflake-connector-python # virtualenv -pluggy==1.3.0 +pluggy==1.4.0 # via pytest ply==3.11 # via thriftpy2 @@ -560,11 +564,11 @@ portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 # via feast (setup.py) -prometheus-client==0.18.0 +prometheus-client==0.20.0 # via jupyter-server -prompt-toolkit==3.0.41 +prompt-toolkit==3.0.43 # via ipython -proto-plus==1.22.3 +proto-plus==1.23.0 # via # feast (setup.py) # google-cloud-bigquery @@ -608,13 +612,13 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==14.0.1 +pyarrow==15.0.0 # via # db-dtypes # feast (setup.py) # google-cloud-bigquery # snowflake-connector-python -pyasn1==0.5.0 +pyasn1==0.5.1 # via # pyasn1-modules # rsa @@ -626,14 +630,14 @@ pycodestyle==2.10.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.10.13 +pydantic==1.10.14 # via # fastapi # feast (setup.py) # great-expectations pyflakes==3.0.1 # via flake8 -pygments==2.16.1 +pygments==2.17.2 # via # feast (setup.py) # ipython @@ -643,11 +647,11 @@ 
pyjwt[crypto]==2.8.0 # via # msal # snowflake-connector-python -pymssql==2.2.10 +pymssql==2.2.11 # via feast (setup.py) pymysql==1.1.0 # via feast (setup.py) -pyodbc==5.0.1 +pyodbc==5.1.0 # via feast (setup.py) pyopenssl==23.3.0 # via snowflake-connector-python @@ -659,7 +663,7 @@ pyproject-hooks==1.0.0 # via build pyspark==3.5.0 # via feast (setup.py) -pytest==7.4.3 +pytest==7.4.4 # via # feast (setup.py) # pytest-benchmark @@ -681,7 +685,7 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.4.0 +pytest-xdist==3.5.0 # via feast (setup.py) python-dateutil==2.8.2 # via @@ -695,11 +699,11 @@ python-dateutil==2.8.2 # pandas # rockset # trino -python-dotenv==1.0.0 +python-dotenv==1.0.1 # via uvicorn python-json-logger==2.0.7 # via jupyter-events -pytz==2023.3.post1 +pytz==2024.1 # via # great-expectations # pandas @@ -714,19 +718,19 @@ pyyaml==6.0.1 # pre-commit # responses # uvicorn -pyzmq==25.1.1 +pyzmq==25.1.2 # via # ipykernel # jupyter-client # jupyter-server redis==4.6.0 # via feast (setup.py) -referencing==0.31.0 +referencing==0.33.0 # via # jsonschema # jsonschema-specifications # jupyter-events -regex==2023.10.3 +regex==2023.12.25 # via feast (setup.py) requests==2.31.0 # via @@ -749,7 +753,7 @@ requests==2.31.0 # trino requests-oauthlib==1.3.1 # via kubernetes -responses==0.24.1 +responses==0.25.0 # via moto rfc3339-validator==0.1.4 # via @@ -761,7 +765,7 @@ rfc3986-validator==0.1.1 # jupyter-events rockset==2.1.0 # via feast (setup.py) -rpds-py==0.13.0 +rpds-py==0.18.0 # via # jsonschema # referencing @@ -769,9 +773,9 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -s3transfer==0.7.0 +s3transfer==0.10.0 # via boto3 -scipy==1.11.3 +scipy==1.12.0 # via great-expectations send2trash==1.8.2 # via jupyter-server @@ -780,7 +784,6 @@ six==1.16.0 # asttokens # azure-core # bleach - # cassandra-driver # geomet # happybase # isodate @@ -796,39 +799,33 @@ sniffio==1.3.0 # httpx 
snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.5.0 +snowflake-connector-python[pandas]==3.7.0 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 - # via - # feast (setup.py) - # sphinxcontrib-applehelp - # sphinxcontrib-devhelp - # sphinxcontrib-htmlhelp - # sphinxcontrib-qthelp - # sphinxcontrib-serializinghtml -sphinxcontrib-applehelp==1.0.7 + # via feast (setup.py) +sphinxcontrib-applehelp==1.0.8 # via sphinx -sphinxcontrib-devhelp==1.0.5 +sphinxcontrib-devhelp==1.0.6 # via sphinx -sphinxcontrib-htmlhelp==2.0.4 +sphinxcontrib-htmlhelp==2.0.5 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.6 +sphinxcontrib-qthelp==1.0.7 # via sphinx -sphinxcontrib-serializinghtml==1.1.9 +sphinxcontrib-serializinghtml==1.1.10 # via sphinx -sqlalchemy[mypy]==1.4.50 +sqlalchemy[mypy]==1.4.51 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a37 +sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy stack-data==0.6.3 # via ipython -starlette==0.35.1 +starlette==0.36.3 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -858,12 +855,12 @@ tomli==2.0.1 # pytest tomlkit==0.12.3 # via snowflake-connector-python -toolz==0.12.0 +toolz==0.12.1 # via # altair # dask # partd -tornado==6.3.3 +tornado==6.4 # via # ipykernel # jupyter-client @@ -871,11 +868,11 @@ tornado==6.3.3 # jupyterlab # notebook # terminado -tqdm==4.66.1 +tqdm==4.66.2 # via # feast (setup.py) # great-expectations -traitlets==5.13.0 +traitlets==5.14.1 # via # comm # ipykernel @@ -900,28 +897,29 @@ types-protobuf==3.19.22 # mypy-protobuf types-pymysql==1.1.0.1 # via feast (setup.py) -types-pyopenssl==23.3.0.0 +types-pyopenssl==24.0.0.20240130 # via types-redis -types-python-dateutil==2.8.19.14 +types-python-dateutil==2.8.19.20240106 # via # arrow # feast (setup.py) -types-pytz==2023.3.1.1 +types-pytz==2024.1.0.20240203 # via feast (setup.py) types-pyyaml==6.0.12.12 # via feast (setup.py) 
-types-redis==4.6.0.10 +types-redis==4.6.0.20240106 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==68.2.0.1 +types-setuptools==69.0.0.20240125 # via feast (setup.py) -types-tabulate==0.9.0.3 +types-tabulate==0.9.0.20240106 # via feast (setup.py) types-urllib3==1.26.25.14 # via types-requests -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via + # anyio # async-lru # azure-core # azure-storage-blob @@ -951,8 +949,7 @@ urllib3==1.26.18 # requests # responses # rockset - # snowflake-connector-python -uvicorn[standard]==0.24.0.post1 +uvicorn[standard]==0.27.1 # via feast (setup.py) uvloop==0.19.0 # via uvicorn @@ -964,7 +961,7 @@ volatile==2.1.0 # via bowler watchfiles==0.21.0 # via uvicorn -wcwidth==0.2.10 +wcwidth==0.2.13 # via prompt-toolkit webcolors==1.13 # via jsonschema @@ -972,18 +969,17 @@ webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.6.4 +websocket-client==1.7.0 # via - # docker # jupyter-server # kubernetes websockets==12.0 # via uvicorn werkzeug==3.0.1 # via moto -wheel==0.41.3 +wheel==0.42.0 # via pip-tools -widgetsnbextension==4.0.9 +widgetsnbextension==4.0.10 # via ipywidgets wrapt==1.16.0 # via testcontainers diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 5d5d451e148..d38a287d720 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -4,21 +4,22 @@ # # pip-compile --output-file=sdk/python/requirements/py3.10-requirements.txt # -anyio==4.0.0 + +anyio==4.2.0 # via # httpx # starlette # watchfiles appdirs==1.4.4 # via fissix -attrs==23.1.0 +attrs==23.2.0 # via # bowler # jsonschema # referencing bowler==0.9.0 # via feast (setup.py) -certifi==2023.7.22 +certifi==2024.2.2 # via # httpcore # httpx @@ -36,35 +37,35 @@ cloudpickle==3.0.0 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.11.0 +dask==2024.2.0 # via feast (setup.py) -dill==0.3.7 
+dill==0.3.8 # via feast (setup.py) -exceptiongroup==1.1.3 +exceptiongroup==1.2.0 # via anyio -fastapi==0.109.1 +fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.0 +fastavro==1.9.4 # via # feast (setup.py) # pandavro fissix==21.11.13 # via bowler -fsspec==2023.10.0 +fsspec==2024.2.0 # via dask -greenlet==3.0.1 +greenlet==3.0.3 # via sqlalchemy -grpcio==1.59.2 +grpcio==1.60.1 # via # feast (setup.py) # grpcio-health-checking # grpcio-reflection # grpcio-tools -grpcio-health-checking==1.59.2 +grpcio-health-checking==1.60.1 # via feast (setup.py) -grpcio-reflection==1.59.2 +grpcio-reflection==1.60.1 # via feast (setup.py) -grpcio-tools==1.59.2 +grpcio-tools==1.60.1 # via feast (setup.py) gunicorn==21.2.0 # via feast (setup.py) @@ -72,18 +73,18 @@ h11==0.14.0 # via # httpcore # uvicorn -httpcore==1.0.2 +httpcore==1.0.3 # via httpx httptools==0.6.1 # via uvicorn -httpx==0.25.1 +httpx==0.26.0 # via feast (setup.py) -idna==3.4 +idna==3.6 # via # anyio # httpx # requests -importlib-metadata==6.8.0 +importlib-metadata==6.11.0 # via # dask # feast (setup.py) @@ -91,19 +92,19 @@ importlib-resources==6.1.1 # via feast (setup.py) jinja2==3.1.3 # via feast (setup.py) -jsonschema==4.20.0 +jsonschema==4.21.1 # via feast (setup.py) -jsonschema-specifications==2023.11.1 +jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 # via partd -markupsafe==2.1.3 +markupsafe==2.1.5 # via jinja2 -mmh3==4.0.1 +mmh3==4.1.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==1.7.0 +mypy==1.8.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -127,7 +128,7 @@ pandavro==1.5.2 # via feast (setup.py) partd==1.4.1 # via dask -proto-plus==1.22.3 +proto-plus==1.23.0 # via feast (setup.py) protobuf==4.23.3 # via @@ -137,32 +138,32 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==14.0.1 +pyarrow==15.0.0 # via feast (setup.py) -pydantic==1.10.13 +pydantic==1.10.14 # via # fastapi # feast (setup.py) -pygments==2.16.1 +pygments==2.17.2 # via feast 
(setup.py) python-dateutil==2.8.2 # via pandas -python-dotenv==1.0.0 +python-dotenv==1.0.1 # via uvicorn -pytz==2023.3.post1 +pytz==2024.1 # via pandas pyyaml==6.0.1 # via # dask # feast (setup.py) # uvicorn -referencing==0.31.0 +referencing==0.33.0 # via # jsonschema # jsonschema-specifications requests==2.31.0 # via feast (setup.py) -rpds-py==0.13.0 +rpds-py==0.18.0 # via # jsonschema # referencing @@ -174,11 +175,11 @@ sniffio==1.3.0 # via # anyio # httpx -sqlalchemy[mypy]==1.4.50 +sqlalchemy[mypy]==1.4.51 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a37 +sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy -starlette==0.35.1 +starlette==0.36.3 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -188,26 +189,27 @@ toml==0.10.2 # via feast (setup.py) tomli==2.0.1 # via mypy -toolz==0.12.0 +toolz==0.12.1 # via # dask # partd -tqdm==4.66.1 +tqdm==4.66.2 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -types-protobuf==4.24.0.4 +types-protobuf==4.24.0.20240129 # via mypy-protobuf -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via + # anyio # fastapi # mypy # pydantic # sqlalchemy2-stubs # uvicorn -urllib3==2.1.0 +urllib3==2.2.0 # via requests -uvicorn[standard]==0.24.0.post1 +uvicorn[standard]==0.27.1 # via feast (setup.py) uvloop==0.19.0 # via uvicorn diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 808a58e11be..33dd89c362e 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -4,11 +4,12 @@ # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.8-ci-requirements.txt # + alabaster==0.7.13 # via sphinx altair==4.2.0 # via great-expectations -anyio==4.0.0 +anyio==4.2.0 # via # httpx # jupyter-server @@ -32,14 +33,14 @@ async-lru==2.0.4 # via jupyterlab async-timeout==4.0.3 # via redis -attrs==23.1.0 +attrs==23.2.0 # via # bowler # jsonschema # referencing avro==1.10.0 # via feast (setup.py) 
-azure-core==1.29.5 +azure-core==1.30.0 # via # azure-identity # azure-storage-blob @@ -47,7 +48,7 @@ azure-identity==1.15.0 # via feast (setup.py) azure-storage-blob==12.19.0 # via feast (setup.py) -babel==2.13.1 +babel==2.14.0 # via # jupyterlab-server # sphinx @@ -57,17 +58,17 @@ backports-zoneinfo==0.2.1 # via # trino # tzlocal -beautifulsoup4==4.12.2 +beautifulsoup4==4.12.3 # via nbconvert black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.29.2 +boto3==1.34.42 # via # feast (setup.py) # moto -botocore==1.32.2 +botocore==1.34.42 # via # boto3 # moto @@ -80,13 +81,13 @@ build==1.0.3 # pip-tools bytewax==0.15.1 # via feast (setup.py) -cachecontrol==0.13.1 +cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.2 # via google-auth -cassandra-driver==3.28.0 +cassandra-driver==3.29.0 # via feast (setup.py) -certifi==2023.7.22 +certifi==2024.2.2 # via # httpcore # httpx @@ -122,13 +123,13 @@ colorama==0.4.6 # via # feast (setup.py) # great-expectations -comm==0.2.0 +comm==0.2.1 # via # ipykernel # ipywidgets -coverage[toml]==7.3.2 +coverage[toml]==7.4.1 # via pytest-cov -cryptography==41.0.6 +cryptography==41.0.7 # via # azure-identity # azure-storage-blob @@ -143,9 +144,9 @@ cryptography==41.0.6 # types-redis dask==2023.5.0 # via feast (setup.py) -db-dtypes==1.1.1 +db-dtypes==1.2.0 # via google-cloud-bigquery -debugpy==1.8.0 +debugpy==1.8.1 # via ipykernel decorator==5.1.1 # via ipython @@ -153,14 +154,14 @@ defusedxml==0.7.1 # via nbconvert deprecation==2.1.0 # via testcontainers -dill==0.3.7 +dill==0.3.8 # via # bytewax # feast (setup.py) # multiprocess -distlib==0.3.7 +distlib==0.3.8 # via virtualenv -docker==6.1.3 +docker==7.0.0 # via # feast (setup.py) # testcontainers @@ -168,7 +169,7 @@ docutils==0.19 # via sphinx entrypoints==0.4 # via altair -exceptiongroup==1.1.3 +exceptiongroup==1.2.0 # via # anyio # pytest @@ -176,13 +177,13 @@ execnet==2.0.2 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.109.1 
+fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.0 +fastavro==1.9.4 # via # feast (setup.py) # pandavro -fastjsonschema==2.19.0 +fastjsonschema==2.19.1 # via nbformat filelock==3.13.1 # via @@ -196,7 +197,7 @@ flake8==6.0.0 # via feast (setup.py) fqdn==1.5.1 # via jsonschema -fsspec==2023.9.2 +fsspec==2023.12.2 # via # dask # feast (setup.py) @@ -204,7 +205,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.14.0 +google-api-core[grpc]==2.17.1 # via # feast (setup.py) # firebase-admin @@ -216,9 +217,9 @@ google-api-core[grpc]==2.14.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.108.0 +google-api-python-client==2.118.0 # via firebase-admin -google-auth==2.23.4 +google-auth==2.27.0 # via # google-api-core # google-api-python-client @@ -226,26 +227,26 @@ google-auth==2.23.4 # google-cloud-core # google-cloud-storage # kubernetes -google-auth-httplib2==0.1.1 +google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.22.0 +google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) -google-cloud-bigtable==2.21.0 +google-cloud-bigtable==2.23.0 # via feast (setup.py) -google-cloud-core==2.3.3 +google-cloud-core==2.4.1 # via # google-cloud-bigquery # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.18.0 +google-cloud-datastore==2.19.0 # via feast (setup.py) -google-cloud-firestore==2.13.1 +google-cloud-firestore==2.14.0 # via firebase-admin -google-cloud-storage==2.13.0 +google-cloud-storage==2.14.0 # via # feast (setup.py) # firebase-admin @@ -253,11 +254,11 @@ google-crc32c==1.5.0 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.6.0 +google-resumable-media==2.7.0 # via # google-cloud-bigquery # google-cloud-storage -googleapis-common-protos[grpc]==1.61.0 
+googleapis-common-protos[grpc]==1.62.0 # via # feast (setup.py) # google-api-core @@ -265,11 +266,11 @@ googleapis-common-protos[grpc]==1.61.0 # grpcio-status great-expectations==0.15.50 # via feast (setup.py) -greenlet==3.0.1 +greenlet==3.0.3 # via sqlalchemy -grpc-google-iam-v1==0.12.7 +grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.59.2 +grpcio==1.60.1 # via # feast (setup.py) # google-api-core @@ -281,15 +282,15 @@ grpcio==1.59.2 # grpcio-status # grpcio-testing # grpcio-tools -grpcio-health-checking==1.59.2 +grpcio-health-checking==1.60.1 # via feast (setup.py) -grpcio-reflection==1.59.2 +grpcio-reflection==1.60.1 # via feast (setup.py) -grpcio-status==1.59.2 +grpcio-status==1.60.1 # via google-api-core -grpcio-testing==1.59.2 +grpcio-testing==1.60.1 # via feast (setup.py) -grpcio-tools==1.59.2 +grpcio-tools==1.60.1 # via feast (setup.py) gunicorn==21.2.0 # via feast (setup.py) @@ -301,9 +302,9 @@ happybase==1.2.0 # via feast (setup.py) hazelcast-python-client==5.3.0 # via feast (setup.py) -hiredis==2.2.3 +hiredis==2.3.2 # via feast (setup.py) -httpcore==1.0.2 +httpcore==1.0.3 # via httpx httplib2==0.22.0 # via @@ -311,11 +312,13 @@ httplib2==0.22.0 # google-auth-httplib2 httptools==0.6.1 # via uvicorn -httpx==0.25.1 - # via feast (setup.py) -identify==2.5.31 +httpx==0.26.0 + # via + # feast (setup.py) + # jupyterlab +identify==2.5.34 # via pre-commit -idna==3.4 +idna==3.6 # via # anyio # httpx @@ -324,7 +327,7 @@ idna==3.4 # snowflake-connector-python imagesize==1.4.1 # via sphinx -importlib-metadata==6.8.0 +importlib-metadata==6.11.0 # via # build # dask @@ -344,20 +347,20 @@ importlib-resources==6.1.1 # jupyterlab iniconfig==2.0.0 # via pytest -ipykernel==6.26.0 +ipykernel==6.29.2 # via jupyterlab ipython==8.12.3 # via # great-expectations # ipykernel # ipywidgets -ipywidgets==8.1.1 +ipywidgets==8.1.2 # via great-expectations isodate==0.6.1 # via azure-storage-blob isoduration==20.11.0 # via jsonschema -isort==5.12.0 +isort==5.13.2 # via 
feast (setup.py) jedi==0.19.1 # via ipython @@ -384,7 +387,7 @@ jsonpointer==2.4 # via # jsonpatch # jsonschema -jsonschema[format-nongpl]==4.20.0 +jsonschema[format-nongpl]==4.21.1 # via # altair # feast (setup.py) @@ -392,14 +395,14 @@ jsonschema[format-nongpl]==4.20.0 # jupyter-events # jupyterlab-server # nbformat -jsonschema-specifications==2023.11.1 +jsonschema-specifications==2023.12.1 # via jsonschema jupyter-client==8.6.0 # via # ipykernel # jupyter-server # nbclient -jupyter-core==5.5.0 +jupyter-core==5.7.1 # via # ipykernel # jupyter-client @@ -412,24 +415,24 @@ jupyter-events==0.9.0 # via jupyter-server jupyter-lsp==2.2.2 # via jupyterlab -jupyter-server==2.11.2 +jupyter-server==2.12.5 # via # jupyter-lsp # jupyterlab # jupyterlab-server # notebook # notebook-shim -jupyter-server-terminals==0.4.4 +jupyter-server-terminals==0.5.2 # via jupyter-server -jupyterlab==4.0.11 +jupyterlab==4.1.1 # via notebook -jupyterlab-pygments==0.2.2 +jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.25.1 +jupyterlab-server==2.25.3 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.9 +jupyterlab-widgets==3.0.10 # via ipywidgets kubernetes==20.13.0 # via feast (setup.py) @@ -437,12 +440,12 @@ locket==1.0.0 # via partd makefun==1.15.2 # via great-expectations -markupsafe==2.1.3 +markupsafe==2.1.5 # via # jinja2 # nbconvert # werkzeug -marshmallow==3.20.1 +marshmallow==3.20.2 # via great-expectations matplotlib-inline==0.1.6 # via @@ -456,23 +459,23 @@ mistune==3.0.2 # via # great-expectations # nbconvert -mmh3==4.0.1 +mmh3==4.1.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==4.2.9 +moto==4.2.14 # via feast (setup.py) -msal==1.25.0 +msal==1.26.0 # via # azure-identity # msal-extensions -msal-extensions==1.0.0 +msal-extensions==1.1.0 # via azure-identity msgpack==1.0.7 # via cachecontrol -multiprocess==0.70.15 +multiprocess==0.70.16 # via bytewax mypy==1.8.0 # via @@ -486,7 +489,7 @@ mypy-protobuf==3.1.0 # via 
feast (setup.py) nbclient==0.9.0 # via nbconvert -nbconvert==7.11.0 +nbconvert==7.16.0 # via jupyter-server nbformat==5.9.2 # via @@ -494,11 +497,11 @@ nbformat==5.9.2 # jupyter-server # nbclient # nbconvert -nest-asyncio==1.5.8 +nest-asyncio==1.6.0 # via ipykernel nodeenv==1.8.0 # via pre-commit -notebook==7.0.6 +notebook==7.1.0 # via great-expectations notebook-shim==0.2.3 # via @@ -516,7 +519,7 @@ numpy==1.24.4 # scipy oauthlib==3.2.2 # via requests-oauthlib -overrides==7.4.0 +overrides==7.7.0 # via jupyter-server packaging==23.2 # via @@ -533,6 +536,7 @@ packaging==23.2 # jupyterlab # jupyterlab-server # marshmallow + # msal-extensions # nbconvert # pytest # snowflake-connector-python @@ -548,17 +552,17 @@ pandas==1.5.3 # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) -pandocfilters==1.5.0 +pandocfilters==1.5.1 # via nbconvert parso==0.8.3 # via jedi partd==1.4.1 # via dask -pathspec==0.11.2 +pathspec==0.12.1 # via black pbr==6.0.0 # via mock -pexpect==4.8.0 +pexpect==4.9.0 # via ipython pickleshare==0.7.5 # via ipython @@ -572,7 +576,7 @@ platformdirs==3.11.0 # jupyter-core # snowflake-connector-python # virtualenv -pluggy==1.3.0 +pluggy==1.4.0 # via pytest ply==3.11 # via thriftpy2 @@ -580,11 +584,11 @@ portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 # via feast (setup.py) -prometheus-client==0.18.0 +prometheus-client==0.20.0 # via jupyter-server -prompt-toolkit==3.0.41 +prompt-toolkit==3.0.43 # via ipython -proto-plus==1.22.3 +proto-plus==1.23.0 # via # feast (setup.py) # google-cloud-bigquery @@ -628,13 +632,13 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==14.0.1 +pyarrow==15.0.0 # via # db-dtypes # feast (setup.py) # google-cloud-bigquery # snowflake-connector-python -pyasn1==0.5.0 +pyasn1==0.5.1 # via # pyasn1-modules # rsa @@ -646,14 +650,14 @@ pycodestyle==2.10.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.10.13 +pydantic==1.10.14 # via # fastapi # feast (setup.py) # 
great-expectations pyflakes==3.0.1 # via flake8 -pygments==2.16.1 +pygments==2.17.2 # via # feast (setup.py) # ipython @@ -663,11 +667,11 @@ pyjwt[crypto]==2.8.0 # via # msal # snowflake-connector-python -pymssql==2.2.10 +pymssql==2.2.11 # via feast (setup.py) pymysql==1.1.0 # via feast (setup.py) -pyodbc==5.0.1 +pyodbc==5.1.0 # via feast (setup.py) pyopenssl==23.3.0 # via snowflake-connector-python @@ -679,7 +683,7 @@ pyproject-hooks==1.0.0 # via build pyspark==3.5.0 # via feast (setup.py) -pytest==7.4.3 +pytest==7.4.4 # via # feast (setup.py) # pytest-benchmark @@ -701,7 +705,7 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.4.0 +pytest-xdist==3.5.0 # via feast (setup.py) python-dateutil==2.8.2 # via @@ -715,11 +719,11 @@ python-dateutil==2.8.2 # pandas # rockset # trino -python-dotenv==1.0.0 +python-dotenv==1.0.1 # via uvicorn python-json-logger==2.0.7 # via jupyter-events -pytz==2023.3.post1 +pytz==2024.1 # via # babel # great-expectations @@ -735,19 +739,19 @@ pyyaml==6.0.1 # pre-commit # responses # uvicorn -pyzmq==25.1.1 +pyzmq==25.1.2 # via # ipykernel # jupyter-client # jupyter-server redis==4.6.0 # via feast (setup.py) -referencing==0.31.0 +referencing==0.33.0 # via # jsonschema # jsonschema-specifications # jupyter-events -regex==2023.10.3 +regex==2023.12.25 # via feast (setup.py) requests==2.31.0 # via @@ -770,7 +774,7 @@ requests==2.31.0 # trino requests-oauthlib==1.3.1 # via kubernetes -responses==0.24.1 +responses==0.25.0 # via moto rfc3339-validator==0.1.4 # via @@ -782,7 +786,7 @@ rfc3986-validator==0.1.1 # jupyter-events rockset==2.1.0 # via feast (setup.py) -rpds-py==0.13.0 +rpds-py==0.18.0 # via # jsonschema # referencing @@ -792,7 +796,7 @@ ruamel-yaml==0.17.17 # via great-expectations ruamel-yaml-clib==0.2.8 # via ruamel-yaml -s3transfer==0.7.0 +s3transfer==0.10.0 # via boto3 scipy==1.10.1 # via great-expectations @@ -803,7 +807,6 @@ six==1.16.0 # asttokens # azure-core # bleach - # 
cassandra-driver # geomet # happybase # isodate @@ -819,7 +822,7 @@ sniffio==1.3.0 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.5.0 +snowflake-connector-python[pandas]==3.7.0 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python @@ -839,13 +842,13 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -sqlalchemy[mypy]==1.4.50 +sqlalchemy[mypy]==1.4.51 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a37 +sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy stack-data==0.6.3 # via ipython -starlette==0.35.1 +starlette==0.36.3 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -875,12 +878,12 @@ tomli==2.0.1 # pytest tomlkit==0.12.3 # via snowflake-connector-python -toolz==0.12.0 +toolz==0.12.1 # via # altair # dask # partd -tornado==6.3.3 +tornado==6.4 # via # ipykernel # jupyter-client @@ -888,11 +891,11 @@ tornado==6.3.3 # jupyterlab # notebook # terminado -tqdm==4.66.1 +tqdm==4.66.2 # via # feast (setup.py) # great-expectations -traitlets==5.13.0 +traitlets==5.14.1 # via # comm # ipykernel @@ -917,28 +920,29 @@ types-protobuf==3.19.22 # mypy-protobuf types-pymysql==1.1.0.1 # via feast (setup.py) -types-pyopenssl==23.3.0.0 +types-pyopenssl==24.0.0.20240130 # via types-redis -types-python-dateutil==2.8.19.14 +types-python-dateutil==2.8.19.20240106 # via # arrow # feast (setup.py) -types-pytz==2023.3.1.1 +types-pytz==2024.1.0.20240203 # via feast (setup.py) types-pyyaml==6.0.12.12 # via feast (setup.py) -types-redis==4.6.0.10 +types-redis==4.6.0.20240106 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==68.2.0.1 +types-setuptools==69.0.0.20240125 # via feast (setup.py) -types-tabulate==0.9.0.3 +types-tabulate==0.9.0.20240106 # via feast (setup.py) types-urllib3==1.26.25.14 # via types-requests -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via + # anyio # async-lru # azure-core # azure-storage-blob @@ -972,7 +976,7 @@ 
urllib3==1.26.18 # responses # rockset # snowflake-connector-python -uvicorn[standard]==0.24.0.post1 +uvicorn[standard]==0.27.1 # via feast (setup.py) uvloop==0.19.0 # via uvicorn @@ -984,7 +988,7 @@ volatile==2.1.0 # via bowler watchfiles==0.21.0 # via uvicorn -wcwidth==0.2.10 +wcwidth==0.2.13 # via prompt-toolkit webcolors==1.13 # via jsonschema @@ -992,18 +996,17 @@ webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.6.4 +websocket-client==1.7.0 # via - # docker # jupyter-server # kubernetes websockets==12.0 # via uvicorn werkzeug==3.0.1 # via moto -wheel==0.41.3 +wheel==0.42.0 # via pip-tools -widgetsnbextension==4.0.9 +widgetsnbextension==4.0.10 # via ipywidgets wrapt==1.16.0 # via testcontainers diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 163fa4c9a87..388bb3143f3 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -4,21 +4,22 @@ # # pip-compile --output-file=sdk/python/requirements/py3.8-requirements.txt # -anyio==4.0.0 + +anyio==4.2.0 # via # httpx # starlette # watchfiles appdirs==1.4.4 # via fissix -attrs==23.1.0 +attrs==23.2.0 # via # bowler # jsonschema # referencing bowler==0.9.0 # via feast (setup.py) -certifi==2023.7.22 +certifi==2024.2.2 # via # httpcore # httpx @@ -38,33 +39,33 @@ colorama==0.4.6 # via feast (setup.py) dask==2023.5.0 # via feast (setup.py) -dill==0.3.7 +dill==0.3.8 # via feast (setup.py) -exceptiongroup==1.1.3 +exceptiongroup==1.2.0 # via anyio -fastapi==0.109.1 +fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.0 +fastavro==1.9.4 # via # feast (setup.py) # pandavro fissix==21.11.13 # via bowler -fsspec==2023.10.0 +fsspec==2024.2.0 # via dask -greenlet==3.0.1 +greenlet==3.0.3 # via sqlalchemy -grpcio==1.59.2 +grpcio==1.60.1 # via # feast (setup.py) # grpcio-health-checking # grpcio-reflection # grpcio-tools -grpcio-health-checking==1.59.2 +grpcio-health-checking==1.60.1 # via 
feast (setup.py) -grpcio-reflection==1.59.2 +grpcio-reflection==1.60.1 # via feast (setup.py) -grpcio-tools==1.59.2 +grpcio-tools==1.60.1 # via feast (setup.py) gunicorn==21.2.0 # via feast (setup.py) @@ -72,18 +73,18 @@ h11==0.14.0 # via # httpcore # uvicorn -httpcore==1.0.2 +httpcore==1.0.3 # via httpx httptools==0.6.1 # via uvicorn -httpx==0.25.1 +httpx==0.26.0 # via feast (setup.py) -idna==3.4 +idna==3.6 # via # anyio # httpx # requests -importlib-metadata==6.8.0 +importlib-metadata==6.11.0 # via # dask # feast (setup.py) @@ -94,19 +95,19 @@ importlib-resources==6.1.1 # jsonschema-specifications jinja2==3.1.3 # via feast (setup.py) -jsonschema==4.20.0 +jsonschema==4.21.1 # via feast (setup.py) -jsonschema-specifications==2023.11.1 +jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 # via partd -markupsafe==2.1.3 +markupsafe==2.1.5 # via jinja2 -mmh3==4.0.1 +mmh3==4.1.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==1.7.0 +mypy==1.8.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -132,7 +133,7 @@ partd==1.4.1 # via dask pkgutil-resolve-name==1.3.10 # via jsonschema -proto-plus==1.22.3 +proto-plus==1.23.0 # via feast (setup.py) protobuf==4.23.3 # via @@ -142,32 +143,32 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==14.0.1 +pyarrow==15.0.0 # via feast (setup.py) -pydantic==1.10.13 +pydantic==1.10.14 # via # fastapi # feast (setup.py) -pygments==2.16.1 +pygments==2.17.2 # via feast (setup.py) python-dateutil==2.8.2 # via pandas -python-dotenv==1.0.0 +python-dotenv==1.0.1 # via uvicorn -pytz==2023.3.post1 +pytz==2024.1 # via pandas pyyaml==6.0.1 # via # dask # feast (setup.py) # uvicorn -referencing==0.31.0 +referencing==0.33.0 # via # jsonschema # jsonschema-specifications requests==2.31.0 # via feast (setup.py) -rpds-py==0.13.0 +rpds-py==0.18.0 # via # jsonschema # referencing @@ -179,11 +180,11 @@ sniffio==1.3.0 # via # anyio # httpx -sqlalchemy[mypy]==1.4.50 +sqlalchemy[mypy]==1.4.51 # via feast 
(setup.py) -sqlalchemy2-stubs==0.0.2a37 +sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy -starlette==0.35.1 +starlette==0.36.3 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -193,27 +194,28 @@ toml==0.10.2 # via feast (setup.py) tomli==2.0.1 # via mypy -toolz==0.12.0 +toolz==0.12.1 # via # dask # partd -tqdm==4.66.1 +tqdm==4.66.2 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -types-protobuf==4.24.0.4 +types-protobuf==4.24.0.20240129 # via mypy-protobuf -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via + # anyio # fastapi # mypy # pydantic # sqlalchemy2-stubs # starlette # uvicorn -urllib3==2.1.0 +urllib3==2.2.0 # via requests -uvicorn[standard]==0.24.0.post1 +uvicorn[standard]==0.27.1 # via feast (setup.py) uvloop==0.19.0 # via uvicorn diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index f9d7ac3fb9f..9cb322d2f60 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -4,11 +4,12 @@ # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.9-ci-requirements.txt # -alabaster==0.7.13 + +alabaster==0.7.16 # via sphinx altair==4.2.0 # via great-expectations -anyio==4.0.0 +anyio==4.2.0 # via # httpx # jupyter-server @@ -32,14 +33,14 @@ async-lru==2.0.4 # via jupyterlab async-timeout==4.0.3 # via redis -attrs==23.1.0 +attrs==23.2.0 # via # bowler # jsonschema # referencing avro==1.10.0 # via feast (setup.py) -azure-core==1.29.5 +azure-core==1.30.0 # via # azure-identity # azure-storage-blob @@ -47,21 +48,21 @@ azure-identity==1.15.0 # via feast (setup.py) azure-storage-blob==12.19.0 # via feast (setup.py) -babel==2.13.1 +babel==2.14.0 # via # jupyterlab-server # sphinx -beautifulsoup4==4.12.2 +beautifulsoup4==4.12.3 # via nbconvert black==22.12.0 # via feast (setup.py) bleach==6.1.0 # via nbconvert -boto3==1.29.2 +boto3==1.34.42 # via # feast (setup.py) # moto -botocore==1.32.2 +botocore==1.34.42 # 
via # boto3 # moto @@ -74,13 +75,13 @@ build==1.0.3 # pip-tools bytewax==0.15.1 # via feast (setup.py) -cachecontrol==0.13.1 +cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.2 # via google-auth -cassandra-driver==3.28.0 +cassandra-driver==3.29.0 # via feast (setup.py) -certifi==2023.7.22 +certifi==2024.2.2 # via # httpcore # httpx @@ -116,13 +117,13 @@ colorama==0.4.6 # via # feast (setup.py) # great-expectations -comm==0.2.0 +comm==0.2.1 # via # ipykernel # ipywidgets -coverage[toml]==7.3.2 +coverage[toml]==7.4.1 # via pytest-cov -cryptography==41.0.6 +cryptography==41.0.7 # via # azure-identity # azure-storage-blob @@ -135,11 +136,11 @@ cryptography==41.0.6 # snowflake-connector-python # types-pyopenssl # types-redis -dask==2023.11.0 +dask==2024.2.0 # via feast (setup.py) -db-dtypes==1.1.1 +db-dtypes==1.2.0 # via google-cloud-bigquery -debugpy==1.8.0 +debugpy==1.8.1 # via ipykernel decorator==5.1.1 # via ipython @@ -147,14 +148,14 @@ defusedxml==0.7.1 # via nbconvert deprecation==2.1.0 # via testcontainers -dill==0.3.7 +dill==0.3.8 # via # bytewax # feast (setup.py) # multiprocess -distlib==0.3.7 +distlib==0.3.8 # via virtualenv -docker==6.1.3 +docker==7.0.0 # via # feast (setup.py) # testcontainers @@ -162,7 +163,7 @@ docutils==0.19 # via sphinx entrypoints==0.4 # via altair -exceptiongroup==1.1.3 +exceptiongroup==1.2.0 # via # anyio # ipython @@ -171,13 +172,13 @@ execnet==2.0.2 # via pytest-xdist executing==2.0.1 # via stack-data -fastapi==0.109.1 +fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.0 +fastavro==1.9.4 # via # feast (setup.py) # pandavro -fastjsonschema==2.19.0 +fastjsonschema==2.19.1 # via nbformat filelock==3.13.1 # via @@ -191,7 +192,7 @@ flake8==6.0.0 # via feast (setup.py) fqdn==1.5.1 # via jsonschema -fsspec==2023.9.2 +fsspec==2023.12.2 # via # dask # feast (setup.py) @@ -199,7 +200,7 @@ geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver -google-api-core[grpc]==2.14.0 +google-api-core[grpc]==2.17.1 # via 
# feast (setup.py) # firebase-admin @@ -211,9 +212,9 @@ google-api-core[grpc]==2.14.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.108.0 +google-api-python-client==2.118.0 # via firebase-admin -google-auth==2.23.4 +google-auth==2.27.0 # via # google-api-core # google-api-python-client @@ -221,26 +222,26 @@ google-auth==2.23.4 # google-cloud-core # google-cloud-storage # kubernetes -google-auth-httplib2==0.1.1 +google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.22.0 +google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) -google-cloud-bigtable==2.21.0 +google-cloud-bigtable==2.23.0 # via feast (setup.py) -google-cloud-core==2.3.3 +google-cloud-core==2.4.1 # via # google-cloud-bigquery # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.18.0 +google-cloud-datastore==2.19.0 # via feast (setup.py) -google-cloud-firestore==2.13.1 +google-cloud-firestore==2.14.0 # via firebase-admin -google-cloud-storage==2.13.0 +google-cloud-storage==2.14.0 # via # feast (setup.py) # firebase-admin @@ -248,11 +249,11 @@ google-crc32c==1.5.0 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.6.0 +google-resumable-media==2.7.0 # via # google-cloud-bigquery # google-cloud-storage -googleapis-common-protos[grpc]==1.61.0 +googleapis-common-protos[grpc]==1.62.0 # via # feast (setup.py) # google-api-core @@ -260,11 +261,11 @@ googleapis-common-protos[grpc]==1.61.0 # grpcio-status great-expectations==0.15.50 # via feast (setup.py) -greenlet==3.0.1 +greenlet==3.0.3 # via sqlalchemy -grpc-google-iam-v1==0.12.7 +grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.59.2 +grpcio==1.60.1 # via # feast (setup.py) # google-api-core @@ -276,15 +277,15 @@ grpcio==1.59.2 # grpcio-status # grpcio-testing # grpcio-tools 
-grpcio-health-checking==1.59.2 +grpcio-health-checking==1.60.1 # via feast (setup.py) -grpcio-reflection==1.59.2 +grpcio-reflection==1.60.1 # via feast (setup.py) -grpcio-status==1.59.2 +grpcio-status==1.60.1 # via google-api-core -grpcio-testing==1.59.2 +grpcio-testing==1.60.1 # via feast (setup.py) -grpcio-tools==1.59.2 +grpcio-tools==1.60.1 # via feast (setup.py) gunicorn==21.2.0 # via feast (setup.py) @@ -296,9 +297,9 @@ happybase==1.2.0 # via feast (setup.py) hazelcast-python-client==5.3.0 # via feast (setup.py) -hiredis==2.2.3 +hiredis==2.3.2 # via feast (setup.py) -httpcore==1.0.2 +httpcore==1.0.3 # via httpx httplib2==0.22.0 # via @@ -306,11 +307,13 @@ httplib2==0.22.0 # google-auth-httplib2 httptools==0.6.1 # via uvicorn -httpx==0.25.1 - # via feast (setup.py) -identify==2.5.31 +httpx==0.26.0 + # via + # feast (setup.py) + # jupyterlab +identify==2.5.34 # via pre-commit -idna==3.4 +idna==3.6 # via # anyio # httpx @@ -319,7 +322,7 @@ idna==3.4 # snowflake-connector-python imagesize==1.4.1 # via sphinx -importlib-metadata==6.8.0 +importlib-metadata==6.11.0 # via # build # dask @@ -335,20 +338,20 @@ importlib-resources==6.1.1 # via feast (setup.py) iniconfig==2.0.0 # via pytest -ipykernel==6.26.0 +ipykernel==6.29.2 # via jupyterlab -ipython==8.17.2 +ipython==8.18.1 # via # great-expectations # ipykernel # ipywidgets -ipywidgets==8.1.1 +ipywidgets==8.1.2 # via great-expectations isodate==0.6.1 # via azure-storage-blob isoduration==20.11.0 # via jsonschema -isort==5.12.0 +isort==5.13.2 # via feast (setup.py) jedi==0.19.1 # via ipython @@ -375,7 +378,7 @@ jsonpointer==2.4 # via # jsonpatch # jsonschema -jsonschema[format-nongpl]==4.20.0 +jsonschema[format-nongpl]==4.21.1 # via # altair # feast (setup.py) @@ -383,14 +386,14 @@ jsonschema[format-nongpl]==4.20.0 # jupyter-events # jupyterlab-server # nbformat -jsonschema-specifications==2023.11.1 +jsonschema-specifications==2023.12.1 # via jsonschema jupyter-client==8.6.0 # via # ipykernel # jupyter-server # 
nbclient -jupyter-core==5.5.0 +jupyter-core==5.7.1 # via # ipykernel # jupyter-client @@ -403,24 +406,24 @@ jupyter-events==0.9.0 # via jupyter-server jupyter-lsp==2.2.2 # via jupyterlab -jupyter-server==2.11.2 +jupyter-server==2.12.5 # via # jupyter-lsp # jupyterlab # jupyterlab-server # notebook # notebook-shim -jupyter-server-terminals==0.4.4 +jupyter-server-terminals==0.5.2 # via jupyter-server -jupyterlab==4.0.11 +jupyterlab==4.1.1 # via notebook -jupyterlab-pygments==0.2.2 +jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.25.1 +jupyterlab-server==2.25.3 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.9 +jupyterlab-widgets==3.0.10 # via ipywidgets kubernetes==20.13.0 # via feast (setup.py) @@ -428,12 +431,12 @@ locket==1.0.0 # via partd makefun==1.15.2 # via great-expectations -markupsafe==2.1.3 +markupsafe==2.1.5 # via # jinja2 # nbconvert # werkzeug -marshmallow==3.20.1 +marshmallow==3.20.2 # via great-expectations matplotlib-inline==0.1.6 # via @@ -447,23 +450,23 @@ mistune==3.0.2 # via # great-expectations # nbconvert -mmh3==4.0.1 +mmh3==4.1.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==4.2.9 +moto==4.2.14 # via feast (setup.py) -msal==1.25.0 +msal==1.26.0 # via # azure-identity # msal-extensions -msal-extensions==1.0.0 +msal-extensions==1.1.0 # via azure-identity msgpack==1.0.7 # via cachecontrol -multiprocess==0.70.15 +multiprocess==0.70.16 # via bytewax mypy==1.8.0 # via @@ -477,7 +480,7 @@ mypy-protobuf==3.1.0 # via feast (setup.py) nbclient==0.9.0 # via nbconvert -nbconvert==7.11.0 +nbconvert==7.16.0 # via jupyter-server nbformat==5.9.2 # via @@ -485,11 +488,11 @@ nbformat==5.9.2 # jupyter-server # nbclient # nbconvert -nest-asyncio==1.5.8 +nest-asyncio==1.6.0 # via ipykernel nodeenv==1.8.0 # via pre-commit -notebook==7.0.6 +notebook==7.1.0 # via great-expectations notebook-shim==0.2.3 # via @@ -507,7 +510,7 @@ numpy==1.24.4 # scipy oauthlib==3.2.2 # via requests-oauthlib 
-overrides==7.4.0 +overrides==7.7.0 # via jupyter-server packaging==23.2 # via @@ -524,6 +527,7 @@ packaging==23.2 # jupyterlab # jupyterlab-server # marshmallow + # msal-extensions # nbconvert # pytest # snowflake-connector-python @@ -539,17 +543,17 @@ pandas==1.5.3 # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) -pandocfilters==1.5.0 +pandocfilters==1.5.1 # via nbconvert parso==0.8.3 # via jedi partd==1.4.1 # via dask -pathspec==0.11.2 +pathspec==0.12.1 # via black pbr==6.0.0 # via mock -pexpect==4.8.0 +pexpect==4.9.0 # via ipython pip-tools==7.3.0 # via feast (setup.py) @@ -559,7 +563,7 @@ platformdirs==3.11.0 # jupyter-core # snowflake-connector-python # virtualenv -pluggy==1.3.0 +pluggy==1.4.0 # via pytest ply==3.11 # via thriftpy2 @@ -567,11 +571,11 @@ portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 # via feast (setup.py) -prometheus-client==0.18.0 +prometheus-client==0.20.0 # via jupyter-server -prompt-toolkit==3.0.41 +prompt-toolkit==3.0.43 # via ipython -proto-plus==1.22.3 +proto-plus==1.23.0 # via # feast (setup.py) # google-cloud-bigquery @@ -615,13 +619,13 @@ py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark -pyarrow==14.0.1 +pyarrow==15.0.0 # via # db-dtypes # feast (setup.py) # google-cloud-bigquery # snowflake-connector-python -pyasn1==0.5.0 +pyasn1==0.5.1 # via # pyasn1-modules # rsa @@ -633,14 +637,14 @@ pycodestyle==2.10.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.10.13 +pydantic==1.10.14 # via # fastapi # feast (setup.py) # great-expectations pyflakes==3.0.1 # via flake8 -pygments==2.16.1 +pygments==2.17.2 # via # feast (setup.py) # ipython @@ -650,11 +654,11 @@ pyjwt[crypto]==2.8.0 # via # msal # snowflake-connector-python -pymssql==2.2.10 +pymssql==2.2.11 # via feast (setup.py) pymysql==1.1.0 # via feast (setup.py) -pyodbc==5.0.1 +pyodbc==5.1.0 # via feast (setup.py) pyopenssl==23.3.0 # via snowflake-connector-python @@ -666,7 +670,7 @@ pyproject-hooks==1.0.0 # via build 
pyspark==3.5.0 # via feast (setup.py) -pytest==7.4.3 +pytest==7.4.4 # via # feast (setup.py) # pytest-benchmark @@ -688,7 +692,7 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.4.0 +pytest-xdist==3.5.0 # via feast (setup.py) python-dateutil==2.8.2 # via @@ -702,11 +706,11 @@ python-dateutil==2.8.2 # pandas # rockset # trino -python-dotenv==1.0.0 +python-dotenv==1.0.1 # via uvicorn python-json-logger==2.0.7 # via jupyter-events -pytz==2023.3.post1 +pytz==2024.1 # via # great-expectations # pandas @@ -721,19 +725,19 @@ pyyaml==6.0.1 # pre-commit # responses # uvicorn -pyzmq==25.1.1 +pyzmq==25.1.2 # via # ipykernel # jupyter-client # jupyter-server redis==4.6.0 # via feast (setup.py) -referencing==0.31.0 +referencing==0.33.0 # via # jsonschema # jsonschema-specifications # jupyter-events -regex==2023.10.3 +regex==2023.12.25 # via feast (setup.py) requests==2.31.0 # via @@ -756,7 +760,7 @@ requests==2.31.0 # trino requests-oauthlib==1.3.1 # via kubernetes -responses==0.24.1 +responses==0.25.0 # via moto rfc3339-validator==0.1.4 # via @@ -768,7 +772,7 @@ rfc3986-validator==0.1.1 # jupyter-events rockset==2.1.0 # via feast (setup.py) -rpds-py==0.13.0 +rpds-py==0.18.0 # via # jsonschema # referencing @@ -778,9 +782,9 @@ ruamel-yaml==0.17.17 # via great-expectations ruamel-yaml-clib==0.2.8 # via ruamel-yaml -s3transfer==0.7.0 +s3transfer==0.10.0 # via boto3 -scipy==1.11.3 +scipy==1.12.0 # via great-expectations send2trash==1.8.2 # via jupyter-server @@ -789,7 +793,6 @@ six==1.16.0 # asttokens # azure-core # bleach - # cassandra-driver # geomet # happybase # isodate @@ -805,39 +808,33 @@ sniffio==1.3.0 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.5.0 +snowflake-connector-python[pandas]==3.7.0 # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 - # via - # feast (setup.py) - # sphinxcontrib-applehelp 
- # sphinxcontrib-devhelp - # sphinxcontrib-htmlhelp - # sphinxcontrib-qthelp - # sphinxcontrib-serializinghtml -sphinxcontrib-applehelp==1.0.7 + # via feast (setup.py) +sphinxcontrib-applehelp==1.0.8 # via sphinx -sphinxcontrib-devhelp==1.0.5 +sphinxcontrib-devhelp==1.0.6 # via sphinx -sphinxcontrib-htmlhelp==2.0.4 +sphinxcontrib-htmlhelp==2.0.5 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.6 +sphinxcontrib-qthelp==1.0.7 # via sphinx -sphinxcontrib-serializinghtml==1.1.9 +sphinxcontrib-serializinghtml==1.1.10 # via sphinx -sqlalchemy[mypy]==1.4.50 +sqlalchemy[mypy]==1.4.51 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a37 +sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy stack-data==0.6.3 # via ipython -starlette==0.35.1 +starlette==0.36.3 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -867,12 +864,12 @@ tomli==2.0.1 # pytest tomlkit==0.12.3 # via snowflake-connector-python -toolz==0.12.0 +toolz==0.12.1 # via # altair # dask # partd -tornado==6.3.3 +tornado==6.4 # via # ipykernel # jupyter-client @@ -880,11 +877,11 @@ tornado==6.3.3 # jupyterlab # notebook # terminado -tqdm==4.66.1 +tqdm==4.66.2 # via # feast (setup.py) # great-expectations -traitlets==5.13.0 +traitlets==5.14.1 # via # comm # ipykernel @@ -909,28 +906,29 @@ types-protobuf==3.19.22 # mypy-protobuf types-pymysql==1.1.0.1 # via feast (setup.py) -types-pyopenssl==23.3.0.0 +types-pyopenssl==24.0.0.20240130 # via types-redis -types-python-dateutil==2.8.19.14 +types-python-dateutil==2.8.19.20240106 # via # arrow # feast (setup.py) -types-pytz==2023.3.1.1 +types-pytz==2024.1.0.20240203 # via feast (setup.py) types-pyyaml==6.0.12.12 # via feast (setup.py) -types-redis==4.6.0.10 +types-redis==4.6.0.20240106 # via feast (setup.py) types-requests==2.30.0.0 # via feast (setup.py) -types-setuptools==68.2.0.1 +types-setuptools==69.0.0.20240125 # via feast (setup.py) -types-tabulate==0.9.0.3 +types-tabulate==0.9.0.20240106 # via feast (setup.py) types-urllib3==1.26.25.14 
# via types-requests -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via + # anyio # async-lru # azure-core # azure-storage-blob @@ -964,7 +962,7 @@ urllib3==1.26.18 # responses # rockset # snowflake-connector-python -uvicorn[standard]==0.24.0.post1 +uvicorn[standard]==0.27.1 # via feast (setup.py) uvloop==0.19.0 # via uvicorn @@ -976,7 +974,7 @@ volatile==2.1.0 # via bowler watchfiles==0.21.0 # via uvicorn -wcwidth==0.2.10 +wcwidth==0.2.13 # via prompt-toolkit webcolors==1.13 # via jsonschema @@ -984,18 +982,17 @@ webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.6.4 +websocket-client==1.7.0 # via - # docker # jupyter-server # kubernetes websockets==12.0 # via uvicorn werkzeug==3.0.1 # via moto -wheel==0.41.3 +wheel==0.42.0 # via pip-tools -widgetsnbextension==4.0.9 +widgetsnbextension==4.0.10 # via ipywidgets wrapt==1.16.0 # via testcontainers diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 4d9b8f107de..012dac6f81f 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -4,21 +4,22 @@ # # pip-compile --output-file=sdk/python/requirements/py3.9-requirements.txt # -anyio==4.0.0 + +anyio==4.2.0 # via # httpx # starlette # watchfiles appdirs==1.4.4 # via fissix -attrs==23.1.0 +attrs==23.2.0 # via # bowler # jsonschema # referencing bowler==0.9.0 # via feast (setup.py) -certifi==2023.7.22 +certifi==2024.2.2 # via # httpcore # httpx @@ -36,35 +37,35 @@ cloudpickle==3.0.0 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.11.0 +dask==2024.2.0 # via feast (setup.py) -dill==0.3.7 +dill==0.3.8 # via feast (setup.py) -exceptiongroup==1.1.3 +exceptiongroup==1.2.0 # via anyio -fastapi==0.109.1 +fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.0 +fastavro==1.9.4 # via # feast (setup.py) # pandavro fissix==21.11.13 # via bowler -fsspec==2023.10.0 +fsspec==2024.2.0 # via dask -greenlet==3.0.1 +greenlet==3.0.3 # via 
sqlalchemy -grpcio==1.59.2 +grpcio==1.60.1 # via # feast (setup.py) # grpcio-health-checking # grpcio-reflection # grpcio-tools -grpcio-health-checking==1.59.2 +grpcio-health-checking==1.60.1 # via feast (setup.py) -grpcio-reflection==1.59.2 +grpcio-reflection==1.60.1 # via feast (setup.py) -grpcio-tools==1.59.2 +grpcio-tools==1.60.1 # via feast (setup.py) gunicorn==21.2.0 # via feast (setup.py) @@ -72,18 +73,18 @@ h11==0.14.0 # via # httpcore # uvicorn -httpcore==1.0.2 +httpcore==1.0.3 # via httpx httptools==0.6.1 # via uvicorn -httpx==0.25.1 +httpx==0.26.0 # via feast (setup.py) -idna==3.4 +idna==3.6 # via # anyio # httpx # requests -importlib-metadata==6.8.0 +importlib-metadata==6.11.0 # via # dask # feast (setup.py) @@ -91,19 +92,19 @@ importlib-resources==6.1.1 # via feast (setup.py) jinja2==3.1.3 # via feast (setup.py) -jsonschema==4.20.0 +jsonschema==4.21.1 # via feast (setup.py) -jsonschema-specifications==2023.11.1 +jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 # via partd -markupsafe==2.1.3 +markupsafe==2.1.5 # via jinja2 -mmh3==4.0.1 +mmh3==4.1.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==1.7.0 +mypy==1.8.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -127,7 +128,7 @@ pandavro==1.5.2 # via feast (setup.py) partd==1.4.1 # via dask -proto-plus==1.22.3 +proto-plus==1.23.0 # via feast (setup.py) protobuf==4.23.3 # via @@ -137,32 +138,32 @@ protobuf==4.23.3 # grpcio-tools # mypy-protobuf # proto-plus -pyarrow==14.0.1 +pyarrow==15.0.0 # via feast (setup.py) -pydantic==1.10.13 +pydantic==1.10.14 # via # fastapi # feast (setup.py) -pygments==2.16.1 +pygments==2.17.2 # via feast (setup.py) python-dateutil==2.8.2 # via pandas -python-dotenv==1.0.0 +python-dotenv==1.0.1 # via uvicorn -pytz==2023.3.post1 +pytz==2024.1 # via pandas pyyaml==6.0.1 # via # dask # feast (setup.py) # uvicorn -referencing==0.31.0 +referencing==0.33.0 # via # jsonschema # jsonschema-specifications requests==2.31.0 # via feast (setup.py) 
-rpds-py==0.13.0 +rpds-py==0.18.0 # via # jsonschema # referencing @@ -174,11 +175,11 @@ sniffio==1.3.0 # via # anyio # httpx -sqlalchemy[mypy]==1.4.50 +sqlalchemy[mypy]==1.4.51 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a37 +sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy -starlette==0.35.1 +starlette==0.36.3 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -188,27 +189,28 @@ toml==0.10.2 # via feast (setup.py) tomli==2.0.1 # via mypy -toolz==0.12.0 +toolz==0.12.1 # via # dask # partd -tqdm==4.66.1 +tqdm==4.66.2 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -types-protobuf==4.24.0.4 +types-protobuf==4.24.0.20240129 # via mypy-protobuf -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via + # anyio # fastapi # mypy # pydantic # sqlalchemy2-stubs # starlette # uvicorn -urllib3==2.1.0 +urllib3==2.2.0 # via requests -uvicorn[standard]==0.24.0.post1 +uvicorn[standard]==0.27.1 # via feast (setup.py) uvloop==0.19.0 # via uvicorn diff --git a/setup.py b/setup.py index 81ae63a7a44..ebc4df31a85 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,7 @@ "google-cloud-datastore>=2.1.0,<3", "google-cloud-storage>=1.34.0,<3", "google-cloud-bigtable>=2.11.0,<3", - "fsspec<2023.10.0", + "fsspec<=2024.1.0", ] REDIS_REQUIRED = [ @@ -98,7 +98,7 @@ "hiredis>=2.0.0,<3", ] -AWS_REQUIRED = ["boto3>=1.17.0,<2", "docker>=5.0.2", "fsspec<2023.10.0"] +AWS_REQUIRED = ["boto3>=1.17.0,<2", "docker>=5.0.2", "fsspec<=2024.1.0"] BYTEWAX_REQUIRED = ["bytewax==0.15.1", "docker>=5.0.2", "kubernetes<=20.13.0"] @@ -158,7 +158,6 @@ "moto<5", "mypy>=1.4.1", "avro==1.10.0", - "fsspec<2023.10.0", "urllib3>=1.25.4,<3", "psutil==5.9.0", "py>=1.11.0", # https://github.com/pytest-dev/pytest/issues/10420 From ec11a7cb8d56d8e2e5cda07e06b4c98dcc9d2ba3 Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Thu, 15 Feb 2024 11:04:06 -0500 Subject: [PATCH 25/30] feat: Update the Pydantic from v1 to v2 (#3948) --- sdk/python/feast/importer.py | 3 +- .../infra/contrib/spark_kafka_processor.py | 4 
+- .../feast/infra/contrib/stream_processor.py | 6 +- .../feature_servers/aws_lambda/config.py | 3 +- .../infra/feature_servers/base_config.py | 2 +- .../feature_servers/gcp_cloudrun/config.py | 3 +- .../feature_servers/local_process/config.py | 2 +- .../infra/materialization/snowflake_engine.py | 6 +- .../feast/infra/offline_stores/bigquery.py | 22 ++- .../contrib/athena_offline_store/athena.py | 2 +- .../athena_offline_store/tests/data_source.py | 4 +- .../contrib/mssql_offline_store/mssql.py | 7 +- .../mssql_offline_store/tests/data_source.py | 4 +- .../postgres_offline_store/postgres.py | 2 +- .../tests/data_source.py | 4 +- .../spark_offline_store/tests/data_source.py | 9 +- .../test_config/manual_tests.py | 2 +- .../trino_offline_store/tests/data_source.py | 6 +- .../contrib/trino_offline_store/trino.py | 12 +- sdk/python/feast/infra/offline_stores/file.py | 3 +- .../feast/infra/offline_stores/redshift.py | 16 +- .../feast/infra/offline_stores/snowflake.py | 8 +- .../infra/offline_stores/snowflake_source.py | 6 +- .../feast/infra/online_stores/bigtable.py | 3 +- .../cassandra_online_store.py | 13 +- .../contrib/hbase_online_store/hbase.py | 3 +- .../contrib/mysql_online_store/mysql.py | 4 +- .../infra/online_stores/contrib/postgres.py | 3 +- .../feast/infra/online_stores/datastore.py | 13 +- .../feast/infra/online_stores/dynamodb.py | 3 +- sdk/python/feast/infra/online_stores/redis.py | 2 +- .../feast/infra/online_stores/snowflake.py | 9 +- .../feast/infra/online_stores/sqlite.py | 3 +- .../feast/infra/passthrough_provider.py | 2 +- .../feast/infra/registry/base_registry.py | 36 ++++ sdk/python/feast/infra/registry/snowflake.py | 9 +- .../infra/utils/snowflake/snowflake_utils.py | 6 +- sdk/python/feast/repo_config.py | 165 ++++++++---------- .../requirements/py3.10-ci-requirements.txt | 17 +- .../requirements/py3.10-requirements.txt | 16 +- .../requirements/py3.8-ci-requirements.txt | 16 +- .../requirements/py3.8-requirements.txt | 12 +- 
.../requirements/py3.9-ci-requirements.txt | 17 +- .../requirements/py3.9-requirements.txt | 8 +- sdk/python/tests/conftest.py | 5 +- .../feature_repos/repo_configuration.py | 16 +- .../universal/data_source_creator.py | 9 +- .../universal/data_sources/bigquery.py | 3 +- .../universal/data_sources/file.py | 9 +- .../universal/data_sources/redshift.py | 5 +- .../universal/data_sources/snowflake.py | 4 +- .../feature_repos/universal/feature_views.py | 3 +- .../universal/online_store_creator.py | 3 +- sdk/python/tests/unit/cli/test_cli_chdir.py | 9 +- .../offline_stores/test_offline_store.py | 5 +- .../infra/offline_stores/test_redshift.py | 1 + .../infra/scaffolding/test_repo_config.py | 12 +- sdk/python/tests/utils/e2e_test_validation.py | 2 +- setup.py | 4 +- 59 files changed, 333 insertions(+), 253 deletions(-) diff --git a/sdk/python/feast/importer.py b/sdk/python/feast/importer.py index d1d7d629010..938d29fe313 100644 --- a/sdk/python/feast/importer.py +++ b/sdk/python/feast/importer.py @@ -1,5 +1,4 @@ import importlib -from typing import Optional from feast.errors import ( FeastClassImportError, @@ -8,7 +7,7 @@ ) -def import_class(module_name: str, class_name: str, class_type: Optional[str] = None): +def import_class(module_name: str, class_name: str, class_type: str = ""): """ Dynamically loads and returns a class from a module. 
diff --git a/sdk/python/feast/infra/contrib/spark_kafka_processor.py b/sdk/python/feast/infra/contrib/spark_kafka_processor.py index bac1c28b064..fc4a34f17bd 100644 --- a/sdk/python/feast/infra/contrib/spark_kafka_processor.py +++ b/sdk/python/feast/infra/contrib/spark_kafka_processor.py @@ -1,5 +1,5 @@ from types import MethodType -from typing import List, Optional +from typing import List, Optional, no_type_check import pandas as pd from pyspark.sql import DataFrame, SparkSession @@ -76,6 +76,8 @@ def ingest_stream_feature_view( online_store_query = self._write_stream_data(transformed_df, to) return online_store_query + # In the line 64 of __init__(), the "data_source" is assigned a stream_source (and has to be KafkaSource as in line 40). + @no_type_check def _ingest_stream_data(self) -> StreamTable: """Only supports json and avro formats currently.""" if self.format == "json": diff --git a/sdk/python/feast/infra/contrib/stream_processor.py b/sdk/python/feast/infra/contrib/stream_processor.py index df4e144f8c6..c4620f4ca1d 100644 --- a/sdk/python/feast/infra/contrib/stream_processor.py +++ b/sdk/python/feast/infra/contrib/stream_processor.py @@ -1,4 +1,4 @@ -from abc import ABC +from abc import ABC, abstractmethod from types import MethodType from typing import TYPE_CHECKING, Optional @@ -50,6 +50,7 @@ def __init__( self.sfv = sfv self.data_source = data_source + @abstractmethod def ingest_stream_feature_view(self, to: PushMode = PushMode.ONLINE) -> None: """ Ingests data from the stream source attached to the stream feature view; transforms the data @@ -57,12 +58,14 @@ def ingest_stream_feature_view(self, to: PushMode = PushMode.ONLINE) -> None: """ raise NotImplementedError + @abstractmethod def _ingest_stream_data(self) -> StreamTable: """ Ingests data into a StreamTable. """ raise NotImplementedError + @abstractmethod def _construct_transformation_plan(self, table: StreamTable) -> StreamTable: """ Applies transformations on top of StreamTable object. 
Since stream engines use lazy @@ -71,6 +74,7 @@ def _construct_transformation_plan(self, table: StreamTable) -> StreamTable: """ raise NotImplementedError + @abstractmethod def _write_stream_data(self, table: StreamTable, to: PushMode) -> None: """ Launches a job to persist stream data to the online store and/or offline store, depending diff --git a/sdk/python/feast/infra/feature_servers/aws_lambda/config.py b/sdk/python/feast/infra/feature_servers/aws_lambda/config.py index 31dd879af6d..946831a18fb 100644 --- a/sdk/python/feast/infra/feature_servers/aws_lambda/config.py +++ b/sdk/python/feast/infra/feature_servers/aws_lambda/config.py @@ -1,5 +1,6 @@ +from typing import Literal + from pydantic import StrictBool, StrictStr -from pydantic.typing import Literal from feast.infra.feature_servers.base_config import BaseFeatureServerConfig diff --git a/sdk/python/feast/infra/feature_servers/base_config.py b/sdk/python/feast/infra/feature_servers/base_config.py index 756dd79b438..1a348032e17 100644 --- a/sdk/python/feast/infra/feature_servers/base_config.py +++ b/sdk/python/feast/infra/feature_servers/base_config.py @@ -30,5 +30,5 @@ class BaseFeatureServerConfig(FeastConfigBaseModel): enabled: StrictBool = False """Whether the feature server should be launched.""" - feature_logging: Optional[FeatureLoggingConfig] + feature_logging: Optional[FeatureLoggingConfig] = None """ Feature logging configuration """ diff --git a/sdk/python/feast/infra/feature_servers/gcp_cloudrun/config.py b/sdk/python/feast/infra/feature_servers/gcp_cloudrun/config.py index 8d0c269cf5d..ddcbde7924a 100644 --- a/sdk/python/feast/infra/feature_servers/gcp_cloudrun/config.py +++ b/sdk/python/feast/infra/feature_servers/gcp_cloudrun/config.py @@ -1,5 +1,6 @@ +from typing import Literal + from pydantic import StrictBool -from pydantic.typing import Literal from feast.infra.feature_servers.base_config import BaseFeatureServerConfig diff --git 
a/sdk/python/feast/infra/feature_servers/local_process/config.py b/sdk/python/feast/infra/feature_servers/local_process/config.py index bb2e7bdf738..3d97912e4bd 100644 --- a/sdk/python/feast/infra/feature_servers/local_process/config.py +++ b/sdk/python/feast/infra/feature_servers/local_process/config.py @@ -1,4 +1,4 @@ -from pydantic.typing import Literal +from typing import Literal from feast.infra.feature_servers.base_config import BaseFeatureServerConfig diff --git a/sdk/python/feast/infra/materialization/snowflake_engine.py b/sdk/python/feast/infra/materialization/snowflake_engine.py index 36c42cd390c..62b23dfadef 100644 --- a/sdk/python/feast/infra/materialization/snowflake_engine.py +++ b/sdk/python/feast/infra/materialization/snowflake_engine.py @@ -7,7 +7,7 @@ import click import pandas as pd from colorama import Fore, Style -from pydantic import Field, StrictStr +from pydantic import ConfigDict, Field, StrictStr from pytz import utc from tqdm import tqdm @@ -72,9 +72,7 @@ class SnowflakeMaterializationEngineConfig(FeastConfigBaseModel): schema_: Optional[str] = Field("PUBLIC", alias="schema") """ Snowflake schema name """ - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) @dataclass diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 0ee82a908ed..68420c06642 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -10,6 +10,7 @@ Dict, Iterator, List, + Literal, Optional, Tuple, Union, @@ -19,8 +20,7 @@ import pandas as pd import pyarrow import pyarrow.parquet -from pydantic import ConstrainedStr, StrictStr, validator -from pydantic.typing import Literal +from pydantic import StrictStr, field_validator from tenacity import Retrying, retry_if_exception_type, stop_after_delay, wait_fixed from feast import flags_helper @@ -72,13 +72,6 @@ def get_http_client_info(): 
return http_client_info.ClientInfo(user_agent=get_user_agent()) -class BigQueryTableCreateDisposition(ConstrainedStr): - """Custom constraint for table_create_disposition. To understand more, see: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition""" - - values = {"CREATE_NEVER", "CREATE_IF_NEEDED"} - - class BigQueryOfflineStoreConfig(FeastConfigBaseModel): """Offline store config for GCP BigQuery""" @@ -102,10 +95,15 @@ class BigQueryOfflineStoreConfig(FeastConfigBaseModel): gcs_staging_location: Optional[str] = None """ (optional) GCS location used for offloading BigQuery results as parquet files.""" - table_create_disposition: Optional[BigQueryTableCreateDisposition] = None - """ (optional) Specifies whether the job is allowed to create new tables. The default value is CREATE_IF_NEEDED.""" + table_create_disposition: Literal[ + "CREATE_NEVER", "CREATE_IF_NEEDED" + ] = "CREATE_IF_NEEDED" + """ (optional) Specifies whether the job is allowed to create new tables. The default value is CREATE_IF_NEEDED. + Custom constraint for table_create_disposition. 
To understand more, see: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition + """ - @validator("billing_project_id") + @field_validator("billing_project_id") def project_id_exists(cls, v, values, **kwargs): if v and not values["project_id"]: raise ValueError( diff --git a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena.py b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena.py index 85a61106aaf..ae510171db9 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena.py +++ b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena.py @@ -8,6 +8,7 @@ Dict, Iterator, List, + Literal, Optional, Tuple, Union, @@ -18,7 +19,6 @@ import pyarrow import pyarrow as pa from pydantic import StrictStr -from pydantic.typing import Literal from pytz import utc from feast import OnDemandFeatureView diff --git a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py index f68e109d6c1..6b2238830b6 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py @@ -48,10 +48,10 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - suffix: Optional[str] = None, - timestamp_field="ts", + event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, + timestamp_field: Optional[str] = "ts", ) -> DataSource: table_name = destination_name diff --git a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/mssql.py b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/mssql.py index 849d5cc797f..67bae292c37 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/mssql.py 
+++ b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/mssql.py @@ -3,7 +3,7 @@ import warnings from datetime import datetime from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Callable, Dict, List, Literal, Optional, Set, Tuple, Union import numpy as np import pandas @@ -11,7 +11,6 @@ import pyarrow as pa import sqlalchemy from pydantic.types import StrictStr -from pydantic.typing import Literal from sqlalchemy import create_engine from sqlalchemy.engine import Engine from sqlalchemy.orm import sessionmaker @@ -32,7 +31,7 @@ from feast.infra.provider import RetrievalJob from feast.infra.registry.base_registry import BaseRegistry from feast.on_demand_feature_view import OnDemandFeatureView -from feast.repo_config import FeastBaseModel, RepoConfig +from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage from feast.type_map import pa_to_mssql_type from feast.usage import log_exceptions_and_usage @@ -43,7 +42,7 @@ EntitySchema = Dict[str, np.dtype] -class MsSqlServerOfflineStoreConfig(FeastBaseModel): +class MsSqlServerOfflineStoreConfig(FeastConfigBaseModel): """Offline store config for SQL Server""" type: Literal["mssql"] = "mssql" diff --git a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py index 2604cf7c18b..71ce56bdefd 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py @@ -64,10 +64,10 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - timestamp_field="ts", + event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, - **kwargs, + timestamp_field: Optional[str] = "ts", ) 
-> DataSource: # Make sure the field mapping is correct and convert the datetime datasources. if timestamp_field in df: diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py index c2e95a8648e..9b300d7bf46 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py @@ -9,6 +9,7 @@ Iterator, KeysView, List, + Literal, Optional, Tuple, Union, @@ -19,7 +20,6 @@ import pyarrow as pa from jinja2 import BaseLoader, Environment from psycopg2 import sql -from pydantic.typing import Literal from pytz import utc from feast.data_source import DataSource diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py index 224fcea30f9..46d5c20e977 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py @@ -82,10 +82,10 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - suffix: Optional[str] = None, - timestamp_field="ts", + event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, + timestamp_field: Optional[str] = "ts", ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py index 7b4fda3b5f5..b9785218857 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py +++ 
b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py @@ -9,6 +9,7 @@ from pyspark.sql import SparkSession from feast.data_source import DataSource +from feast.feature_logging import LoggingDestination from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( SparkOfflineStoreConfig, ) @@ -68,10 +69,10 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - timestamp_field="ts", + event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, - **kwargs, + timestamp_field: Optional[str] = "ts", ) -> DataSource: if timestamp_field in df: df[timestamp_field] = pd.to_datetime(df[timestamp_field], utc=True) @@ -119,3 +120,7 @@ def create_saved_dataset_destination(self) -> SavedDatasetSparkStorage: def get_prefixed_table_name(self, suffix: str) -> str: return f"{self.project_name}_{suffix}" + + def create_logged_features_destination(self) -> LoggingDestination: + # No implementation of LoggingDestination for Spark offline store. 
+ return None # type: ignore diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/test_config/manual_tests.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/test_config/manual_tests.py index 7d31aa90fb4..a31d368ea11 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/test_config/manual_tests.py +++ b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/test_config/manual_tests.py @@ -8,6 +8,6 @@ FULL_REPO_CONFIGS = [ IntegrationTestRepoConfig( provider="local", - offline_store_creator=TrinoSourceCreator, + offline_store_creator=TrinoSourceCreator, # type: ignore ), ] diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py index a5aa53df7ab..fcc0c8d0fa7 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py @@ -81,10 +81,10 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - suffix: Optional[str] = None, - timestamp_field="ts", + event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, + timestamp_field: Optional[str] = "ts", ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) self.client.execute_query( @@ -128,4 +128,6 @@ def create_offline_store_config(self) -> FeastConfigBaseModel: catalog="memory", dataset=self.project_name, connector={"type": "memory"}, + user="test", + auth=None, ) diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py index d4cfdb66329..cdc94350244 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py +++ 
b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd import pyarrow -from pydantic import Field, FilePath, SecretStr, StrictBool, StrictStr, root_validator +from pydantic import Field, FilePath, SecretStr, StrictBool, StrictStr, model_validator from trino.auth import ( BasicAuthentication, CertificateAuthentication, @@ -98,14 +98,14 @@ class AuthConfig(FeastConfigBaseModel): type: Literal["kerberos", "basic", "jwt", "oauth2", "certificate"] config: Optional[Dict[StrictStr, Any]] - @root_validator - def config_only_nullable_for_oauth2(cls, values): - auth_type = values["type"] - auth_config = values["config"] + @model_validator(mode="after") + def config_only_nullable_for_oauth2(self): + auth_type = self.type + auth_config = self.config if auth_type != "oauth2" and auth_config is None: raise ValueError(f"config cannot be null for auth type '{auth_type}'") - return values + return self def to_trino_auth(self): auth_type = self.type diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index 5e4107545f0..0e5064ba785 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -2,7 +2,7 @@ import uuid from datetime import datetime from pathlib import Path -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Any, Callable, List, Literal, Optional, Tuple, Union import dask.dataframe as dd import pandas as pd @@ -10,7 +10,6 @@ import pyarrow.dataset import pyarrow.parquet import pytz -from pydantic.typing import Literal from feast.data_source import DataSource from feast.errors import ( diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index 6034bf5ac7b..2565a569ad1 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -9,6 +9,7 
@@ Dict, Iterator, List, + Literal, Optional, Tuple, Union, @@ -19,8 +20,7 @@ import pyarrow import pyarrow as pa from dateutil import parser -from pydantic import StrictStr, root_validator -from pydantic.typing import Literal +from pydantic import StrictStr, model_validator from pytz import utc from feast import OnDemandFeatureView, RedshiftSource @@ -72,16 +72,16 @@ class RedshiftOfflineStoreConfig(FeastConfigBaseModel): iam_role: StrictStr """ IAM Role for Redshift, granting it access to S3 """ - @root_validator - def require_cluster_and_user_or_workgroup(cls, values): + @model_validator(mode="after") + def require_cluster_and_user_or_workgroup(self): """ Provisioned Redshift clusters: Require cluster_id and user, ignore workgroup Serverless Redshift: Require workgroup, ignore cluster_id and user """ cluster_id, user, workgroup = ( - values.get("cluster_id"), - values.get("user"), - values.get("workgroup"), + self.cluster_id, + self.user, + self.workgroup, ) if not (cluster_id and user) and not workgroup: raise ValueError( @@ -90,7 +90,7 @@ def require_cluster_and_user_or_workgroup(cls, values): elif cluster_id and workgroup: raise ValueError("cannot specify both cluster_id and workgroup") - return values + return self class RedshiftOfflineStore(OfflineStore): diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index dd13ffc96c7..66e7e78651d 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -14,6 +14,7 @@ Dict, Iterator, List, + Literal, Optional, Tuple, Union, @@ -23,8 +24,7 @@ import numpy as np import pandas as pd import pyarrow -from pydantic import Field, StrictStr -from pydantic.typing import Literal +from pydantic import ConfigDict, Field, StrictStr from pytz import utc from feast import OnDemandFeatureView @@ -119,9 +119,7 @@ class SnowflakeOfflineStoreConfig(FeastConfigBaseModel): convert_timestamp_columns: 
Optional[bool] = None """ Convert timestamp columns on export to a Parquet-supported format """ - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class SnowflakeOfflineStore(OfflineStore): diff --git a/sdk/python/feast/infra/offline_stores/snowflake_source.py b/sdk/python/feast/infra/offline_stores/snowflake_source.py index e29197c68d4..9a2c6e09bc4 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake_source.py +++ b/sdk/python/feast/infra/offline_stores/snowflake_source.py @@ -1,5 +1,5 @@ import warnings -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, no_type_check from typeguard import typechecked @@ -202,6 +202,7 @@ def get_table_query_string(self) -> str: def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: return type_map.snowflake_type_to_feast_value_type + @no_type_check def get_table_column_names_and_types( self, config: RepoConfig ) -> Iterable[Tuple[str, str]]: @@ -292,7 +293,8 @@ def get_table_column_names_and_types( ) return [ - (column["column_name"], column["snowflake_type"]) for column in metadata + (str(column["column_name"]), str(column["snowflake_type"])) + for column in metadata ] diff --git a/sdk/python/feast/infra/online_stores/bigtable.py b/sdk/python/feast/infra/online_stores/bigtable.py index 30561d0840f..3a83d23cedb 100644 --- a/sdk/python/feast/infra/online_stores/bigtable.py +++ b/sdk/python/feast/infra/online_stores/bigtable.py @@ -2,13 +2,12 @@ import logging from concurrent import futures from datetime import datetime -from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Set, Tuple import google from google.cloud import bigtable from google.cloud.bigtable import row_filters from pydantic import StrictStr -from pydantic.typing import Literal from feast 
import Entity, FeatureView, utils from feast.feature_view import DUMMY_ENTITY_NAME diff --git a/sdk/python/feast/infra/online_stores/contrib/cassandra_online_store/cassandra_online_store.py b/sdk/python/feast/infra/online_stores/contrib/cassandra_online_store/cassandra_online_store.py index 34a8cab036d..c672e18db03 100644 --- a/sdk/python/feast/infra/online_stores/contrib/cassandra_online_store/cassandra_online_store.py +++ b/sdk/python/feast/infra/online_stores/contrib/cassandra_online_store/cassandra_online_store.py @@ -20,7 +20,17 @@ import logging from datetime import datetime -from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Literal, + Optional, + Sequence, + Tuple, +) from cassandra.auth import PlainTextAuthProvider from cassandra.cluster import ( @@ -34,7 +44,6 @@ from cassandra.policies import DCAwareRoundRobinPolicy, TokenAwarePolicy from cassandra.query import PreparedStatement from pydantic import StrictFloat, StrictInt, StrictStr -from pydantic.typing import Literal from feast import Entity, FeatureView, RepoConfig from feast.infra.key_encoding_utils import serialize_entity_key diff --git a/sdk/python/feast/infra/online_stores/contrib/hbase_online_store/hbase.py b/sdk/python/feast/infra/online_stores/contrib/hbase_online_store/hbase.py index 1da9de89a81..4b2d8ae39c2 100644 --- a/sdk/python/feast/infra/online_stores/contrib/hbase_online_store/hbase.py +++ b/sdk/python/feast/infra/online_stores/contrib/hbase_online_store/hbase.py @@ -1,12 +1,11 @@ import calendar import struct from datetime import datetime -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple from happybase import ConnectionPool from happybase.connection import DEFAULT_PROTOCOL, DEFAULT_TRANSPORT from pydantic import StrictStr -from pydantic.typing import Literal from feast import Entity 
from feast.feature_view import FeatureView diff --git a/sdk/python/feast/infra/online_stores/contrib/mysql_online_store/mysql.py b/sdk/python/feast/infra/online_stores/contrib/mysql_online_store/mysql.py index c09cb126f0c..cf07d5fef12 100644 --- a/sdk/python/feast/infra/online_stores/contrib/mysql_online_store/mysql.py +++ b/sdk/python/feast/infra/online_stores/contrib/mysql_online_store/mysql.py @@ -1,7 +1,7 @@ from __future__ import absolute_import from datetime import datetime -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple import pymysql import pytz @@ -23,7 +23,7 @@ class MySQLOnlineStoreConfig(FeastConfigBaseModel): NOTE: The class *must* end with the `OnlineStoreConfig` suffix. """ - type = "mysql" + type: Literal["mysql"] = "mysql" host: Optional[StrictStr] = None user: Optional[StrictStr] = None diff --git a/sdk/python/feast/infra/online_stores/contrib/postgres.py b/sdk/python/feast/infra/online_stores/contrib/postgres.py index 49f87ddb0ae..308528aaec2 100644 --- a/sdk/python/feast/infra/online_stores/contrib/postgres.py +++ b/sdk/python/feast/infra/online_stores/contrib/postgres.py @@ -2,14 +2,13 @@ import logging from collections import defaultdict from datetime import datetime -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple import psycopg2 import pytz from psycopg2 import sql from psycopg2.extras import execute_values from psycopg2.pool import SimpleConnectionPool -from pydantic.schema import Literal from feast import Entity from feast.feature_view import FeatureView diff --git a/sdk/python/feast/infra/online_stores/datastore.py b/sdk/python/feast/infra/online_stores/datastore.py index ed4e7612ba5..ae96e16c640 100644 --- a/sdk/python/feast/infra/online_stores/datastore.py +++ b/sdk/python/feast/infra/online_stores/datastore.py @@ -17,10 +17,19 
@@ from multiprocessing.pool import ThreadPool from queue import Empty, Queue from threading import Lock, Thread -from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple +from typing import ( + Any, + Callable, + Dict, + Iterator, + List, + Literal, + Optional, + Sequence, + Tuple, +) from pydantic import PositiveInt, StrictStr -from pydantic.typing import Literal from feast import Entity, utils from feast.errors import FeastProviderLoginError diff --git a/sdk/python/feast/infra/online_stores/dynamodb.py b/sdk/python/feast/infra/online_stores/dynamodb.py index a1eef16f40d..a049189de7f 100644 --- a/sdk/python/feast/infra/online_stores/dynamodb.py +++ b/sdk/python/feast/infra/online_stores/dynamodb.py @@ -14,10 +14,9 @@ import itertools import logging from datetime import datetime -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union from pydantic import StrictBool, StrictStr -from pydantic.typing import Literal, Union from feast import Entity, FeatureView, utils from feast.infra.infra_object import DYNAMODB_INFRA_OBJECT_CLASS_TYPE, InfraObject diff --git a/sdk/python/feast/infra/online_stores/redis.py b/sdk/python/feast/infra/online_stores/redis.py index 9561705aaac..ad84e8db7c9 100644 --- a/sdk/python/feast/infra/online_stores/redis.py +++ b/sdk/python/feast/infra/online_stores/redis.py @@ -21,6 +21,7 @@ Callable, Dict, List, + Literal, Optional, Sequence, Tuple, @@ -30,7 +31,6 @@ import pytz from google.protobuf.timestamp_pb2 import Timestamp from pydantic import StrictStr -from pydantic.typing import Literal from feast import Entity, FeatureView, RepoConfig, utils from feast.infra.online_stores.helpers import _mmh3, _redis_key, _redis_key_prefix diff --git a/sdk/python/feast/infra/online_stores/snowflake.py b/sdk/python/feast/infra/online_stores/snowflake.py index c1a03a2862c..f5600249c91 100644 --- 
a/sdk/python/feast/infra/online_stores/snowflake.py +++ b/sdk/python/feast/infra/online_stores/snowflake.py @@ -2,11 +2,10 @@ import os from binascii import hexlify from datetime import datetime -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple import pandas as pd -from pydantic import Field, StrictStr -from pydantic.schema import Literal +from pydantic import ConfigDict, Field, StrictStr from feast.entity import Entity from feast.feature_view import FeatureView @@ -57,9 +56,7 @@ class SnowflakeOnlineStoreConfig(FeastConfigBaseModel): schema_: Optional[str] = Field("PUBLIC", alias="schema") """ Snowflake schema name """ - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class SnowflakeOnlineStore(OnlineStore): diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 6949b2bf247..4a6aa28889d 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -16,10 +16,9 @@ import sqlite3 from datetime import datetime from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple from pydantic import StrictStr -from pydantic.schema import Literal from feast import Entity from feast.feature_view import FeatureView diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py index 811abe106c3..aca18f4856b 100644 --- a/sdk/python/feast/infra/passthrough_provider.py +++ b/sdk/python/feast/infra/passthrough_provider.py @@ -70,7 +70,7 @@ def batch_engine(self) -> BatchMaterializationEngine: if self._batch_engine: return self._batch_engine else: - engine_config = self.repo_config._batch_engine_config + engine_config = 
self.repo_config.batch_engine_config config_is_dict = False if isinstance(engine_config, str): engine_config_type = engine_config diff --git a/sdk/python/feast/infra/registry/base_registry.py b/sdk/python/feast/infra/registry/base_registry.py index f89b0794788..f23a820d239 100644 --- a/sdk/python/feast/infra/registry/base_registry.py +++ b/sdk/python/feast/infra/registry/base_registry.py @@ -51,6 +51,7 @@ def apply_entity(self, entity: Entity, project: str, commit: bool = True): project: Feast project that this entity belongs to commit: Whether the change should be persisted immediately """ + raise NotImplementedError @abstractmethod def delete_entity(self, name: str, project: str, commit: bool = True): @@ -62,6 +63,7 @@ def delete_entity(self, name: str, project: str, commit: bool = True): project: Feast project that this entity belongs to commit: Whether the change should be persisted immediately """ + raise NotImplementedError @abstractmethod def get_entity(self, name: str, project: str, allow_cache: bool = False) -> Entity: @@ -77,6 +79,7 @@ def get_entity(self, name: str, project: str, allow_cache: bool = False) -> Enti Returns either the specified entity, or raises an exception if none is found """ + raise NotImplementedError @abstractmethod def list_entities(self, project: str, allow_cache: bool = False) -> List[Entity]: @@ -90,6 +93,7 @@ def list_entities(self, project: str, allow_cache: bool = False) -> List[Entity] Returns: List of entities """ + raise NotImplementedError # Data source operations @abstractmethod @@ -104,6 +108,7 @@ def apply_data_source( project: Feast project that this data source belongs to commit: Whether to immediately commit to the registry """ + raise NotImplementedError @abstractmethod def delete_data_source(self, name: str, project: str, commit: bool = True): @@ -115,6 +120,7 @@ def delete_data_source(self, name: str, project: str, commit: bool = True): project: Feast project that this data source belongs to commit: Whether the 
change should be persisted immediately """ + raise NotImplementedError @abstractmethod def get_data_source( @@ -131,6 +137,7 @@ def get_data_source( Returns: Returns either the specified data source, or raises an exception if none is found """ + raise NotImplementedError @abstractmethod def list_data_sources( @@ -146,6 +153,7 @@ def list_data_sources( Returns: List of data sources """ + raise NotImplementedError # Feature service operations @abstractmethod @@ -159,6 +167,7 @@ def apply_feature_service( feature_service: A feature service that will be registered project: Feast project that this entity belongs to """ + raise NotImplementedError @abstractmethod def delete_feature_service(self, name: str, project: str, commit: bool = True): @@ -170,6 +179,7 @@ def delete_feature_service(self, name: str, project: str, commit: bool = True): project: Feast project that this feature service belongs to commit: Whether the change should be persisted immediately """ + raise NotImplementedError @abstractmethod def get_feature_service( @@ -187,6 +197,7 @@ def get_feature_service( Returns either the specified feature service, or raises an exception if none is found """ + raise NotImplementedError @abstractmethod def list_feature_services( @@ -202,6 +213,7 @@ def list_feature_services( Returns: List of feature services """ + raise NotImplementedError # Feature view operations @abstractmethod @@ -216,6 +228,7 @@ def apply_feature_view( project: Feast project that this feature view belongs to commit: Whether the change should be persisted immediately """ + raise NotImplementedError @abstractmethod def delete_feature_view(self, name: str, project: str, commit: bool = True): @@ -227,6 +240,7 @@ def delete_feature_view(self, name: str, project: str, commit: bool = True): project: Feast project that this feature view belongs to commit: Whether the change should be persisted immediately """ + raise NotImplementedError # stream feature view operations @abstractmethod @@ -245,6 +259,7 @@ 
def get_stream_feature_view( Returns either the specified feature view, or raises an exception if none is found """ + raise NotImplementedError @abstractmethod def list_stream_feature_views( @@ -260,6 +275,7 @@ def list_stream_feature_views( Returns: List of stream feature views """ + raise NotImplementedError # on demand feature view operations @abstractmethod @@ -278,6 +294,7 @@ def get_on_demand_feature_view( Returns either the specified on demand feature view, or raises an exception if none is found """ + raise NotImplementedError @abstractmethod def list_on_demand_feature_views( @@ -293,6 +310,7 @@ def list_on_demand_feature_views( Returns: List of on demand feature views """ + raise NotImplementedError # regular feature view operations @abstractmethod @@ -311,6 +329,7 @@ def get_feature_view( Returns either the specified feature view, or raises an exception if none is found """ + raise NotImplementedError @abstractmethod def list_feature_views( @@ -326,6 +345,7 @@ def list_feature_views( Returns: List of feature views """ + raise NotImplementedError # request feature view operations @abstractmethod @@ -344,6 +364,7 @@ def get_request_feature_view( Returns either the specified feature view, or raises an exception if none is found """ + raise NotImplementedError @abstractmethod def list_request_feature_views( @@ -359,6 +380,7 @@ def list_request_feature_views( Returns: List of request feature views """ + raise NotImplementedError @abstractmethod def apply_materialization( @@ -379,6 +401,7 @@ def apply_materialization( end_date (datetime): End date of the materialization interval to track commit: Whether the change should be persisted immediately """ + raise NotImplementedError # Saved dataset operations @abstractmethod @@ -396,6 +419,7 @@ def apply_saved_dataset( project: Feast project that this dataset belongs to commit: Whether the change should be persisted immediately """ + raise NotImplementedError @abstractmethod def get_saved_dataset( @@ -413,6 +437,7 @@ 
def get_saved_dataset( Returns either the specified SavedDataset, or raises an exception if none is found """ + raise NotImplementedError def delete_saved_dataset(self, name: str, project: str, allow_cache: bool = False): """ @@ -427,6 +452,7 @@ def delete_saved_dataset(self, name: str, project: str, allow_cache: bool = Fals Returns either the specified SavedDataset, or raises an exception if none is found """ + raise NotImplementedError @abstractmethod def list_saved_datasets( @@ -442,6 +468,7 @@ def list_saved_datasets( Returns: Returns the list of SavedDatasets """ + raise NotImplementedError # Validation reference operations @abstractmethod @@ -459,6 +486,7 @@ def apply_validation_reference( project: Feast project that this dataset belongs to commit: Whether the change should be persisted immediately """ + raise NotImplementedError @abstractmethod def delete_validation_reference(self, name: str, project: str, commit: bool = True): @@ -470,6 +498,7 @@ def delete_validation_reference(self, name: str, project: str, commit: bool = Tr project: Feast project that this object belongs to commit: Whether the change should be persisted immediately """ + raise NotImplementedError @abstractmethod def get_validation_reference( @@ -487,6 +516,7 @@ def get_validation_reference( Returns either the specified ValidationReference, or raises an exception if none is found """ + raise NotImplementedError # TODO: Needs to be implemented. 
def list_validation_references( @@ -519,6 +549,7 @@ def list_project_metadata( Returns: List of project metadata """ + raise NotImplementedError @abstractmethod def update_infra(self, infra: Infra, project: str, commit: bool = True): @@ -530,6 +561,7 @@ def update_infra(self, infra: Infra, project: str, commit: bool = True): project: Feast project that the Infra object refers to commit: Whether the change should be persisted immediately """ + raise NotImplementedError @abstractmethod def get_infra(self, project: str, allow_cache: bool = False) -> Infra: @@ -543,6 +575,7 @@ def get_infra(self, project: str, allow_cache: bool = False) -> Infra: Returns: The stored Infra object. """ + raise NotImplementedError @abstractmethod def apply_user_metadata( @@ -567,14 +600,17 @@ def proto(self) -> RegistryProto: Returns: The registry proto object. """ + raise NotImplementedError @abstractmethod def commit(self): """Commits the state of the registry cache to the remote registry store.""" + raise NotImplementedError @abstractmethod def refresh(self, project: Optional[str] = None): """Refreshes the state of the registry cache by fetching the registry state from the remote registry store.""" + raise NotImplementedError @staticmethod def _message_to_sorted_dict(message: Message) -> Dict[str, Any]: diff --git a/sdk/python/feast/infra/registry/snowflake.py b/sdk/python/feast/infra/registry/snowflake.py index c1ebf13d6b8..cdf79c78b5f 100644 --- a/sdk/python/feast/infra/registry/snowflake.py +++ b/sdk/python/feast/infra/registry/snowflake.py @@ -5,10 +5,9 @@ from datetime import datetime, timedelta from enum import Enum from threading import Lock -from typing import Any, Callable, List, Optional, Set, Union +from typing import Any, Callable, List, Literal, Optional, Set, Union -from pydantic import Field, StrictStr -from pydantic.schema import Literal +from pydantic import ConfigDict, Field, StrictStr import feast from feast import usage @@ -103,9 +102,7 @@ class 
SnowflakeRegistryConfig(RegistryConfig): schema_: Optional[str] = Field("PUBLIC", alias="schema") """ Snowflake schema name """ - - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class SnowflakeRegistry(BaseRegistry): diff --git a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py index 8eb5177ac23..8548e4dbd86 100644 --- a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py +++ b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py @@ -43,11 +43,7 @@ class GetSnowflakeConnection: - def __init__( - self, - config: str, - autocommit=True, - ): + def __init__(self, config: Any, autocommit=True): self.config = config self.autocommit = autocommit diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 3461ae058bd..c69bb4d1e72 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -2,20 +2,19 @@ import os import warnings from pathlib import Path -from typing import Any +from typing import Any, Dict, Optional import yaml from pydantic import ( BaseModel, + ConfigDict, Field, StrictInt, StrictStr, ValidationError, - root_validator, - validator, + field_validator, + model_validator, ) -from pydantic.error_wrappers import ErrorWrapper -from pydantic.typing import Dict, Optional from feast.errors import ( FeastFeatureServerTypeInvalidError, @@ -93,17 +92,13 @@ class FeastBaseModel(BaseModel): """Feast Pydantic Configuration Class""" - class Config: - arbitrary_types_allowed = True - extra = "allow" + model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow") class FeastConfigBaseModel(BaseModel): """Feast Pydantic Configuration Class""" - class Config: - arbitrary_types_allowed = True - extra = "forbid" + model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid") class RegistryConfig(FeastBaseModel): @@ -112,7 +107,7 @@ class RegistryConfig(FeastBaseModel): 
registry_type: StrictStr = "file" """ str: Provider name or a class name that implements Registry.""" - registry_store_type: Optional[StrictStr] + registry_store_type: Optional[StrictStr] = None """ str: Provider name or a class name that implements RegistryStore. """ path: StrictStr = "" @@ -126,7 +121,7 @@ class RegistryConfig(FeastBaseModel): set to infinity by setting TTL to 0 seconds, which means the cache will only be loaded once and will never expire. Users can manually refresh the cache by calling feature_store.refresh_registry() """ - s3_additional_kwargs: Optional[Dict[str, str]] + s3_additional_kwargs: Optional[Dict[str, str]] = None """ Dict[str, str]: Extra arguments to pass to boto3 when writing the registry file to S3. """ @@ -142,7 +137,7 @@ class RepoConfig(FeastBaseModel): provider: StrictStr """ str: local or gcp or aws """ - _registry_config: Any = Field(alias="registry", default="data/registry.db") + registry_config: Any = Field(alias="registry", default="data/registry.db") """ Configures the registry. Can be: 1. str: a path to a file based registry (a local path, or remote object storage path, e.g. a GCS URI) @@ -150,19 +145,19 @@ class RepoConfig(FeastBaseModel): 3. 
SnowflakeRegistryConfig: Using a Snowflake table to store the registry """ - _online_config: Any = Field(alias="online_store") + online_config: Any = Field(None, alias="online_store") """ OnlineStoreConfig: Online store configuration (optional depending on provider) """ - _offline_config: Any = Field(alias="offline_store") + offline_config: Any = Field(None, alias="offline_store") """ OfflineStoreConfig: Offline store configuration (optional depending on provider) """ - _batch_engine_config: Any = Field(alias="batch_engine") + batch_engine_config: Any = Field(None, alias="batch_engine") """ BatchMaterializationEngine: Batch materialization configuration (optional depending on provider)""" - feature_server: Optional[Any] + feature_server: Optional[Any] = None """ FeatureServerConfig: Feature server configuration (optional depending on provider) """ - flags: Any + flags: Any = None """ Flags (deprecated field): Feature flags for experimental features """ repo_path: Optional[Path] = None @@ -187,42 +182,42 @@ def __init__(self, **data: Any): self._registry = None if "registry" not in data: raise FeastRegistryNotSetError() - self._registry_config = data["registry"] + self.registry_config = data["registry"] self._offline_store = None if "offline_store" in data: - self._offline_config = data["offline_store"] + self.offline_config = data["offline_store"] else: if data["provider"] == "local": - self._offline_config = "file" + self.offline_config = "file" elif data["provider"] == "gcp": - self._offline_config = "bigquery" + self.offline_config = "bigquery" elif data["provider"] == "aws": - self._offline_config = "redshift" + self.offline_config = "redshift" elif data["provider"] == "azure": - self._offline_config = "mssql" + self.offline_config = "mssql" self._online_store = None if "online_store" in data: - self._online_config = data["online_store"] + self.online_config = data["online_store"] else: if data["provider"] == "local": - self._online_config = "sqlite" + 
self.online_config = "sqlite" elif data["provider"] == "gcp": - self._online_config = "datastore" + self.online_config = "datastore" elif data["provider"] == "aws": - self._online_config = "dynamodb" + self.online_config = "dynamodb" elif data["provider"] == "rockset": - self._online_config = "rockset" + self.online_config = "rockset" self._batch_engine = None if "batch_engine" in data: - self._batch_engine_config = data["batch_engine"] + self.batch_engine_config = data["batch_engine"] elif "batch_engine_config" in data: - self._batch_engine_config = data["batch_engine_config"] + self.batch_engine_config = data["batch_engine_config"] else: # Defaults to using local in-process materialization engine. - self._batch_engine_config = "local" + self.batch_engine_config = "local" if isinstance(self.feature_server, Dict): self.feature_server = get_feature_server_config_from_type( @@ -242,71 +237,71 @@ def __init__(self, **data: Any): @property def registry(self): if not self._registry: - if isinstance(self._registry_config, Dict): - if "registry_type" in self._registry_config: + if isinstance(self.registry_config, Dict): + if "registry_type" in self.registry_config: self._registry = get_registry_config_from_type( - self._registry_config["registry_type"] - )(**self._registry_config) + self.registry_config["registry_type"] + )(**self.registry_config) else: # This may be a custom registry store, which does not need a 'registry_type' - self._registry = RegistryConfig(**self._registry_config) - elif isinstance(self._registry_config, str): + self._registry = RegistryConfig(**self.registry_config) + elif isinstance(self.registry_config, str): # User passed in just a path to file registry self._registry = get_registry_config_from_type("file")( - path=self._registry_config + path=self.registry_config ) - elif self._registry_config: - self._registry = self._registry_config + elif self.registry_config: + self._registry = self.registry_config return self._registry @property def 
offline_store(self): if not self._offline_store: - if isinstance(self._offline_config, Dict): + if isinstance(self.offline_config, Dict): self._offline_store = get_offline_config_from_type( - self._offline_config["type"] - )(**self._offline_config) - elif isinstance(self._offline_config, str): + self.offline_config["type"] + )(**self.offline_config) + elif isinstance(self.offline_config, str): self._offline_store = get_offline_config_from_type( - self._offline_config + self.offline_config )() - elif self._offline_config: - self._offline_store = self._offline_config + elif self.offline_config: + self._offline_store = self.offline_config return self._offline_store @property def online_store(self): if not self._online_store: - if isinstance(self._online_config, Dict): + if isinstance(self.online_config, Dict): self._online_store = get_online_config_from_type( - self._online_config["type"] - )(**self._online_config) - elif isinstance(self._online_config, str): - self._online_store = get_online_config_from_type(self._online_config)() - elif self._online_config: - self._online_store = self._online_config + self.online_config["type"] + )(**self.online_config) + elif isinstance(self.online_config, str): + self._online_store = get_online_config_from_type(self.online_config)() + elif self.online_config: + self._online_store = self.online_config return self._online_store @property def batch_engine(self): if not self._batch_engine: - if isinstance(self._batch_engine_config, Dict): + if isinstance(self.batch_engine_config, Dict): self._batch_engine = get_batch_engine_config_from_type( - self._batch_engine_config["type"] - )(**self._batch_engine_config) - elif isinstance(self._batch_engine_config, str): + self.batch_engine_config["type"] + )(**self.batch_engine_config) + elif isinstance(self.batch_engine_config, str): self._batch_engine = get_batch_engine_config_from_type( - self._batch_engine_config + self.batch_engine_config )() - elif self._batch_engine_config: + elif 
self.batch_engine_config: self._batch_engine = self._batch_engine return self._batch_engine - @root_validator(pre=True) + @model_validator(mode="before") @log_exceptions - def _validate_online_store_config(cls, values): + def _validate_online_store_config(cls, values: Any) -> Any: # This method will validate whether the online store configurations are set correctly. This explicit validation # is necessary because Pydantic Unions throw very verbose and cryptic exceptions. We also use this method to # impute the default online store type based on the selected provider. For the time being this method should be @@ -347,14 +342,12 @@ def _validate_online_store_config(cls, values): online_config_class = get_online_config_from_type(online_store_type) online_config_class(**values["online_store"]) except ValidationError as e: - raise ValidationError( - [ErrorWrapper(e, loc="online_store")], - model=RepoConfig, - ) + raise e return values - @root_validator(pre=True) - def _validate_offline_store_config(cls, values): + @model_validator(mode="before") + @classmethod + def _validate_offline_store_config(cls, values: Any) -> Any: # Set empty offline_store config if it isn't set explicitly if "offline_store" not in values: values["offline_store"] = dict() @@ -385,15 +378,13 @@ def _validate_offline_store_config(cls, values): offline_config_class = get_offline_config_from_type(offline_store_type) offline_config_class(**values["offline_store"]) except ValidationError as e: - raise ValidationError( - [ErrorWrapper(e, loc="offline_store")], - model=RepoConfig, - ) + raise e return values - @root_validator(pre=True) - def _validate_feature_server_config(cls, values): + @model_validator(mode="before") + @classmethod + def _validate_feature_server_config(cls, values: Any) -> Any: # Having no feature server is the default. 
if "feature_server" not in values: return values @@ -420,15 +411,13 @@ def _validate_feature_server_config(cls, values): ) feature_server_config_class(**values["feature_server"]) except ValidationError as e: - raise ValidationError( - [ErrorWrapper(e, loc="feature_server")], - model=RepoConfig, - ) + raise e return values - @validator("project") - def _validate_project_name(cls, v): + @field_validator("project") + @classmethod + def _validate_project_name(cls, v: str) -> str: from feast.repo_operations import is_valid_name if not is_valid_name(v): @@ -438,10 +427,11 @@ def _validate_project_name(cls, v): ) return v - @validator("flags") - def _validate_flags(cls, v): - if not isinstance(v, Dict): - return + @field_validator("flags") + @classmethod + def _validate_flags(cls, v: Optional[dict]) -> Optional[dict]: + if not isinstance(v, dict): + return v _logger.warning( "Flags are no longer necessary in Feast. Experimental features will log warnings instead." @@ -463,8 +453,7 @@ def write_to_path(self, repo_path: Path): sort_keys=False, ) - class Config: - allow_population_by_field_name = True + model_config = ConfigDict(populate_by_name=True) class FeastConfigError(Exception): diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index ffb4662eb15..34d0b0c2846 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -4,11 +4,12 @@ # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.10-ci-requirements.txt # - alabaster==0.7.16 # via sphinx -altair==4.2.0 +altair==4.2.2 # via great-expectations +annotated-types==0.6.0 + # via pydantic anyio==4.2.0 # via # httpx @@ -225,6 +226,10 @@ google-auth==2.27.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 + # via + # feast (setup.py) + # google-cloud-bigquery +google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) 
google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) @@ -259,7 +264,7 @@ googleapis-common-protos[grpc]==1.62.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.15.50 +great-expectations==0.18.8 # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy @@ -326,7 +331,6 @@ importlib-metadata==6.11.0 # via # dask # feast (setup.py) - # great-expectations importlib-resources==6.1.1 # via feast (setup.py) iniconfig==2.0.0 @@ -630,11 +634,13 @@ pycodestyle==2.10.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.10.14 +pydantic==2.6.1 # via # fastapi # feast (setup.py) # great-expectations +pydantic-core==2.16.2 + # via pydantic pyflakes==3.0.1 # via flake8 pygments==2.17.2 @@ -927,6 +933,7 @@ typing-extensions==4.9.0 # great-expectations # mypy # pydantic + # pydantic-core # snowflake-connector-python # sqlalchemy2-stubs # uvicorn diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index d38a287d720..ba474f61205 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -4,7 +4,8 @@ # # pip-compile --output-file=sdk/python/requirements/py3.10-requirements.txt # - +annotated-types==0.6.0 + # via pydantic anyio==4.2.0 # via # httpx @@ -140,10 +141,12 @@ protobuf==4.23.3 # proto-plus pyarrow==15.0.0 # via feast (setup.py) -pydantic==1.10.14 +pydantic==2.6.1 # via # fastapi # feast (setup.py) +pydantic-core==2.16.2 + # via pydantic pygments==2.17.2 # via feast (setup.py) python-dateutil==2.8.2 @@ -176,7 +179,9 @@ sniffio==1.3.0 # anyio # httpx sqlalchemy[mypy]==1.4.51 - # via feast (setup.py) + # via + # feast (setup.py) + # sqlalchemy sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy starlette==0.36.3 @@ -205,12 +210,15 @@ typing-extensions==4.9.0 # fastapi # mypy # pydantic + # pydantic-core # sqlalchemy2-stubs # uvicorn urllib3==2.2.0 # via requests uvicorn[standard]==0.27.1 - # via feast (setup.py) + # via + # feast 
(setup.py) + # uvicorn uvloop==0.19.0 # via uvicorn volatile==2.1.0 diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 33dd89c362e..bf8f4fbc425 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -7,8 +7,10 @@ alabaster==0.7.13 # via sphinx -altair==4.2.0 +altair==4.2.2 # via great-expectations +annotated-types==0.6.0 + # via pydantic anyio==4.2.0 # via # httpx @@ -230,6 +232,10 @@ google-auth==2.27.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 + # via + # feast (setup.py) + # google-cloud-bigquery +google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) @@ -264,7 +270,7 @@ googleapis-common-protos[grpc]==1.62.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.15.50 +great-expectations==0.18.8 # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy @@ -332,7 +338,6 @@ importlib-metadata==6.11.0 # build # dask # feast (setup.py) - # great-expectations # jupyter-client # jupyter-lsp # jupyterlab @@ -650,11 +655,13 @@ pycodestyle==2.10.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.10.14 +pydantic==2.6.1 # via # fastapi # feast (setup.py) # great-expectations +pydantic-core==2.16.2 + # via pydantic pyflakes==3.0.1 # via flake8 pygments==2.17.2 @@ -952,6 +959,7 @@ typing-extensions==4.9.0 # ipython # mypy # pydantic + # pydantic-core # snowflake-connector-python # sqlalchemy2-stubs # starlette diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 388bb3143f3..5e8481e7706 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -4,7 +4,8 @@ # # pip-compile --output-file=sdk/python/requirements/py3.8-requirements.txt # - +annotated-types==0.6.0 + # via pydantic 
anyio==4.2.0 # via # httpx @@ -145,10 +146,12 @@ protobuf==4.23.3 # proto-plus pyarrow==15.0.0 # via feast (setup.py) -pydantic==1.10.14 +pydantic==2.6.1 # via # fastapi # feast (setup.py) +pydantic-core==2.16.2 + # via pydantic pygments==2.17.2 # via feast (setup.py) python-dateutil==2.8.2 @@ -210,13 +213,16 @@ typing-extensions==4.9.0 # fastapi # mypy # pydantic + # pydantic-core # sqlalchemy2-stubs # starlette # uvicorn urllib3==2.2.0 # via requests uvicorn[standard]==0.27.1 - # via feast (setup.py) + # via + # feast (setup.py) + # uvicorn uvloop==0.19.0 # via uvicorn volatile==2.1.0 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 9cb322d2f60..670ba1c07d6 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -4,11 +4,12 @@ # # pip-compile --extra=ci --output-file=sdk/python/requirements/py3.9-ci-requirements.txt # - alabaster==0.7.16 # via sphinx -altair==4.2.0 +altair==4.2.2 # via great-expectations +annotated-types==0.6.0 + # via pydantic anyio==4.2.0 # via # httpx @@ -225,6 +226,10 @@ google-auth==2.27.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 + # via + # feast (setup.py) + # google-cloud-bigquery +google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) google-cloud-bigquery-storage==2.24.0 # via feast (setup.py) @@ -259,7 +264,7 @@ googleapis-common-protos[grpc]==1.62.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.15.50 +great-expectations==0.18.8 # via feast (setup.py) greenlet==3.0.3 # via sqlalchemy @@ -327,7 +332,6 @@ importlib-metadata==6.11.0 # build # dask # feast (setup.py) - # great-expectations # jupyter-client # jupyter-lsp # jupyterlab @@ -637,11 +641,13 @@ pycodestyle==2.10.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.10.14 +pydantic==2.6.1 # via # fastapi # feast (setup.py) # great-expectations 
+pydantic-core==2.16.2 + # via pydantic pyflakes==3.0.1 # via flake8 pygments==2.17.2 @@ -938,6 +944,7 @@ typing-extensions==4.9.0 # ipython # mypy # pydantic + # pydantic-core # snowflake-connector-python # sqlalchemy2-stubs # starlette diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 012dac6f81f..2815ed0d787 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -4,7 +4,8 @@ # # pip-compile --output-file=sdk/python/requirements/py3.9-requirements.txt # - +annotated-types==0.6.0 + # via pydantic anyio==4.2.0 # via # httpx @@ -140,10 +141,12 @@ protobuf==4.23.3 # proto-plus pyarrow==15.0.0 # via feast (setup.py) -pydantic==1.10.14 +pydantic==2.6.1 # via # fastapi # feast (setup.py) +pydantic-core==2.16.2 + # via pydantic pygments==2.17.2 # via feast (setup.py) python-dateutil==2.8.2 @@ -205,6 +208,7 @@ typing-extensions==4.9.0 # fastapi # mypy # pydantic + # pydantic-core # sqlalchemy2-stubs # starlette # uvicorn diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 728bd9b34f7..743a1ce4a0f 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -18,7 +18,7 @@ from datetime import datetime, timedelta from multiprocessing import Process from sys import platform -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Tuple, no_type_check import pandas as pd import pytest @@ -187,9 +187,10 @@ def environment(request, worker_id): e.online_store_creator.teardown() -_config_cache = {} +_config_cache: Any = {} +@no_type_check def pytest_generate_tests(metafunc: pytest.Metafunc): """ This function receives each test function (wrapped in Metafunc) diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 027dea2c582..f745bafa132 100644 --- 
a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -99,7 +99,7 @@ "host": os.getenv("ROCKSET_APISERVER", "api.rs2.usw2.rockset.com"), } -OFFLINE_STORE_TO_PROVIDER_CONFIG: Dict[str, DataSourceCreator] = { +OFFLINE_STORE_TO_PROVIDER_CONFIG: Dict[str, Tuple[str, Type[DataSourceCreator]]] = { "file": ("local", FileDataSourceCreator), "bigquery": ("gcp", BigQueryDataSourceCreator), "redshift": ("aws", RedshiftDataSourceCreator), @@ -111,7 +111,7 @@ ] AVAILABLE_ONLINE_STORES: Dict[ - str, Tuple[Union[str, Dict[str, str]], Optional[Type[OnlineStoreCreator]]] + str, Tuple[Union[str, Dict[Any, Any]], Optional[Type[OnlineStoreCreator]]] ] = { "sqlite": ({"type": "sqlite"}, None), } @@ -169,7 +169,7 @@ AVAILABLE_ONLINE_STORES = { c.online_store["type"] if isinstance(c.online_store, dict) - else c.online_store: (c.online_store, c.online_store_creator) + else c.online_store: (c.online_store, c.online_store_creator) # type: ignore for c in FULL_REPO_CONFIGS } @@ -328,7 +328,7 @@ class UniversalFeatureViews: customer: FeatureView global_fv: FeatureView driver: FeatureView - driver_odfv: OnDemandFeatureView + driver_odfv: Optional[OnDemandFeatureView] order: FeatureView location: FeatureView field_mapping: FeatureView @@ -410,9 +410,7 @@ def construct_test_environment( online_creator = test_repo_config.online_store_creator( project, fixture_request=fixture_request ) - online_store = ( - test_repo_config.online_store - ) = online_creator.create_online_store() + online_store = online_creator.create_online_store() else: online_creator = None online_store = test_repo_config.online_store @@ -422,7 +420,7 @@ def construct_test_environment( AwsLambdaFeatureServerConfig, ) - feature_server = AwsLambdaFeatureServerConfig( + feature_server: Any = AwsLambdaFeatureServerConfig( enabled=True, execution_role_name=os.getenv( "AWS_LAMBDA_ROLE", @@ -465,7 +463,7 @@ def construct_test_environment( # Create 
feature_store.yaml out of the config with open(Path(repo_dir_name) / "feature_store.yaml", "w") as f: - yaml.safe_dump(json.loads(config.json()), f) + yaml.safe_dump(json.loads(config.model_dump_json(by_alias=True)), f) fs = FeatureStore(repo_dir_name) # We need to initialize the registry, because if nothing is applied in the test before tearing down diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py index d64463606ff..5e5062291d5 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py @@ -42,19 +42,20 @@ def create_data_source( A Data source object, pointing to a table or file that is uploaded/persisted for the purpose of the test. """ - ... + raise NotImplementedError @abstractmethod def create_offline_store_config(self) -> FeastConfigBaseModel: - ... + raise NotImplementedError @abstractmethod def create_saved_dataset_destination(self) -> SavedDatasetStorage: - ... + raise NotImplementedError + @abstractmethod def create_logged_features_destination(self) -> LoggingDestination: raise NotImplementedError @abstractmethod def teardown(self): - ... 
+ raise NotImplementedError diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py index 215d19ba7f3..066497a0bcd 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py @@ -64,10 +64,9 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - timestamp_field="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, - **kwargs, + timestamp_field: Optional[str] = "ts", ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py index 3263785683e..008bb8d8815 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py @@ -39,9 +39,9 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - timestamp_field="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, + timestamp_field: Optional[str] = "ts", ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) @@ -94,9 +94,9 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - timestamp_field="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, + timestamp_field: Optional[str] = "ts", ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) @@ -167,11 +167,10 @@ def _upload_parquet_file(self, df, file_name, minio_endpoint): def create_data_source( self, df: pd.DataFrame, - destination_name: Optional[str] = None, - suffix: Optional[str] = None, - timestamp_field="ts", + 
destination_name: str, created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, + timestamp_field: Optional[str] = "ts", ) -> DataSource: filename = f"{destination_name}.parquet" port = self.minio.get_exposed_port("9000") diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py index e6f20d6125b..5a4e3f10854 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py @@ -42,16 +42,17 @@ def __init__(self, project_name: str, *args, **kwargs): iam_role=os.getenv( "AWS_IAM_ROLE", "arn:aws:iam::402087665549:role/redshift_s3_access_role" ), + workgroup="", ) def create_data_source( self, df: pd.DataFrame, destination_name: str, - suffix: Optional[str] = None, - timestamp_field="ts", + event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, + timestamp_field: Optional[str] = "ts", ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py index 1414291a18d..1481b11a106 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py @@ -48,10 +48,10 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - suffix: Optional[str] = None, - timestamp_field="ts", + event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, + timestamp_field: Optional[str] = "ts", ) -> DataSource: destination_name = self.get_prefixed_table_name(destination_name) diff --git 
a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index 5938a0c936e..9bb8aae77f3 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -14,6 +14,7 @@ StreamFeatureView, ) from feast.data_source import DataSource, RequestSource +from feast.feature_view_projection import FeatureViewProjection from feast.types import Array, FeastType, Float32, Float64, Int32, Int64 from tests.integration.feature_repos.universal.entities import ( customer, @@ -55,7 +56,7 @@ def conv_rate_plus_100(features_df: pd.DataFrame) -> pd.DataFrame: def conv_rate_plus_100_feature_view( - sources: Dict[str, Union[RequestSource, FeatureView]], + sources: List[Union[FeatureView, RequestSource, FeatureViewProjection]], infer_features: bool = False, features: Optional[List[Field]] = None, ) -> OnDemandFeatureView: diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py b/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py index 10a81437395..4932001e76f 100644 --- a/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py +++ b/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py @@ -1,4 +1,4 @@ -from abc import ABC +from abc import ABC, abstractmethod from feast.repo_config import FeastConfigBaseModel @@ -10,5 +10,6 @@ def __init__(self, project_name: str, **kwargs): def create_online_store(self) -> FeastConfigBaseModel: raise NotImplementedError + @abstractmethod def teardown(self): raise NotImplementedError diff --git a/sdk/python/tests/unit/cli/test_cli_chdir.py b/sdk/python/tests/unit/cli/test_cli_chdir.py index cf1d0312272..12ca8f6b084 100644 --- a/sdk/python/tests/unit/cli/test_cli_chdir.py +++ b/sdk/python/tests/unit/cli/test_cli_chdir.py @@ -15,7 +15,7 @@ def test_cli_chdir() -> None: # Make 
sure the path is absolute by resolving any symlinks temp_path = Path(temp_dir).resolve() result = runner.run(["init", "my_project"], cwd=temp_path) - repo_path = temp_path / "my_project" / "feature_repo" + repo_path = str(temp_path / "my_project" / "feature_repo") assert result.returncode == 0 result = runner.run(["--chdir", repo_path, "apply"], cwd=temp_path) @@ -44,7 +44,12 @@ def test_cli_chdir() -> None: assert result.returncode == 0 result = runner.run( - ["--chdir", repo_path, "materialize-incremental", end_date.isoformat()], + [ + "--chdir", + repo_path, + "materialize-incremental", + end_date.isoformat(), + ], cwd=temp_path, ) assert result.returncode == 0 diff --git a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py index 220bdba0dae..f93237fce5e 100644 --- a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py +++ b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py @@ -61,12 +61,12 @@ def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: return pyarrow.Table() @property - def full_feature_names(self) -> bool: + def full_feature_names(self) -> bool: # type: ignore """Returns True if full feature names should be applied to the results of the query.""" return False @property - def on_demand_feature_views(self) -> List[OnDemandFeatureView]: + def on_demand_feature_views(self) -> List[OnDemandFeatureView]: # type: ignore """Returns a list containing all the on demand feature views to be handled.""" return [] @@ -118,6 +118,7 @@ def retrieval_job(request, environment): database="feast", s3_staging_location="s3://feast-integration-tests/redshift/tests/ingestion", iam_role="arn:aws:iam::402087665549:role/redshift_s3_access_role", + workgroup="", ) environment.test_repo_config.offline_store = offline_store_config return RedshiftRetrievalJob( diff --git a/sdk/python/tests/unit/infra/offline_stores/test_redshift.py 
b/sdk/python/tests/unit/infra/offline_stores/test_redshift.py index 049977489b9..48ee99e89ff 100644 --- a/sdk/python/tests/unit/infra/offline_stores/test_redshift.py +++ b/sdk/python/tests/unit/infra/offline_stores/test_redshift.py @@ -31,6 +31,7 @@ def test_offline_write_batch( user="user", iam_role="abcdef", s3_staging_location="s3://bucket/path", + workgroup="", ), ) diff --git a/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py index 42229f8683f..ca4ed6472b9 100644 --- a/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py +++ b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py @@ -45,8 +45,7 @@ def test_nullable_online_store_aws(): entity_key_serialization_version: 2 """ ), - expect_error="__root__ -> offline_store -> __root__\n" - " please specify either cluster_id & user if using provisioned clusters, or workgroup if using serverless (type=value_error)", + expect_error="4 validation errors for RepoConfig\nregion\n Field required", ) @@ -154,8 +153,7 @@ def test_extra_field(): path: "online_store.db" """ ), - expect_error="__root__ -> online_store -> that_field_should_not_be_here\n" - " extra fields not permitted (type=value_error.extra)", + expect_error="1 validation error for RepoConfig\nthat_field_should_not_be_here\n Extra inputs are not permitted", ) @@ -186,7 +184,7 @@ def test_bad_type(): path: 100500 """ ), - expect_error="__root__ -> online_store -> path\n str type expected", + expect_error="1 validation error for RepoConfig\npath\n Input should be a valid string", ) @@ -201,9 +199,7 @@ def test_no_project(): entity_key_serialization_version: 2 """ ), - expect_error="1 validation error for RepoConfig\n" - "project\n" - " field required (type=value_error.missing)", + expect_error="1 validation error for RepoConfig\nproject\n Field required", ) diff --git a/sdk/python/tests/utils/e2e_test_validation.py b/sdk/python/tests/utils/e2e_test_validation.py index 
bacc8c17206..d8c769f12c9 100644 --- a/sdk/python/tests/utils/e2e_test_validation.py +++ b/sdk/python/tests/utils/e2e_test_validation.py @@ -193,7 +193,7 @@ def make_feature_store_yaml( repo_path=str(Path(repo_dir_name)), entity_key_serialization_version=2, ) - config_dict = config.dict() + config_dict = config.model_dump(by_alias=True) if ( isinstance(config_dict["online_store"], dict) and "redis_type" in config_dict["online_store"] diff --git a/setup.py b/setup.py index ebc4df31a85..a73ef31b061 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ "protobuf<4.23.4,>3.20", "proto-plus>=1.20.0,<2", "pyarrow>=4", - "pydantic>=1,<2", + "pydantic>=2.0.0", "pygments>=2.12.0,<3", "PyYAML>=5.4.0,<7", "requests", @@ -126,7 +126,7 @@ "cassandra-driver>=3.24.0,<4", ] -GE_REQUIRED = ["great_expectations>=0.15.41,<0.16.0"] +GE_REQUIRED = ["great_expectations>=0.15.41"] AZURE_REQUIRED = [ "azure-storage-blob>=0.37.0", From dd96150e2a5829401f793a51da4b3594677e570d Mon Sep 17 00:00:00 2001 From: Tyler Rhodes <767526+trhodeos@users.noreply.github.com> Date: Sat, 17 Feb 2024 11:41:43 -0600 Subject: [PATCH 26/30] fix: Update typeguard version to >=4.0.0 (#3837) --- sdk/python/requirements/py3.10-ci-requirements.txt | 2 +- sdk/python/requirements/py3.10-requirements.txt | 2 +- sdk/python/requirements/py3.8-ci-requirements.txt | 2 +- sdk/python/requirements/py3.8-requirements.txt | 2 +- sdk/python/requirements/py3.9-ci-requirements.txt | 2 +- sdk/python/requirements/py3.9-requirements.txt | 2 +- sdk/python/tests/unit/test_feature_views.py | 3 ++- setup.py | 2 +- 8 files changed, 9 insertions(+), 8 deletions(-) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 34d0b0c2846..f20bc05df90 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -895,7 +895,7 @@ traitlets==5.14.1 # nbformat trino==0.327.0 # via feast (setup.py) -typeguard==2.13.3 
+typeguard==4.1.5 # via feast (setup.py) types-protobuf==3.19.22 # via diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index ba474f61205..3943662d010 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -200,7 +200,7 @@ toolz==0.12.1 # partd tqdm==4.66.2 # via feast (setup.py) -typeguard==2.13.3 +typeguard==4.1.5 # via feast (setup.py) types-protobuf==4.24.0.20240129 # via mypy-protobuf diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index bf8f4fbc425..afa43ec2a2b 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -919,7 +919,7 @@ traitlets==5.14.1 # nbformat trino==0.327.0 # via feast (setup.py) -typeguard==2.13.3 +typeguard==4.1.5 # via feast (setup.py) types-protobuf==3.19.22 # via diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 5e8481e7706..079064a9ecc 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -203,7 +203,7 @@ toolz==0.12.1 # partd tqdm==4.66.2 # via feast (setup.py) -typeguard==2.13.3 +typeguard==4.1.5 # via feast (setup.py) types-protobuf==4.24.0.20240129 # via mypy-protobuf diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 670ba1c07d6..6c26f889e27 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -904,7 +904,7 @@ traitlets==5.14.1 # nbformat trino==0.327.0 # via feast (setup.py) -typeguard==2.13.3 +typeguard==4.1.5 # via feast (setup.py) types-protobuf==3.19.22 # via diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 2815ed0d787..182cb7ad076 100644 --- 
a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -198,7 +198,7 @@ toolz==0.12.1 # partd tqdm==4.66.2 # via feast (setup.py) -typeguard==2.13.3 +typeguard==4.1.5 # via feast (setup.py) types-protobuf==4.24.0.20240129 # via mypy-protobuf diff --git a/sdk/python/tests/unit/test_feature_views.py b/sdk/python/tests/unit/test_feature_views.py index afef332d372..20863645b77 100644 --- a/sdk/python/tests/unit/test_feature_views.py +++ b/sdk/python/tests/unit/test_feature_views.py @@ -1,6 +1,7 @@ from datetime import timedelta import pytest +from typeguard import TypeCheckError from feast.aggregation import Aggregation from feast.batch_feature_view import BatchFeatureView @@ -278,7 +279,7 @@ def test_hash(): def test_field_types(): - with pytest.raises(TypeError): + with pytest.raises(TypeCheckError): Field(name="name", dtype=ValueType.INT32) diff --git a/setup.py b/setup.py index a73ef31b061..c14d64557a2 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ "tenacity>=7,<9", "toml>=0.10.0,<1", "tqdm>=4,<5", - "typeguard==2.13.3", + "typeguard>=4.0.0", "fastapi>=0.68.0", "uvicorn[standard]>=0.14.0,<1", "gunicorn", From dd79dbbac90caaf0617a5046c84a2618e532980b Mon Sep 17 00:00:00 2001 From: Shuchu Han Date: Sat, 17 Feb 2024 22:30:13 -0500 Subject: [PATCH 27/30] fix: Fix typo as the cli does not support shortcut-f option. (#3954) * fix: Fix typo as the cli does not support shortcut-f option. Signed-off-by: Shuchu Han * fix: add -f option as a shortcut of feature-store-yaml. 
Signed-off-by: Shuchu Han --------- Signed-off-by: Shuchu Han --- sdk/python/feast/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 985c44b821f..7ce8aaef2bc 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -76,6 +76,7 @@ def format_options(self, ctx: click.Context, formatter: click.HelpFormatter): ) @click.option( "--feature-store-yaml", + "-f", help="Override the directory where the CLI should look for the feature_store.yaml file.", ) @click.pass_context From 6b8e96c982a50587a13216666085fc61494cdfc9 Mon Sep 17 00:00:00 2001 From: Chester Date: Tue, 20 Feb 2024 09:38:52 +0800 Subject: [PATCH 28/30] fix: Revert mypy config (#3952) * fetch_arrow_all returns empty table Signed-off-by: Chester Ong * fix spark_kafka_processor typing errors Signed-off-by: Chester Ong * fix correct return type Signed-off-by: Chester Ong * revert _to_arrow_internal Signed-off-by: Chester Ong * revert kafkaStreamProcessor changes, change base type instead Signed-off-by: Chester Ong --------- Signed-off-by: Chester Ong --- Makefile | 2 +- .../feast/infra/contrib/stream_processor.py | 8 +++--- .../tests/data_source.py | 27 +++++++++++-------- .../feast/infra/offline_stores/snowflake.py | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 6736e64078f..1598664f83f 100644 --- a/Makefile +++ b/Makefile @@ -310,7 +310,7 @@ format-python: cd ${ROOT_DIR}/sdk/python; python -m black --target-version py38 feast tests lint-python: - cd ${ROOT_DIR}/sdk/python; python -m mypy --exclude=/tests/ --follow-imports=skip feast + cd ${ROOT_DIR}/sdk/python; python -m mypy feast cd ${ROOT_DIR}/sdk/python; python -m isort feast/ tests/ --check-only cd ${ROOT_DIR}/sdk/python; python -m flake8 feast/ tests/ cd ${ROOT_DIR}/sdk/python; python -m black --check feast tests diff --git a/sdk/python/feast/infra/contrib/stream_processor.py b/sdk/python/feast/infra/contrib/stream_processor.py index 
c4620f4ca1d..3f1fe085109 100644 --- a/sdk/python/feast/infra/contrib/stream_processor.py +++ b/sdk/python/feast/infra/contrib/stream_processor.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from types import MethodType -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Any, Optional from pyspark.sql import DataFrame from typing_extensions import TypeAlias @@ -51,7 +51,9 @@ def __init__( self.data_source = data_source @abstractmethod - def ingest_stream_feature_view(self, to: PushMode = PushMode.ONLINE) -> None: + def ingest_stream_feature_view( + self, to: PushMode = PushMode.ONLINE + ) -> Optional[Any]: """ Ingests data from the stream source attached to the stream feature view; transforms the data and then persists it to the online store and/or offline store, depending on the 'to' parameter. @@ -75,7 +77,7 @@ def _construct_transformation_plan(self, table: StreamTable) -> StreamTable: raise NotImplementedError @abstractmethod - def _write_stream_data(self, table: StreamTable, to: PushMode) -> None: + def _write_stream_data(self, table: StreamTable, to: PushMode) -> Optional[Any]: """ Launches a job to persist stream data to the online store and/or offline store, depending on the 'to' parameter, and returns a handle for the job. 
diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py index 46d5c20e977..f50cdc4c41f 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py @@ -1,5 +1,5 @@ import logging -from typing import Dict, Optional +from typing import Dict, Literal, Optional import pandas as pd import pytest @@ -12,6 +12,7 @@ PostgreSQLSource, ) from feast.infra.utils.postgres.connection_utils import df_to_postgres_table +from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig from tests.integration.feature_repos.universal.data_source_creator import ( DataSourceCreator, ) @@ -26,6 +27,10 @@ POSTGRES_DB = "test" +class PostgreSQLOnlineStoreConfig(PostgreSQLConfig): + type: Literal["postgres"] = "postgres" + + @pytest.fixture(scope="session") def postgres_container(): container = ( @@ -106,17 +111,17 @@ def create_offline_store_config(self) -> PostgreSQLOfflineStoreConfig: def get_prefixed_table_name(self, suffix: str) -> str: return f"{self.project_name}_{suffix}" - def create_online_store(self) -> Dict[str, str]: + def create_online_store(self) -> PostgreSQLOnlineStoreConfig: assert self.container - return { - "type": "postgres", - "host": "localhost", - "port": self.container.get_exposed_port(5432), - "database": POSTGRES_DB, - "db_schema": "feature_store", - "user": POSTGRES_USER, - "password": POSTGRES_PASSWORD, - } + return PostgreSQLOnlineStoreConfig( + type="postgres", + host="localhost", + port=self.container.get_exposed_port(5432), + database=POSTGRES_DB, + db_schema="feature_store", + user=POSTGRES_USER, + password=POSTGRES_PASSWORD, + ) def create_saved_dataset_destination(self): # FIXME: ... 
diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index 66e7e78651d..32cda2d6b65 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -470,7 +470,7 @@ def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: pa_table = execute_snowflake_statement( self.snowflake_conn, self.to_sql() - ).fetch_arrow_all() + ).fetch_arrow_all(force_return_table=False) if pa_table: return pa_table From b83a70227c6afe7258328ff5847a26b526d0b5df Mon Sep 17 00:00:00 2001 From: Chester Date: Tue, 20 Feb 2024 09:42:46 +0800 Subject: [PATCH 29/30] fix: Using version args to install the correct feast version (#3953) * using version args to install the correct feast version Signed-off-by: Chester Ong * revert the COPY command Signed-off-by: Chester Ong --------- Signed-off-by: Chester Ong --- .../feast/infra/feature_servers/multicloud/Dockerfile | 7 +++++-- .../feast/infra/feature_servers/multicloud/Dockerfile.dev | 8 ++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile index fdd8e3ac51d..4527c5b1566 100644 --- a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile +++ b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile @@ -1,5 +1,9 @@ FROM python:3.8 +# Input the feast version to install +# This requires feast package to be available in pypi before building this image +ARG VERSION + RUN apt update && \ apt install -y \ jq \ @@ -7,8 +11,7 @@ RUN apt update && \ build-essential RUN pip install pip --upgrade -RUN pip install "feast[aws,gcp,snowflake,redis,go,mysql,postgres]" - +RUN pip install "feast[aws,gcp,snowflake,redis,go,mysql,postgres]==${VERSION}" RUN apt update RUN apt install -y -V ca-certificates 
lsb-release wget diff --git a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev index 3fc1355d7a8..015e3c7ee82 100644 --- a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev +++ b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev @@ -1,5 +1,9 @@ FROM python:3.8 +# Input the feast version to install +# This requires feast package to be available in pypi before building this image +ARG VERSION + RUN apt update && \ apt install -y \ jq \ @@ -9,11 +13,11 @@ RUN apt update && \ RUN pip install pip --upgrade COPY . . -RUN pip install "feast[aws,gcp,snowflake,redis,go,mysql,postgres]" +RUN pip install "feast[aws,gcp,snowflake,redis,go,mysql,postgres]==${VERSION}" RUN apt update RUN apt install -y -V ca-certificates lsb-release wget RUN wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb RUN apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb RUN apt update -RUN apt -y install libarrow-dev \ No newline at end of file +RUN apt -y install libarrow-dev From 1cc94f2d23f88e0d9412b2fab8761abc81f5d35c Mon Sep 17 00:00:00 2001 From: John Lemmon <137814163+JohnLemmonMedely@users.noreply.github.com> Date: Wed, 21 Feb 2024 22:50:37 -0600 Subject: [PATCH 30/30] fix: Bugfix for grabbing historical data from Snowflake with array type features. (#3964) Bugfix for grabbing historical data from Snowflake with array type features that are null for an entity. 
Update docs to reflect array support in Snowflake Signed-off-by: john.lemmon --- docs/reference/data-sources/overview.md | 20 +++++++------- docs/reference/data-sources/snowflake.md | 2 +- .../feast/infra/offline_stores/snowflake.py | 4 ++- .../infra/offline_stores/test_snowflake.py | 26 +++++++++++++++++++ 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/docs/reference/data-sources/overview.md b/docs/reference/data-sources/overview.md index 112d4168d30..302c19b049c 100644 --- a/docs/reference/data-sources/overview.md +++ b/docs/reference/data-sources/overview.md @@ -19,13 +19,13 @@ Details for each specific data source can be found [here](README.md). Below is a matrix indicating which data sources support which types. | | File | BigQuery | Snowflake | Redshift | Postgres | Spark | Trino | -| :-------------------------------- | :-- | :-- | :-- | :-- | :-- | :-- | :-- | -| `bytes` | yes | yes | yes | yes | yes | yes | yes | -| `string` | yes | yes | yes | yes | yes | yes | yes | -| `int32` | yes | yes | yes | yes | yes | yes | yes | -| `int64` | yes | yes | yes | yes | yes | yes | yes | -| `float32` | yes | yes | yes | yes | yes | yes | yes | -| `float64` | yes | yes | yes | yes | yes | yes | yes | -| `bool` | yes | yes | yes | yes | yes | yes | yes | -| `timestamp` | yes | yes | yes | yes | yes | yes | yes | -| array types | yes | yes | no | no | yes | yes | no | \ No newline at end of file +| :-------------------------------- | :-- | :-- |:----------| :-- | :-- | :-- | :-- | +| `bytes` | yes | yes | yes | yes | yes | yes | yes | +| `string` | yes | yes | yes | yes | yes | yes | yes | +| `int32` | yes | yes | yes | yes | yes | yes | yes | +| `int64` | yes | yes | yes | yes | yes | yes | yes | +| `float32` | yes | yes | yes | yes | yes | yes | yes | +| `float64` | yes | yes | yes | yes | yes | yes | yes | +| `bool` | yes | yes | yes | yes | yes | yes | yes | +| `timestamp` | yes | yes | yes | yes | yes | yes | yes | +| array types | yes | yes | yes | 
no | yes | yes | no | \ No newline at end of file diff --git a/docs/reference/data-sources/snowflake.md b/docs/reference/data-sources/snowflake.md index 82bf5cb4d49..98a56e09f87 100644 --- a/docs/reference/data-sources/snowflake.md +++ b/docs/reference/data-sources/snowflake.md @@ -46,5 +46,5 @@ The full set of configuration options is available [here](https://rtd.feast.dev/ ## Supported Types -Snowflake data sources support all eight primitive types, but currently do not support array types. +Snowflake data sources support all eight primitive types. Array types are also supported but not with type inference. For a comparison against other batch data sources, please see [here](overview.md#functionality-matrix). diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index 32cda2d6b65..14752fd8572 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -463,7 +463,9 @@ def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: Array(Float32), Array(Bool), ]: - df[feature.name] = [json.loads(x) for x in df[feature.name]] + df[feature.name] = [ + json.loads(x) if x else None for x in df[feature.name] + ] return df diff --git a/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py b/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py index afc3ae97aef..ac55f123bbb 100644 --- a/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py +++ b/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py @@ -1,14 +1,18 @@ import re from unittest.mock import ANY, MagicMock, patch +import pandas as pd import pytest +from pytest_mock import MockFixture +from feast import FeatureView, Field, FileSource from feast.infra.offline_stores.snowflake import ( SnowflakeOfflineStoreConfig, SnowflakeRetrievalJob, ) from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig from feast.repo_config import RepoConfig 
+from feast.types import Array, String @pytest.fixture(params=["s3", "s3gov"]) @@ -55,3 +59,25 @@ def test_to_remote_storage(retrieval_job): mock_get_file_names_from_copy.assert_called_once_with(ANY, ANY) native_path = mock_get_file_names_from_copy.call_args[0][1] assert re.match("^s3://.*", native_path), "path should be s3://*" + + +def test_snowflake_to_df_internal( + retrieval_job: SnowflakeRetrievalJob, mocker: MockFixture +): + mock_execute = mocker.patch( + "feast.infra.offline_stores.snowflake.execute_snowflake_statement" + ) + mock_execute.return_value.fetch_pandas_all.return_value = pd.DataFrame.from_dict( + {"feature1": ['["1", "2", "3"]', None, "[]"]} # For Valid, Null, and Empty + ) + + feature_view = FeatureView( + name="my-feature-view", + entities=[], + schema=[ + Field(name="feature1", dtype=Array(String)), + ], + source=FileSource(path="dummy.path"), # Dummy value + ) + retrieval_job._feature_views = [feature_view] + retrieval_job._to_df_internal()