Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[sling] remove deprecations #23461

Merged
merged 2 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,14 @@ Assets (Sling)

.. autoclass:: DagsterSlingTranslator

.. autofunction:: build_sling_asset

Resources (Sling)
=================

.. autoclass:: SlingResource
:members: sync, replicate
:members: replicate

.. autoclass:: SlingConnectionResource

.. autoclass:: dagster_embedded_elt.sling.resources.SlingSourceConnection
.. autoclass:: dagster_embedded_elt.sling.resources.SlingTargetConnection

----

*******************************
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
from dagster_embedded_elt.sling.asset_decorator import sling_assets
from dagster_embedded_elt.sling.asset_defs import build_sling_asset
from dagster_embedded_elt.sling.dagster_sling_translator import DagsterSlingTranslator
from dagster_embedded_elt.sling.resources import (
SlingConnectionResource,
SlingMode,
SlingResource,
SlingSourceConnection,
SlingTargetConnection,
)
from dagster_embedded_elt.sling.resources import SlingConnectionResource, SlingMode, SlingResource
from dagster_embedded_elt.sling.sling_replication import SlingReplicationParam

__all__ = [
"SlingResource",
"SlingMode",
"build_sling_asset",
"SlingSourceConnection",
"SlingTargetConnection",
"sling_assets",
"DagsterSlingTranslator",
"SlingReplicationParam",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,111 +0,0 @@
import re
from typing import Any, Dict, List, Optional, Union

from dagster import (
AssetExecutionContext,
AssetsDefinition,
AssetSpec,
MaterializeResult,
multi_asset,
)
from dagster._annotations import deprecated
from dagster._utils.warnings import deprecation_warning

from dagster_embedded_elt.sling.resources import SlingMode, SlingResource


@deprecated(
    breaking_version="0.23.0",
    additional_warn_text="Use `@sling_assets` instead.",
)
def build_sling_asset(
    asset_spec: AssetSpec,
    source_stream: str,
    target_object: str,
    mode: SlingMode = SlingMode.FULL_REFRESH,
    primary_key: Optional[Union[str, List[str]]] = None,
    update_key: Optional[str] = None,
    source_options: Optional[Dict[str, Any]] = None,
    target_options: Optional[Dict[str, Any]] = None,
    sling_resource_key: str = "sling",
) -> AssetsDefinition:
    """Asset Factory for using Sling to sync data from a source stream to a target object.

    Args:
        asset_spec (AssetSpec): The AssetSpec to use to materialize this asset.
        source_stream (str): The source stream to sync from. This can be a table, a query, or a path.
        target_object (str): The target object to sync to. This can be a table, or a path.
        mode (SlingMode, optional): The sync mode to use when syncing. Defaults to SlingMode.FULL_REFRESH.
        primary_key (Optional[Union[str, List[str]]], optional): The optional primary key to use when syncing.
            A single string is wrapped into a one-element list before being passed to Sling.
        update_key (Optional[str], optional): The optional update key to use when syncing.
        source_options (Optional[Dict[str, Any]], optional): Any optional Sling source options to use when syncing.
        target_options (Optional[Dict[str, Any]], optional): Any optional target options to use when syncing.
        sling_resource_key (str, optional): The resource key for the SlingResource. Defaults to "sling".

    Returns:
        AssetsDefinition: A single-spec multi-asset whose materialization runs ``SlingResource.sync``
        and, when a row count was seen in the output, reports it as ``row_count`` metadata.

    Examples:
        Creating a Sling asset that syncs from a file to a table:

        .. code-block:: python

            asset_spec = AssetSpec(key=["main", "dest_tbl"])
            asset_def = build_sling_asset(
                asset_spec=asset_spec,
                source_stream="file:///tmp/test.csv",
                target_object="main.dest_table",
                mode=SlingMode.INCREMENTAL,
                primary_key="id"
            )

        Creating a Sling asset that syncs from a table to a file with a full refresh:

        .. code-block:: python

            asset_spec = AssetSpec(key="test.csv")
            asset_def = build_sling_asset(
                asset_spec=asset_spec,
                source_stream="main.dest_table",
                target_object="file:///tmp/test.csv",
                mode=SlingMode.FULL_REFRESH
            )

    """
    # `SlingResource.sync` is annotated to take a list of keys, so normalize a bare string.
    if primary_key is not None and not isinstance(primary_key, list):
        primary_key = [primary_key]

    # Compiled once here instead of being looked up for every line of Sling output.
    row_count_pattern = re.compile(r"(\d+) rows")

    @multi_asset(
        name=asset_spec.key.to_python_identifier(),
        compute_kind="sling",
        specs=[asset_spec],
        required_resource_keys={sling_resource_key},
    )
    def sync(context: AssetExecutionContext) -> MaterializeResult:
        # Warn again at execution time; the text matches the decorator on the factory
        # (`sling_assets` is a decorator, not a property).
        deprecation_warning(
            "build_sling_asset",
            breaking_version="0.23.0",
            additional_warn_text="Use `@sling_assets` instead.",
        )

        sling: SlingResource = getattr(context.resources, sling_resource_key)
        last_row_count_observed = None
        for stdout_line in sling.sync(
            source_stream=source_stream,
            target_object=target_object,
            mode=mode,
            primary_key=primary_key,
            update_key=update_key,
            source_options=source_options,
            target_options=target_options,
        ):
            # Keep the most recent "<N> rows" figure seen in the output.
            match = row_count_pattern.search(stdout_line)
            if match:
                last_row_count_observed = int(match.group(1))
            context.log.info(stdout_line)

        return MaterializeResult(
            metadata=(
                {} if last_row_count_observed is None else {"row_count": last_row_count_observed}
            )
        )

    return sync
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@
PermissiveConfig,
get_dagster_logger,
)
from dagster._annotations import deprecated, public
from dagster._annotations import public
from dagster._utils.env import environ
from dagster._utils.warnings import deprecation_warning
from pydantic import Field

from dagster_embedded_elt.sling.asset_decorator import (
Expand All @@ -39,7 +38,6 @@
# Module-level logger obtained from Dagster's `get_dagster_logger` helper.
logger = get_dagster_logger()

# Matches ANSI escape sequences: ESC (0x1B) followed either by a single byte in the
# ranges `@`-`Z` / `\`-`_`, or by a `[`-introduced sequence of parameter bytes,
# intermediate bytes and one final byte in `@`-`~`.
# NOTE(review): presumably used to strip terminal control codes from Sling CLI output;
# the call site is not visible in this view.
ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
# Template for the deprecation notice attached to the legacy connection classes;
# `{name}` is filled in with the deprecated class name via `.format(name=...)`.
DEPRECATION_WARNING_TEXT = "{name} has been deprecated, use `SlingConnectionResource` for both source and target connections."


@public
Expand All @@ -56,83 +54,6 @@ class SlingMode(str, Enum):
BACKFILL = "backfill"


@deprecated(
    breaking_version="0.23.0",
    additional_warn_text=DEPRECATION_WARNING_TEXT.format(name="SlingSourceConnection"),
)
class SlingSourceConnection(PermissiveConfig):
    """A Sling Source Connection defines the source connection used by :py:class:`~dagster_embedded_elt.sling.SlingResource`.

    Deprecated: use ``SlingConnectionResource`` for both source and target connections.

    Examples:
        Creating a Sling Source for a file, such as CSV or JSON:

        .. code-block:: python

            source = SlingSourceConnection(type="file")

        Create a Sling Source for a Postgres database, using a connection string:

        .. code-block:: python

            source = SlingSourceConnection(type="postgres", connection_string=EnvVar("POSTGRES_CONNECTION_STRING"))
            source = SlingSourceConnection(type="postgres", connection_string="postgresql://user:password@host:port/schema")

        Create a Sling Source for a Postgres database, using keyword arguments, as described here:
        https://docs.slingdata.io/connections/database-connections/postgres

        .. code-block:: python

            source = SlingSourceConnection(type="postgres", host="host", user="hunter42", password=EnvVar("POSTGRES_PASSWORD"))

    """

    type: str = Field(description="Type of the source connection. Use 'file' for local storage.")
    connection_string: Optional[str] = Field(
        description="The connection string for the source database.",
        default=None,
    )


@deprecated(
    breaking_version="0.23.0",
    additional_warn_text=DEPRECATION_WARNING_TEXT.format(name="SlingTargetConnection"),
)
class SlingTargetConnection(PermissiveConfig):
    """A Sling Target Connection defines the target connection used by :py:class:`~dagster_embedded_elt.sling.SlingResource`.

    Deprecated: use ``SlingConnectionResource`` for both source and target connections.

    Examples:
        Creating a Sling Target for a file, such as CSV or JSON:

        .. code-block:: python

            target = SlingTargetConnection(type="file")

        Create a Sling Target for a Postgres database, using a connection string:

        .. code-block:: python

            target = SlingTargetConnection(type="postgres", connection_string="postgresql://user:password@host:port/schema")
            target = SlingTargetConnection(type="postgres", connection_string=EnvVar("POSTGRES_CONNECTION_STRING"))

        Create a Sling Target for a Postgres database, using keyword arguments, as described here:
        https://docs.slingdata.io/connections/database-connections/postgres

        .. code-block:: python

            target = SlingTargetConnection(type="postgres", host="host", user="hunter42", password=EnvVar("POSTGRES_PASSWORD"))

    """

    type: str = Field(
        description="Type of the destination connection. Use 'file' for local storage."
    )
    connection_string: Optional[str] = Field(
        description="The connection string for the target database.",
        default=None,
    )


@public
class SlingConnectionResource(PermissiveConfig):
"""A representation of a connection to a database or file to be used by Sling. This resource can be used as a source or a target for a Sling syncs.
Expand Down Expand Up @@ -196,8 +117,6 @@ class SlingResource(ConfigurableResource):

Args:
connections (List[SlingConnectionResource]): A list of connections to use for the replication.
source_connection (Optional[SlingSourceConnection]): Deprecated, use `connections` instead.
target_connection (Optional[SlingTargetConnection]): Deprecated, use `connections` instead.

Examples:
.. code-block:: python
Expand All @@ -224,8 +143,6 @@ class SlingResource(ConfigurableResource):
)
"""

source_connection: Optional[SlingSourceConnection] = None
target_connection: Optional[SlingTargetConnection] = None
connections: List[SlingConnectionResource] = []
_stdout: List[str] = []

Expand Down Expand Up @@ -297,22 +214,8 @@ def _clean_connection_dict(self, d: Dict[str, Any]) -> Dict[str, Any]:
return d

def prepare_environment(self) -> Dict[str, Any]:
sling_source = None
sling_target = None

if self.source_connection:
sling_source = self._clean_connection_dict(dict(self.source_connection))
if self.target_connection:
sling_target = self._clean_connection_dict(dict(self.target_connection))

env = {}

if sling_source:
env["SLING_SOURCE"] = json.dumps(sling_source)

if sling_target:
env["SLING_TARGET"] = json.dumps(sling_target)

for conn in self.connections:
d = self._clean_connection_dict(dict(conn))
env[conn.name] = json.dumps(d)
Expand All @@ -322,22 +225,6 @@ def prepare_environment(self) -> Dict[str, Any]:
@contextlib.contextmanager
def _setup_config(self) -> Generator[None, None, None]:
    """Run the enclosed block with the prepared Sling connection environment applied.

    Emits a deprecation warning for each legacy connection field
    (``source_connection`` / ``target_connection``) that is set, then wraps the
    block in ``environ(...)`` using the mapping from ``prepare_environment()``.
    """
    # (field name, warning text) for each legacy field, checked in this order.
    legacy_fields = (
        (
            "source_connection",
            "source_connection has been deprecated, provide a list of SlingConnectionResource to the `connections` parameter instead.",
        ),
        (
            "target_connection",
            "target_connection has been deprecated, provide a list of SlingConnectionResource to the `connections` parameter instead.",
        ),
    )
    for field_name, message in legacy_fields:
        if getattr(self, field_name):
            deprecation_warning(field_name, "0.23", message, stacklevel=4)

    with environ(self.prepare_environment()):
        yield
Expand Down Expand Up @@ -365,63 +252,6 @@ def _exec_sling_cmd(
if proc.returncode != 0:
raise Exception("Sling command failed with error code %s", proc.returncode)

@deprecated(
    breaking_version="0.23.0",
    additional_warn_text="sync has been deprecated, use `replicate` instead.",
)
def sync(
    self,
    source_stream: str,
    target_object: str,
    mode: SlingMode = SlingMode.FULL_REFRESH,
    primary_key: Optional[List[str]] = None,
    update_key: Optional[str] = None,
    source_options: Optional[Dict[str, Any]] = None,
    target_options: Optional[Dict[str, Any]] = None,
    encoding: str = "utf8",
) -> Generator[str, None, None]:
    """Run a Sling sync between the legacy source and target connections.

    Deprecated, use `replicate` instead.

    Args:
        source_stream: Stream to read from. Prefixed with ``file://`` when the
            source connection's type is ``"file"`` and the prefix is missing.
        target_object: Object to write to. Prefixed with ``file://`` under the
            same rule, applied to the target connection.
        mode: Sling mode placed in the config under ``"mode"``.
        primary_key: Optional primary key(s) for the source section.
        update_key: Optional update key for the source section.
        source_options: Optional options for the source section.
        target_options: Optional options for the target section.
        encoding: Passed through to ``_exec_sling_cmd``.

    Yields:
        Whatever ``_exec_sling_cmd`` yields for the prepared command
        (annotated as ``str``).
    """
    file_scheme = "file://"

    source_conn = self.source_connection
    if source_conn and source_conn.type == "file":
        if not source_stream.startswith(file_scheme):
            source_stream = file_scheme + source_stream

    target_conn = self.target_connection
    if target_conn and target_conn.type == "file":
        if not target_object.startswith(file_scheme):
            target_object = file_scheme + target_object

    with self._setup_config():
        source_settings = {
            "conn": "SLING_SOURCE",
            "stream": source_stream,
            "primary_key": primary_key,
            "update_key": update_key,
            "options": source_options,
        }
        target_settings = {
            "conn": "SLING_TARGET",
            "object": target_object,
            "options": target_options,
        }

        # Unset (None) entries are dropped from both sections before building the CLI call.
        config = {
            "mode": mode,
            "source": {
                key: value for key, value in source_settings.items() if value is not None
            },
            "target": {
                key: value for key, value in target_settings.items() if value is not None
            },
        }

        sling_cli = sling.Sling(**config)
        logger.info("Starting Sling sync with mode: %s", mode)
        cmd = sling_cli._prep_cmd()  # noqa: SLF001

        yield from self._exec_sling_cmd(cmd, encoding=encoding)

def _parse_json_table_output(self, table_output: Dict[str, Any]) -> List[Dict[str, str]]:
column_keys: List[str] = table_output["fields"]
column_values: List[List[str]] = table_output["rows"]
Expand Down
Loading