Skip to content

Commit

Permalink
[components] Stop forcing editable dagster for `dg generate code-loca…
Browse files Browse the repository at this point in the history
…tion` (#26447)

## Summary & Motivation

Removes the forced use of an editable installation for
`dagster-dg`-generated code locations. Allows for dependencies to be
added incrementally.

`dg generate code-location` takes a `--use-editable-dagster` option.
This can either be used as a flag, in which case env var
`DAGSTER_GIT_REPO_DIR` will be read, or takes a value pointing to the
directory containing the dagster repo.

## How I Tested These Changes

Unit tests and ran through demo using published code on this branch.
  • Loading branch information
smackesey authored Dec 13, 2024
1 parent 9eb5c0a commit 4fad913
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 62 deletions.
7 changes: 4 additions & 3 deletions python_modules/automation/automation_tests/test_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import subprocess
from pathlib import Path

# Some libraries are excluded because they lack a Dagster dependency, which is a prerequisite for
# registering in the DagsterLibraryRegistry.
EXCLUDE_LIBRARIES = ["dagster-dg"]
# Some libraries are excluded because they either:
# - lack a Dagster dependency, which is a prerequisite for registering in the DagsterLibraryRegistry.
# - are temporary or on a separate release schedule from the rest of the libraries.
EXCLUDE_LIBRARIES = ["dagster-components", "dagster-dg"]


def test_all_libraries_register() -> None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from dagster._core.libraries import DagsterLibraryRegistry

from dagster_components.core.component import (
Component as Component,
ComponentLoadContext as ComponentLoadContext,
Expand All @@ -10,5 +8,3 @@
build_defs_from_toplevel_components_folder as build_defs_from_toplevel_components_folder,
)
from dagster_components.version import __version__ as __version__

DagsterLibraryRegistry.register("dagster-components", __version__)
2 changes: 1 addition & 1 deletion python_modules/libraries/dagster-components/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def get_version() -> str:
],
packages=find_packages(exclude=["dagster_components_tests*", "examples*"]),
install_requires=[
f"dagster{pin}",
"dagster>=1.9.5",
"tomli",
],
zip_safe=False,
Expand Down
21 changes: 17 additions & 4 deletions python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,19 @@ def generate_deployment_command(path: Path) -> None:

@generate_cli.command(name="code-location")
@click.argument("name", type=str)
@click.option("--use-editable-dagster", is_flag=True, default=False)
def generate_code_location_command(name: str, use_editable_dagster: bool) -> None:
@click.option(
"--use-editable-dagster",
type=str,
flag_value="TRUE",
is_flag=False,
default=None,
help=(
"Install Dagster package dependencies from a local Dagster clone. Accepts a path to local Dagster clone root or"
" may be set as a flag (no value is passed). If set as a flag,"
" the location of the local Dagster clone will be read from the `DAGSTER_GIT_REPO_DIR` environment variable."
),
)
def generate_code_location_command(name: str, use_editable_dagster: Optional[str]) -> None:
"""Generate a Dagster code location file structure and a uv-managed virtual environment scoped
to the code location.
Expand Down Expand Up @@ -78,8 +89,8 @@ def generate_code_location_command(name: str, use_editable_dagster: bool) -> Non
else:
code_location_path = Path.cwd() / name

if use_editable_dagster:
if "DAGSTER_GIT_REPO_DIR" not in os.environ:
if use_editable_dagster == "TRUE":
if not os.environ.get("DAGSTER_GIT_REPO_DIR"):
click.echo(
click.style(
"The `--use-editable-dagster` flag requires the `DAGSTER_GIT_REPO_DIR` environment variable to be set.",
Expand All @@ -88,6 +99,8 @@ def generate_code_location_command(name: str, use_editable_dagster: bool) -> Non
)
sys.exit(1)
editable_dagster_root = os.environ["DAGSTER_GIT_REPO_DIR"]
elif use_editable_dagster: # a string value was passed
editable_dagster_root = use_editable_dagster
else:
editable_dagster_root = None

Expand Down
99 changes: 77 additions & 22 deletions python_modules/libraries/dagster-dg/dagster_dg/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
from dagster_dg.context import CodeLocationDirectoryContext
from dagster_dg.utils import (
camelcase,
discover_git_root,
execute_code_location_command,
generate_subtree,
get_uv_command_env,
pushd,
)

# ########################
# ##### DEPLOYMENT
# ########################


def generate_deployment(path: Path) -> None:
click.echo(f"Creating a Dagster deployment at {path}.")
Expand All @@ -29,39 +32,81 @@ def generate_deployment(path: Path) -> None:
)


def generate_code_location(path: Path, editable_dagster_root: Optional[str] = None) -> None:
click.echo(f"Creating a Dagster code location at {path}.")
# ########################
# ##### CODE LOCATION
# ########################

# Despite the fact that editable dependencies are resolved through tool.uv.sources, we need to set
# the dependencies themselves differently depending on whether we are using editable dagster or
# not. This is because `tool.uv.sources` only seems to apply to direct dependencies of the package,
# so any 2+-order Dagster dependency of our package needs to be listed as a direct dependency in the
# editable case.
EDITABLE_DAGSTER_DEPENDENCIES = (
"dagster",
"dagster-pipes",
"dagster-components",
)
EDITABLE_DAGSTER_DEV_DEPENDENCIES = ("dagster-webserver", "dagster-graphql")
PYPI_DAGSTER_DEPENDENCIES = ("dagster-components",)
PYPI_DAGSTER_DEV_DEPENDENCIES = ("dagster-webserver",)


def get_pyproject_toml_dependencies(use_editable_dagster: bool) -> str:
deps = EDITABLE_DAGSTER_DEPENDENCIES if use_editable_dagster else PYPI_DAGSTER_DEPENDENCIES
return "\n".join(
[
"dependencies = [",
*[f' "{dep}",' for dep in deps],
"]",
]
)


# Temporarily we always set an editable dagster root. This is needed while the packages are not
# published.
editable_dagster_root = (
editable_dagster_root
or os.environ.get("DAGSTER_GIT_REPO_DIR")
or str(discover_git_root(Path(__file__)))
def get_pyproject_toml_dev_dependencies(use_editable_dagster: bool) -> str:
deps = (
EDITABLE_DAGSTER_DEV_DEPENDENCIES if use_editable_dagster else PYPI_DAGSTER_DEV_DEPENDENCIES
)
return "\n".join(
[
"dev = [",
*[f' "{dep}",' for dep in deps],
"]",
]
)

editable_dagster_uv_sources = textwrap.dedent(f"""
[tool.uv.sources]
dagster = {{ path = "{editable_dagster_root}/python_modules/dagster", editable = true }}
dagster-graphql = {{ path = "{editable_dagster_root}/python_modules/dagster-graphql", editable = true }}
dagster-pipes = {{ path = "{editable_dagster_root}/python_modules/dagster-pipes", editable = true }}
dagster-webserver = {{ path = "{editable_dagster_root}/python_modules/dagster-webserver", editable = true }}
dagster-components = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-components", editable = true }}
dagster-embedded-elt = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-embedded-elt", editable = true }}
dagster-dbt = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-dbt", editable = true }}

def get_pyproject_toml_uv_sources(editable_dagster_root: str) -> str:
return textwrap.dedent(f"""
[tool.uv.sources]
dagster = {{ path = "{editable_dagster_root}/python_modules/dagster", editable = true }}
dagster-graphql = {{ path = "{editable_dagster_root}/python_modules/dagster-graphql", editable = true }}
dagster-pipes = {{ path = "{editable_dagster_root}/python_modules/dagster-pipes", editable = true }}
dagster-webserver = {{ path = "{editable_dagster_root}/python_modules/dagster-webserver", editable = true }}
dagster-components = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-components", editable = true }}
dagster-embedded-elt = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-embedded-elt", editable = true }}
dagster-dbt = {{ path = "{editable_dagster_root}/python_modules/libraries/dagster-dbt", editable = true }}
""")

if editable_dagster_root:
uv_sources = editable_dagster_uv_sources
else:
uv_sources = editable_dagster_uv_sources

def generate_code_location(path: Path, editable_dagster_root: Optional[str] = None) -> None:
click.echo(f"Creating a Dagster code location at {path}.")

dependencies = get_pyproject_toml_dependencies(use_editable_dagster=bool(editable_dagster_root))
dev_dependencies = get_pyproject_toml_dev_dependencies(
use_editable_dagster=bool(editable_dagster_root)
)
uv_sources = (
get_pyproject_toml_uv_sources(editable_dagster_root) if editable_dagster_root else ""
)

generate_subtree(
path=path,
name_placeholder="CODE_LOCATION_NAME_PLACEHOLDER",
templates_path=os.path.join(
os.path.dirname(__file__), "templates", "CODE_LOCATION_NAME_PLACEHOLDER"
),
dependencies=dependencies,
dev_dependencies=dev_dependencies,
uv_sources=uv_sources,
)

Expand All @@ -70,6 +115,11 @@ def generate_code_location(path: Path, editable_dagster_root: Optional[str] = No
subprocess.run(["uv", "sync"], check=True, env=get_uv_command_env())


# ########################
# ##### COMPONENT TYPE
# ########################


def generate_component_type(context: CodeLocationDirectoryContext, name: str) -> None:
root_path = Path(context.local_component_types_root_path)
click.echo(f"Creating a Dagster component type at {root_path}/{name}.py.")
Expand All @@ -89,6 +139,11 @@ def generate_component_type(context: CodeLocationDirectoryContext, name: str) ->
)


# ########################
# ##### COMPONENT INSTANCE
# ########################


def generate_component_instance(
root_path: Path,
name: str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,14 @@
name = "{{ project_name }}"
requires-python = ">=3.9,<3.13"
version = "0.1.0"
dependencies = [
"dagster",
"dagster-graphql",
"dagster-pipes",
"dagster-webserver",
"dagster-components[sling,dbt]",
"dagster-embedded-elt",
"dagster-dbt",
"sling-mac-arm64",
]
[project.optional-dependencies]
dev = [
"dagster-webserver",
]
{{ dependencies }}
[project.entry-points]
"dagster.components" = { {{ project_name }} = "{{ project_name }}.lib"}
[dependency-groups]
{{ dev_dependencies }}
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import inspect
import json
import os
import subprocess
import sys
import textwrap
from contextlib import contextmanager
Expand Down Expand Up @@ -72,14 +73,21 @@ def isolated_example_deployment_foo(runner: CliRunner) -> Iterator[None]:
def isolated_example_code_location_bar(
runner: CliRunner, in_deployment: bool = True
) -> Iterator[None]:
dagster_git_repo_dir = str(discover_git_root(Path(__file__)))
if in_deployment:
with isolated_example_deployment_foo(runner), clean_module_cache("bar"):
runner.invoke(generate_code_location_command, ["bar"])
runner.invoke(
generate_code_location_command,
["--use-editable-dagster", dagster_git_repo_dir, "bar"],
)
with pushd("code_locations/bar"):
yield
else:
with runner.isolated_filesystem(), clean_module_cache("bar"):
runner.invoke(generate_code_location_command, ["bar"])
runner.invoke(
generate_code_location_command,
["--use-editable-dagster", dagster_git_repo_dir, "bar"],
)
with pushd("bar"):
yield

Expand Down Expand Up @@ -148,12 +156,11 @@ def test_generate_code_location_inside_deployment_success() -> None:
assert Path("code_locations/bar/.venv").exists()
assert Path("code_locations/bar/uv.lock").exists()

# Commented out because we are always adding sources right now
# with open("code_locations/bar/pyproject.toml") as f:
# toml = tomli.loads(f.read())
#
# # No tool.uv.sources added without --use-editable-dagster
# assert "uv" not in toml["tool"]
with open("code_locations/bar/pyproject.toml") as f:
toml = tomli.loads(f.read())

# No tool.uv.sources added without --use-editable-dagster
assert "uv" not in toml["tool"]


def test_generate_code_location_outside_deployment_success() -> None:
Expand All @@ -173,12 +180,17 @@ def test_generate_code_location_outside_deployment_success() -> None:
assert Path("bar/uv.lock").exists()


def test_generate_code_location_editable_dagster_success(monkeypatch) -> None:
@pytest.mark.parametrize("mode", ["env_var", "arg"])
def test_generate_code_location_editable_dagster_success(mode: str, monkeypatch) -> None:
runner = CliRunner()
dagster_git_repo_dir = discover_git_root(Path(__file__))
monkeypatch.setenv("DAGSTER_GIT_REPO_DIR", str(dagster_git_repo_dir))
if mode == "env_var":
monkeypatch.setenv("DAGSTER_GIT_REPO_DIR", str(dagster_git_repo_dir))
editable_args = ["--use-editable-dagster", "--"]
else:
editable_args = ["--use-editable-dagster", str(dagster_git_repo_dir)]
with isolated_example_deployment_foo(runner):
result = runner.invoke(generate_code_location_command, ["--use-editable-dagster", "bar"])
result = runner.invoke(generate_code_location_command, [*editable_args, "bar"])
assert result.exit_code == 0
assert Path("code_locations/bar").exists()
assert Path("code_locations/bar/pyproject.toml").exists()
Expand All @@ -202,6 +214,17 @@ def test_generate_code_location_editable_dagster_success(monkeypatch) -> None:
}


def test_generate_code_location_editable_dagster_no_env_var_no_value_fails(monkeypatch) -> None:
runner = CliRunner()
monkeypatch.setenv("DAGSTER_GIT_REPO_DIR", "")
with isolated_example_deployment_foo(runner):
result = runner.invoke(
generate_code_location_command, ["--use-editable-dagster", "--", "bar"]
)
assert result.exit_code != 0
assert "requires the `DAGSTER_GIT_REPO_DIR`" in result.output


def test_generate_code_location_already_exists_fails() -> None:
runner = CliRunner()
with isolated_example_deployment_foo(runner):
Expand Down Expand Up @@ -326,6 +349,11 @@ def test_generate_component_already_exists_fails(in_deployment: bool) -> None:
def test_generate_sling_replication_instance() -> None:
runner = CliRunner()
with isolated_example_code_location_bar(runner):
# We need to add dagster-embedded-elt also because we are using editable installs. Only
# direct dependencies will be resolved by uv.tool.sources.
subprocess.run(
["uv", "add", "dagster-components[sling]", "dagster-embedded-elt"], check=True
)
result = runner.invoke(
generate_component_command, ["dagster_components.sling_replication", "file_ingest"]
)
Expand Down Expand Up @@ -354,6 +382,9 @@ def test_generate_sling_replication_instance() -> None:
def test_generate_dbt_project_instance(params) -> None:
runner = CliRunner()
with isolated_example_code_location_bar(runner):
# We need to add dagster-dbt also because we are using editable installs. Only
# direct dependencies will be resolved by uv.tool.sources.
subprocess.run(["uv", "add", "dagster-components[dbt]", "dagster-dbt"], check=True)
result = runner.invoke(
generate_component_command, ["dagster_components.dbt_project", "my_project", *params]
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ def test_list_component_types_success():
result.output
== "\n".join(
[
"dagster_components.dbt_project",
"dagster_components.pipes_subprocess_script_collection",
"dagster_components.sling_replication",
]
)
+ "\n"
Expand Down

0 comments on commit 4fad913

Please sign in to comment.