From 137ed676e70309205f8709c1cffea6196a2c8df9 Mon Sep 17 00:00:00 2001 From: Erin Cochran Date: Tue, 20 Aug 2024 14:55:58 -0400 Subject: [PATCH] [docs] [revamp] - Vale updates (#23758) ## Summary & Motivation This PR addresses comments in #23754 ## How I Tested These Changes --- docs/docs-beta/docs/concepts/assets.md | 2 +- .../concepts/assets/thinking-in-assets.md | 4 +- docs/docs-beta/docs/concepts/io-managers.md | 8 +-- .../docs/concepts/understanding-assets.md | 2 +- .../docs/dagster-plus/access/rbac/teams.md | 4 +- .../docs/dagster-plus/access/rbac/users.md | 4 +- .../docs/dagster-plus/deployment/alerts.md | 2 +- .../branch-deployments/change-tracking.md | 4 +- .../branch-deployments/dagster-cloud-cli.md | 2 +- .../deployment/branch-deployments/github.md | 2 +- .../deployment/branch-deployments/gitlab.md | 2 +- .../environment-variables/agent-config.md | 4 +- .../environment-variables/dagster-ui.md | 4 +- .../deployment/hybrid/agents/multiple.md | 2 +- .../serverless/transition-hybrid.md | 2 +- .../docs/dagster-plus/getting-started.md | 2 +- .../dagster-plus/insights/asset-metadata.md | 4 +- .../dagster-plus/insights/export-metrics.md | 4 +- docs/docs-beta/docs/guides/automation.md | 3 +- .../docs/guides/automation/asset-sensors.md | 6 +- .../docs/guides/automation/schedules.md | 6 +- .../docs/guides/automation/sensors.md | 8 +-- docs/docs-beta/docs/guides/data-modeling.md | 4 +- .../adding-metadata-to-assets.md | 4 +- .../data-modeling/creating-asset-factories.md | 3 +- .../data-modeling/creating-data-assets.md | 3 +- .../creating-dependencies-between-assets.md | 4 +- .../passing-data-between-assets.md | 55 ++++++++----------- .../selecting-subsets-of-assets.md | 4 +- docs/docs-beta/docs/guides/deployment/aws.md | 8 +-- .../docs-beta/docs/guides/deployment/azure.md | 8 +-- .../guides/deployment/building-a-data-mesh.md | 6 +- .../docs/guides/deployment/dagster-plus.md | 6 +- docs/docs-beta/docs/guides/deployment/gcp.md | 6 +- 
.../deployment/managing-code-locations.md | 6 +- .../deployment/self-hosted-to-dagster-plus.md | 6 +- .../adding-python-libraries.md | 4 +- .../external-systems/connecting-databases.md | 4 +- .../external-systems/using-api-connections.md | 8 +-- .../docs/guides/ingestion/ingesting-data.md | 2 +- .../docs/guides/monitoring/custom-logging.md | 8 +-- .../docs/guides/monitoring/custom-metrics.md | 8 +-- .../guides/monitoring/failed-run-alerts.md | 8 +-- .../testing/detecting-schema-changes.md | 4 +- .../docs/guides/testing/integration-tests.md | 6 +- .../docs/guides/testing/stopping-runs.md | 8 +-- .../testing-assets-with-asset-checks.md | 8 +-- .../testing/testing-for-data-freshness.md | 8 +-- .../testing/unit-tests-for-assets-and-ops.md | 6 +- .../pushing-operations-to-data-warehouses.md | 4 +- .../working-with-different-data-formats.md | 4 +- .../working-with-large-datasets.md | 4 +- .../docs/partials/_InspirationList.md | 2 +- docs/docs-beta/docs/tutorial/installation.md | 16 +++--- docs/docs-beta/docs/tutorial/quick-start.md | 22 ++++---- docs/docs-beta/docs/tutorial/tutorial-etl.md | 4 +- docs/vale/styles/Dagster/acronyms.yml | 1 + docs/vale/styles/Dagster/colons.yml | 10 ---- docs/vale/styles/Dagster/headings-casing.yml | 40 ++++++++++++++ docs/vale/styles/Dagster/parentheses.yml | 7 --- docs/vale/styles/Dagster/readability.yml | 8 --- docs/vale/styles/Terms/engineering.yml | 1 - docs/vale/styles/config/ignore/ignore.txt | 3 + .../config/vocabularies/Dagster/accept.txt | 8 ++- 64 files changed, 203 insertions(+), 217 deletions(-) delete mode 100644 docs/vale/styles/Dagster/colons.yml delete mode 100644 docs/vale/styles/Dagster/parentheses.yml delete mode 100644 docs/vale/styles/Dagster/readability.yml diff --git a/docs/docs-beta/docs/concepts/assets.md b/docs/docs-beta/docs/concepts/assets.md index 83ec9cc831153..0d0a5506fe39f 100644 --- a/docs/docs-beta/docs/concepts/assets.md +++ b/docs/docs-beta/docs/concepts/assets.md @@ -4,6 +4,6 @@ title: Assets # Assets 
-## Assets and Ops +## Assets and ops Assets and ops are two different concepts in Dagster. diff --git a/docs/docs-beta/docs/concepts/assets/thinking-in-assets.md b/docs/docs-beta/docs/concepts/assets/thinking-in-assets.md index 0079ea32a280b..b5709428f0033 100644 --- a/docs/docs-beta/docs/concepts/assets/thinking-in-assets.md +++ b/docs/docs-beta/docs/concepts/assets/thinking-in-assets.md @@ -1,6 +1,6 @@ --- -title: "Thinking in assets" +title: "Think in assets" sidebar_position: 10 --- -# Thinking in assets +# Think in assets diff --git a/docs/docs-beta/docs/concepts/io-managers.md b/docs/docs-beta/docs/concepts/io-managers.md index 3ba9237f7150a..ecf46e0b776c2 100644 --- a/docs/docs-beta/docs/concepts/io-managers.md +++ b/docs/docs-beta/docs/concepts/io-managers.md @@ -1,5 +1,3 @@ -## ======= - -## title: "I/O managers" - -# I/O managers +--- +title: "I/O managers" +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/concepts/understanding-assets.md b/docs/docs-beta/docs/concepts/understanding-assets.md index adafa34768ace..8841ae839d458 100644 --- a/docs/docs-beta/docs/concepts/understanding-assets.md +++ b/docs/docs-beta/docs/concepts/understanding-assets.md @@ -1,5 +1,5 @@ --- -title: Understanding Assets +title: About assets description: Understanding the concept of assets in Dagster last_update: date: 2024-08-11 diff --git a/docs/docs-beta/docs/dagster-plus/access/rbac/teams.md b/docs/docs-beta/docs/dagster-plus/access/rbac/teams.md index f7c2f71fa2736..f23fb6c761253 100644 --- a/docs/docs-beta/docs/dagster-plus/access/rbac/teams.md +++ b/docs/docs-beta/docs/dagster-plus/access/rbac/teams.md @@ -1,7 +1,7 @@ --- -title: "Managing teams" +title: "Team management" displayed_sidebar: "dagsterPlus" sidebar_position: 2 --- -# Managing teams \ No newline at end of file +# Team management in Dagster+ \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/access/rbac/users.md b/docs/docs-beta/docs/dagster-plus/access/rbac/users.md 
index c69d0f66373d6..eab9fd6a95c27 100644 --- a/docs/docs-beta/docs/dagster-plus/access/rbac/users.md +++ b/docs/docs-beta/docs/dagster-plus/access/rbac/users.md @@ -1,7 +1,7 @@ --- -title: "Managing users" +title: "User management" displayed_sidebar: "dagsterPlus" sidebar_position: 1 --- -# Managing users \ No newline at end of file +# User management in Dagster+ \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/deployment/alerts.md b/docs/docs-beta/docs/dagster-plus/deployment/alerts.md index ebe1f36235ad7..631abf71dc508 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/alerts.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/alerts.md @@ -2,4 +2,4 @@ title: "Dagster+ alerts" --- -# DAgster+ alerts \ No newline at end of file +# Dagster+ alerts \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/change-tracking.md b/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/change-tracking.md index 0472fa9e30b0c..c568f9a9fef47 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/change-tracking.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/change-tracking.md @@ -1,8 +1,8 @@ --- -title: "Branch Deployment Change Tracking" +title: "Change Tracking in Branch Deployments" displayed_sidebar: "dagsterPlus" sidebar_position: 4 sidebar_label: "Change Tracking" --- -# Using Change Tracking \ No newline at end of file +# Change Tracking in Branch Deployments \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/dagster-cloud-cli.md b/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/dagster-cloud-cli.md index d14796d769c8a..9e0e058e8a2d0 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/dagster-cloud-cli.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/dagster-cloud-cli.md @@ -5,4 +5,4 @@ sidebar_position: 3 sidebar_label: 
"dagster-cloud CLI" --- -# Using Branch Deployments with the dagster-cloud CLI \ No newline at end of file +# Use Branch Deployments with the dagster-cloud CLI \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/github.md b/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/github.md index fbada01974b3e..830fb832fffff 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/github.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/github.md @@ -5,4 +5,4 @@ sidebar_position: 1 sidebar_label: "GitHub" --- -# Using Branch Deployments with GitHub \ No newline at end of file +# Use Branch Deployments with GitHub \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/gitlab.md b/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/gitlab.md index ced65eba94d24..109f8c55f5f36 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/gitlab.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/branch-deployments/gitlab.md @@ -5,4 +5,4 @@ sidebar_position: 2 sidebar_label: "Gitlab" --- -# Using Branch Deployments with Gitlab \ No newline at end of file +# Use Branch Deployments with Gitlab \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/deployment/environment-variables/agent-config.md b/docs/docs-beta/docs/dagster-plus/deployment/environment-variables/agent-config.md index 3ba090eb148db..14b7ba9274507 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/environment-variables/agent-config.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/environment-variables/agent-config.md @@ -1,8 +1,8 @@ --- -title: "Setting environment variables using agent config" +title: "Set environment variables using agent config" displayed_sidebar: "dagsterPlus" sidebar_position: 2 sidebar_label: "Set with agent config" --- -# Setting environment variables using agent config \ No newline 
at end of file +# Set environment variables using agent config \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/deployment/environment-variables/dagster-ui.md b/docs/docs-beta/docs/dagster-plus/deployment/environment-variables/dagster-ui.md index 06bc772bd3795..26aaa110b7295 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/environment-variables/dagster-ui.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/environment-variables/dagster-ui.md @@ -1,8 +1,8 @@ --- -title: "Setting environment variables with the Dagster+ UI" +title: "Set environment variables with the Dagster+ UI" displayed_sidebar: "dagsterPlus" sidebar_position: 1 sidebar_label: "Set with Dagster+ UI" --- -# Setting environment variables with the Dagster+ UI \ No newline at end of file +# Set environment variables with the Dagster+ UI \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/deployment/hybrid/agents/multiple.md b/docs/docs-beta/docs/dagster-plus/deployment/hybrid/agents/multiple.md index cab81855748b2..c00dba0637864 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/hybrid/agents/multiple.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/hybrid/agents/multiple.md @@ -4,4 +4,4 @@ displayed_sidebar: "dagsterPlus" sidebar_position: 50 --- -# Using multiple agents \ No newline at end of file +# Use multiple agents with Dagster+ \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/deployment/serverless/transition-hybrid.md b/docs/docs-beta/docs/dagster-plus/deployment/serverless/transition-hybrid.md index 00d7e6f88ced4..22ffd567a7aee 100644 --- a/docs/docs-beta/docs/dagster-plus/deployment/serverless/transition-hybrid.md +++ b/docs/docs-beta/docs/dagster-plus/deployment/serverless/transition-hybrid.md @@ -4,4 +4,4 @@ displayed_sidebar: "dagsterPlus" sidebar_position: 50 --- -# Transitioning from Serverless to Hybrid \ No newline at end of file +# Transition from Serverless to Hybrid \ No newline at end of file 
diff --git a/docs/docs-beta/docs/dagster-plus/getting-started.md b/docs/docs-beta/docs/dagster-plus/getting-started.md index 19d122fbab7e6..0995223f87044 100644 --- a/docs/docs-beta/docs/dagster-plus/getting-started.md +++ b/docs/docs-beta/docs/dagster-plus/getting-started.md @@ -3,4 +3,4 @@ title: "Getting started with Dagster+" displayed_sidebar: "dagsterPlus" --- -# Getting started with Dagster+ \ No newline at end of file +# Get started with Dagster+ \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/insights/asset-metadata.md b/docs/docs-beta/docs/dagster-plus/insights/asset-metadata.md index 8a9095a38d6a7..9feee284eeccb 100644 --- a/docs/docs-beta/docs/dagster-plus/insights/asset-metadata.md +++ b/docs/docs-beta/docs/dagster-plus/insights/asset-metadata.md @@ -1,7 +1,7 @@ --- -title: "Integrating asset metadata into Dagster+ Insights" +title: "Integrate asset metadata into Dagster+ Insights" sidebar_label: "Integrate asset metadata" sidebar_position: 1 --- -# Integrating asset metadata into Dagster+ Insights \ No newline at end of file +# Integrate asset metadata into Dagster+ Insights \ No newline at end of file diff --git a/docs/docs-beta/docs/dagster-plus/insights/export-metrics.md b/docs/docs-beta/docs/dagster-plus/insights/export-metrics.md index f3a435f2e63c1..119130f21c4ab 100644 --- a/docs/docs-beta/docs/dagster-plus/insights/export-metrics.md +++ b/docs/docs-beta/docs/dagster-plus/insights/export-metrics.md @@ -1,7 +1,7 @@ --- -title: "Exporting metrics from Dagster+ Insights" +title: "Export metrics from Dagster+ Insights" sidebar_label: "Export metrics" sidebar_position: 6 --- -# xporting metrics from Dagster+ Insights \ No newline at end of file +# Export metrics from Dagster+ Insights \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/automation.md b/docs/docs-beta/docs/guides/automation.md index cfa58783c771f..33be9a06c6734 100644 --- a/docs/docs-beta/docs/guides/automation.md +++ 
b/docs/docs-beta/docs/guides/automation.md @@ -57,7 +57,6 @@ You must provide a function that the sensor will use to determine if it should t Like schedules, sensors operate on a selection of assets, known as [Jobs](/concepts/ops-jobs) and can either start a pipeline through a Run or log a reason for not starting a pipeline using a SkipReason. - ### When to use sensors - You need event-driven automation @@ -78,7 +77,7 @@ Asset Sensors trigger jobs when specified assets are materialized, allowing you For more examples of how to create asset sensors, see the [How-To Use Asset Sensors](/guides/automation/asset-sensors) guide. -## Declarative automation +## Declarative Automation TODO: add content diff --git a/docs/docs-beta/docs/guides/automation/asset-sensors.md b/docs/docs-beta/docs/guides/automation/asset-sensors.md index 21d29857980d2..87d867994da99 100644 --- a/docs/docs-beta/docs/guides/automation/asset-sensors.md +++ b/docs/docs-beta/docs/guides/automation/asset-sensors.md @@ -1,10 +1,8 @@ --- -title: Triggering jobs with Asset Sensors -sidebar_label: Triggering jobs with Asset Sensors +title: Trigger jobs with Asset Sensors sidebar_position: 30 --- - Asset sensors in Dagster provide a powerful mechanism for monitoring asset materializations and triggering downstream computations or notifications based on those events. This guide covers the most common use cases for asset sensors such as defining cross-job and cross-code location dependencies. @@ -74,7 +72,7 @@ otherwise it skips the run. ## Trigger a job with configuration -By providing a configuration to the `RunRequest` object, you can trigger a job with a specific configuration. This is useful when you want to trigger a job with custom parameters based on custom logic you define. For example, you might use a sensor to trigger a job when an asset is materialized, but also pass metadata about that materialization to the job. 
+By providing a configuration to the `RunRequest` object, you can trigger a job with a specific configuration. This is useful when you want to trigger a job with custom parameters based on custom logic you define. For example, you might use a sensor to trigger a job when an asset is materialized, but also pass metadata about that materialization to the job. diff --git a/docs/docs-beta/docs/guides/automation/schedules.md b/docs/docs-beta/docs/guides/automation/schedules.md index 5cf68d428b14a..c972ac9933b28 100644 --- a/docs/docs-beta/docs/guides/automation/schedules.md +++ b/docs/docs-beta/docs/guides/automation/schedules.md @@ -1,6 +1,6 @@ --- -title: "Scheduling cron-based pipelines" -sidebar_label: "Running pipelines on a schedule" +title: "Schedule cron-based pipelines" +sidebar_label: "Run pipelines on a schedule" sidebar_position: 10 --- @@ -26,7 +26,7 @@ By default, schedules without a timezone will run in Coordinated Universal Time ```python daily_schedule = ScheduleDefinition( job=daily_refresh_job, - cron_schedule="0 0 * * *", + cron_schedule="0 0 * * *", timezone="America/Los_Angeles", ) ``` diff --git a/docs/docs-beta/docs/guides/automation/sensors.md b/docs/docs-beta/docs/guides/automation/sensors.md index f30e16b94379e..2ea73a1f681d0 100644 --- a/docs/docs-beta/docs/guides/automation/sensors.md +++ b/docs/docs-beta/docs/guides/automation/sensors.md @@ -1,13 +1,13 @@ --- -title: Creating event-based pipelines with sensors -sidebar_label: Creating event-based pipelines +title: Create event-based pipelines with sensors +sidebar_label: Create event-based pipelines sidebar_position: 20 - --- + Sensors are a way to trigger runs in response to events in Dagster. Sensors run on a regular interval and can either trigger a run, or provide a reason why a run was skipped. -Sensors allow you to react events in external systems. For example, you can trigger a run when a new file arrives in an S3 bucket, or when a row is updated in a database. 
+Sensors allow you to respond to events in external systems. For example, you can trigger a run when a new file arrives in an S3 bucket, or when a row is updated in a database.
Prerequisites diff --git a/docs/docs-beta/docs/guides/data-modeling.md b/docs/docs-beta/docs/guides/data-modeling.md index 49b82f22bab37..06853b2acc36b 100644 --- a/docs/docs-beta/docs/guides/data-modeling.md +++ b/docs/docs-beta/docs/guides/data-modeling.md @@ -1,8 +1,8 @@ --- -title: "Modeling your data" +title: "Model your data" --- -# Modeling your data +# Model your data - Data Assets - Partitions diff --git a/docs/docs-beta/docs/guides/data-modeling/adding-metadata-to-assets.md b/docs/docs-beta/docs/guides/data-modeling/adding-metadata-to-assets.md index 5c288165c7970..a8a913d3f4e7e 100644 --- a/docs/docs-beta/docs/guides/data-modeling/adding-metadata-to-assets.md +++ b/docs/docs-beta/docs/guides/data-modeling/adding-metadata-to-assets.md @@ -1,5 +1,5 @@ --- -title: "Adding metadata to assets" +title: "Add metadata to assets" sidebar_position: 40 -sidebar_label: "Adding metadata" +sidebar_label: "Add metadata" --- diff --git a/docs/docs-beta/docs/guides/data-modeling/creating-asset-factories.md b/docs/docs-beta/docs/guides/data-modeling/creating-asset-factories.md index 07e558b2674dc..60a7b33c54ef4 100644 --- a/docs/docs-beta/docs/guides/data-modeling/creating-asset-factories.md +++ b/docs/docs-beta/docs/guides/data-modeling/creating-asset-factories.md @@ -1,5 +1,4 @@ --- -title: "Creating asset factories" +title: "Create asset factories" sidebar_position: 50 -sidebar_label: "Creating asset factories" --- diff --git a/docs/docs-beta/docs/guides/data-modeling/creating-data-assets.md b/docs/docs-beta/docs/guides/data-modeling/creating-data-assets.md index 12ede891cdcc4..5ffee3afe7dcd 100644 --- a/docs/docs-beta/docs/guides/data-modeling/creating-data-assets.md +++ b/docs/docs-beta/docs/guides/data-modeling/creating-data-assets.md @@ -1,5 +1,4 @@ --- -title: "Creating data assets" +title: "Create data assets" sidebar_position: 10 -sidebar_label: "Creating data assets" --- diff --git 
a/docs/docs-beta/docs/guides/data-modeling/creating-dependencies-between-assets.md b/docs/docs-beta/docs/guides/data-modeling/creating-dependencies-between-assets.md index ae22e4531cc0c..4f3505f9e0341 100644 --- a/docs/docs-beta/docs/guides/data-modeling/creating-dependencies-between-assets.md +++ b/docs/docs-beta/docs/guides/data-modeling/creating-dependencies-between-assets.md @@ -1,5 +1,5 @@ --- -title: "Creating dependencies between assets" +title: "Create dependencies between assets" sidebar_position: 20 -sidebar_label: "Creating asset dependencies" +sidebar_label: "Create asset dependencies" --- diff --git a/docs/docs-beta/docs/guides/data-modeling/passing-data-between-assets.md b/docs/docs-beta/docs/guides/data-modeling/passing-data-between-assets.md index 7a5dffeca8875..eb00da86270b3 100644 --- a/docs/docs-beta/docs/guides/data-modeling/passing-data-between-assets.md +++ b/docs/docs-beta/docs/guides/data-modeling/passing-data-between-assets.md @@ -1,14 +1,21 @@ --- -title: How to Pass Data Between Assets +title: Pass data between assets description: Learn how to pass data between assets in Dagster sidebar_position: 30 -sidebar_label: "Passing data between assets" last_update: date: 2024-08-11 author: Pedram Navid --- -As you develop your data pipeline, you'll likely need to pass data between assets. By the end of this guide, you'll have a solid understanding of the different approaches to passing data between assets and when to use each one. +In Dagster, assets are the building blocks of your data pipeline and it's common to want to pass data between them. This guide will help you understand how to pass data between assets. + +There are three ways of passing data between assets: + +- Explicitly managing data, by using external storage +- Implicitly managing data, using I/O managers +- Avoiding passing data between assets altogether by combining several tasks into a single asset + +This guide walks through all three methods. 
--- @@ -18,26 +25,12 @@ As you develop your data pipeline, you'll likely need to pass data between asset To follow the steps in this guide, you'll need: - A basic understanding of Dagster concepts such as assets and resources -- Dagster installed, as well as the `dagster-duckdb-pandas` package +- Dagster and the `dagster-duckdb-pandas` package installed
--- -## Overview - -In Dagster, assets are the building blocks of your data pipeline and it's common to want to pass data between them. This guide will help you understand how to pass data between assets. - -There are three ways of passing data between assets: - -- Explicitly managing data, by using external storage -- Implicitly managing data, using IO Managers -- Avoiding passing data between assets altogether by combining several tasks into a single asset - -This guide walks through all three methods. - ---- - -## Move Data Between Assets Explicitly Using External Storage +## Move data between assets explicitly using external storage A common and recommended approach to passing data between assets is explicitly managing data using external storage. This example pipeline uses a SQLite database as external storage: @@ -55,20 +48,20 @@ The downsides of this approach are: - You need to manage connections and transactions manually - You need to handle errors and edge cases, for example, if the database is down or if a connection is closed -## Move Data Between Assets Implicitly Using IO Managers +## Move data between assets implicitly using I/O managers -Dagster's IO Managers are a powerful feature that manages data between assets by defining how data is read from and written to external storage. They help separate business logic from I/O operations, reducing boilerplate code and making it easier to change where data is stored. +Dagster's I/O managers are a powerful feature that manages data between assets by defining how data is read from and written to external storage. They help separate business logic from I/O operations, reducing boilerplate code and making it easier to change where data is stored. I/O managers handle: 1. **Input**: Reading data from storage and loading it into memory for use by dependent assets. 2. **Output**: Writing data to the configured storage location.
-For a deeper understanding of IO Managers, check out the [Understanding IO Managers](/concepts/io-managers) guide. +For a deeper understanding of I/O managers, check out the [Understanding I/O managers](/concepts/io-managers) guide. - + -In this example, a `DuckDBPandasIOManager` is instantiated to run using a local file. The IO manager handles both reading and writing to the database. +In this example, a `DuckDBPandasIOManager` is instantiated to run using a local file. The I/O manager handles both reading and writing to the database. :::warning @@ -78,19 +71,19 @@ each step would execute in a separate environment and would not have access to t ::: The `people()` and `birds()` assets both write their dataframes to DuckDB -for persistent storage. The `combined_data()` asset requests data from both assets by adding them as parameters to the function, and the IO Manager handles the reading them from DuckDB and making them available to the `combined_data` function as dataframes. Note that when you use IO Managers you do not need to manually add the asset's dependencies through the `deps` argument. +for persistent storage. The `combined_data()` asset requests data from both assets by adding them as parameters to the function, and the I/O manager handles reading them from DuckDB and making them available to the `combined_data` function as dataframes. **Note**: When you use I/O managers you don't need to manually add the asset's dependencies through the `deps` argument.
The benefits of this approach are: -- The reading and writing of data is handled by the IO Manager, reducing boilerplate code -- It's easy to swap out different IO Managers based on environments without changing the underlying asset computation +- The reading and writing of data is handled by the I/O manager, reducing boilerplate code +- It's easy to swap out different I/O managers based on environments without changing the underlying asset computation The downsides of this approach are: -- The IO Manager approach is less flexible should you need to customize how data is read or written to storage -- Some decisions may be made by the IO Manager for you, such as naming conventions that can be hard to override. +- The I/O manager approach is less flexible should you need to customize how data is read or written to storage +- Some decisions may be made by the I/O manager for you, such as naming conventions that can be hard to override. -## Avoid Passing Data Between Assets by Combining Assets +## Avoid passing data between assets by combining assets In some cases, you may find that you can avoid passing data between assets by carefully considering how you have modeled your pipeline: @@ -124,6 +117,6 @@ The downsides of this approach are: --- -## Related Resources +## Related resources TODO: add links to relevant API documentation here. 
diff --git a/docs/docs-beta/docs/guides/data-modeling/selecting-subsets-of-assets.md b/docs/docs-beta/docs/guides/data-modeling/selecting-subsets-of-assets.md index 0dee09365534c..d9f156b7ff4e0 100644 --- a/docs/docs-beta/docs/guides/data-modeling/selecting-subsets-of-assets.md +++ b/docs/docs-beta/docs/guides/data-modeling/selecting-subsets-of-assets.md @@ -1,5 +1,5 @@ --- -title: "Selecting subsets of assets" +title: "Select subsets of assets" sidebar_position: 60 -sidebar_label: "Selecting assets" +sidebar_label: "Select assets" --- diff --git a/docs/docs-beta/docs/guides/deployment/aws.md b/docs/docs-beta/docs/guides/deployment/aws.md index 68645a9d51d5f..2eb5bf5c3b9b1 100644 --- a/docs/docs-beta/docs/guides/deployment/aws.md +++ b/docs/docs-beta/docs/guides/deployment/aws.md @@ -1,6 +1,4 @@ --- -title: "Deploying to Amazon Web Services" -sidebar_position: 1 ---- - -# Deploying to Amazon Web Services +title: "Deploy to Amazon Web Services" +sidebar_position: 10 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/deployment/azure.md b/docs/docs-beta/docs/guides/deployment/azure.md index 8cfa39b06904c..83f6ba1ec490c 100644 --- a/docs/docs-beta/docs/guides/deployment/azure.md +++ b/docs/docs-beta/docs/guides/deployment/azure.md @@ -1,6 +1,4 @@ --- -title: "Deploying to Microsoft Azure" -sidebar_position: 3 ---- - -# Deploying to Microsoft Azure +title: "Deploy to Microsoft Azure" +sidebar_position: 30 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/deployment/building-a-data-mesh.md b/docs/docs-beta/docs/guides/deployment/building-a-data-mesh.md index a4104a5d32b5c..11f48ca585859 100644 --- a/docs/docs-beta/docs/guides/deployment/building-a-data-mesh.md +++ b/docs/docs-beta/docs/guides/deployment/building-a-data-mesh.md @@ -1,6 +1,6 @@ --- -title: "Building a data mesh" -sidebar_position: 6 +title: "Build a data mesh" +sidebar_position: 60 --- -# Building a data mesh +# Build a data mesh diff --git 
a/docs/docs-beta/docs/guides/deployment/dagster-plus.md b/docs/docs-beta/docs/guides/deployment/dagster-plus.md index 3f6ba6a78e8a5..2144f757442b2 100644 --- a/docs/docs-beta/docs/guides/deployment/dagster-plus.md +++ b/docs/docs-beta/docs/guides/deployment/dagster-plus.md @@ -1,6 +1,6 @@ --- -title: "Deploying to Dagster+" -sidebar_position: 4 +title: "Deploy to Dagster+" +sidebar_position: 40 --- -# Deploying to Dagster+ +# Deploy to Dagster+ diff --git a/docs/docs-beta/docs/guides/deployment/gcp.md b/docs/docs-beta/docs/guides/deployment/gcp.md index 90d787f06fe1c..fc94fc464fa96 100644 --- a/docs/docs-beta/docs/guides/deployment/gcp.md +++ b/docs/docs-beta/docs/guides/deployment/gcp.md @@ -1,6 +1,6 @@ --- -title: "Deploying to Google Cloud Platform" -sidebar_position: 2 +title: "Deploy to Google Cloud Platform" +sidebar_position: 20 --- -# Deploying to Google Cloud Platform +# Deploy to Google Cloud Platform diff --git a/docs/docs-beta/docs/guides/deployment/managing-code-locations.md b/docs/docs-beta/docs/guides/deployment/managing-code-locations.md index 0d4ce12fba399..33f229f40dea9 100644 --- a/docs/docs-beta/docs/guides/deployment/managing-code-locations.md +++ b/docs/docs-beta/docs/guides/deployment/managing-code-locations.md @@ -1,6 +1,6 @@ --- -title: "Managing code locations" -sidebar_position: 5 +title: "Manage code locations" +sidebar_position: 50 --- -# Managing code locations +# Manage code locations diff --git a/docs/docs-beta/docs/guides/deployment/self-hosted-to-dagster-plus.md b/docs/docs-beta/docs/guides/deployment/self-hosted-to-dagster-plus.md index f72d198bd7ca2..758be4d741f58 100644 --- a/docs/docs-beta/docs/guides/deployment/self-hosted-to-dagster-plus.md +++ b/docs/docs-beta/docs/guides/deployment/self-hosted-to-dagster-plus.md @@ -1,6 +1,4 @@ --- -title: "Migrating from self-hosted to Dagster+" -sidebar_position: 7 +title: "Migrate from self-hosted to Dagster+" +sidebar_position: 70 --- - -# Migrating from self-hosted to Dagster+ diff 
--git a/docs/docs-beta/docs/guides/external-systems/adding-python-libraries.md b/docs/docs-beta/docs/guides/external-systems/adding-python-libraries.md index 0b6055226ae0d..4600e8ac4d345 100644 --- a/docs/docs-beta/docs/guides/external-systems/adding-python-libraries.md +++ b/docs/docs-beta/docs/guides/external-systems/adding-python-libraries.md @@ -1,4 +1,4 @@ --- -title: "Adding Python libraries" -sidebar_position: 3 +title: "Add Python libraries" +sidebar_position: 30 --- diff --git a/docs/docs-beta/docs/guides/external-systems/connecting-databases.md b/docs/docs-beta/docs/guides/external-systems/connecting-databases.md index ef99d64489127..9f506223bc817 100644 --- a/docs/docs-beta/docs/guides/external-systems/connecting-databases.md +++ b/docs/docs-beta/docs/guides/external-systems/connecting-databases.md @@ -1,4 +1,4 @@ --- -title: "Connecting databases" -sidebar_position: 1 +title: "Connect databases" +sidebar_position: 10 --- diff --git a/docs/docs-beta/docs/guides/external-systems/using-api-connections.md b/docs/docs-beta/docs/guides/external-systems/using-api-connections.md index 3bde43668236f..ac2698c009189 100644 --- a/docs/docs-beta/docs/guides/external-systems/using-api-connections.md +++ b/docs/docs-beta/docs/guides/external-systems/using-api-connections.md @@ -1,6 +1,4 @@ --- -title: "Using API connections" -sidebar_position: 2 ---- - -# Using API connections +title: "Use API connections" +sidebar_position: 20 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/ingestion/ingesting-data.md b/docs/docs-beta/docs/guides/ingestion/ingesting-data.md index a80c5e64624b1..3497ba53854fc 100644 --- a/docs/docs-beta/docs/guides/ingestion/ingesting-data.md +++ b/docs/docs-beta/docs/guides/ingestion/ingesting-data.md @@ -1,3 +1,3 @@ --- -title: Ingesting Data +title: Ingest Data --- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/monitoring/custom-logging.md b/docs/docs-beta/docs/guides/monitoring/custom-logging.md index 
a258048687d6d..0947628d5cedf 100644 --- a/docs/docs-beta/docs/guides/monitoring/custom-logging.md +++ b/docs/docs-beta/docs/guides/monitoring/custom-logging.md @@ -1,6 +1,4 @@ --- -title: "Setting up custom logging" -sidebar_position: 1 ---- - -# Setting up custom logging +title: "Set up custom logging" +sidebar_position: 10 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/monitoring/custom-metrics.md b/docs/docs-beta/docs/guides/monitoring/custom-metrics.md index ce124410b2bab..5dab33966079b 100644 --- a/docs/docs-beta/docs/guides/monitoring/custom-metrics.md +++ b/docs/docs-beta/docs/guides/monitoring/custom-metrics.md @@ -1,6 +1,4 @@ --- -title: "Using custom metrics in logs" -sidebar_position: 3 ---- - -# Using custom metrics in logs +title: "Use custom metrics in logs" +sidebar_position: 30 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/monitoring/failed-run-alerts.md b/docs/docs-beta/docs/guides/monitoring/failed-run-alerts.md index cd68c525a48e0..ee9fdbaea0057 100644 --- a/docs/docs-beta/docs/guides/monitoring/failed-run-alerts.md +++ b/docs/docs-beta/docs/guides/monitoring/failed-run-alerts.md @@ -1,6 +1,4 @@ --- -title: "Alerting on failed runs" -sidebar_position: 1 ---- - -# Alerting on failed runs +title: "Alert on failed runs" +sidebar_position: 10 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/testing/detecting-schema-changes.md b/docs/docs-beta/docs/guides/testing/detecting-schema-changes.md index 796b64fe96832..1648e1314373b 100644 --- a/docs/docs-beta/docs/guides/testing/detecting-schema-changes.md +++ b/docs/docs-beta/docs/guides/testing/detecting-schema-changes.md @@ -1,4 +1,4 @@ --- -title: "Detecting schema changes" -sidebar_position: 4 +title: "Detect schema changes" +sidebar_position: 50 --- diff --git a/docs/docs-beta/docs/guides/testing/integration-tests.md b/docs/docs-beta/docs/guides/testing/integration-tests.md index 748231d1f88f0..6d0f7390769f6 100644 --- 
a/docs/docs-beta/docs/guides/testing/integration-tests.md +++ b/docs/docs-beta/docs/guides/testing/integration-tests.md @@ -1,6 +1,4 @@ --- title: "Integration tests" -sidebar_position: 3 ---- - -# Integration tests +sidebar_position: 40 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/testing/stopping-runs.md b/docs/docs-beta/docs/guides/testing/stopping-runs.md index 55ff00b26b315..14a3df5b66a0d 100644 --- a/docs/docs-beta/docs/guides/testing/stopping-runs.md +++ b/docs/docs-beta/docs/guides/testing/stopping-runs.md @@ -1,6 +1,4 @@ --- -title: "Stopping runs" -sidebar_position: 5 ---- - -# Stopping runs +title: "Stop runs" +sidebar_position: 60 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/testing/testing-assets-with-asset-checks.md b/docs/docs-beta/docs/guides/testing/testing-assets-with-asset-checks.md index 5a39a8fac337e..ecacd49f09631 100644 --- a/docs/docs-beta/docs/guides/testing/testing-assets-with-asset-checks.md +++ b/docs/docs-beta/docs/guides/testing/testing-assets-with-asset-checks.md @@ -1,6 +1,4 @@ --- -title: "Testing assets with Asset Checks" -sidebar_position: 1 ---- - -# Testing assets with Asset Checks +title: "Test assets with Asset Checks" +sidebar_position: 10 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/testing/testing-for-data-freshness.md b/docs/docs-beta/docs/guides/testing/testing-for-data-freshness.md index c6544930aacef..83f664b46ec6d 100644 --- a/docs/docs-beta/docs/guides/testing/testing-for-data-freshness.md +++ b/docs/docs-beta/docs/guides/testing/testing-for-data-freshness.md @@ -1,6 +1,4 @@ --- -title: "Testing for data freshness" -sidebar_position: 1 ---- - -# Testing for data freshness +title: "Test for data freshness" +sidebar_position: 20 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/testing/unit-tests-for-assets-and-ops.md b/docs/docs-beta/docs/guides/testing/unit-tests-for-assets-and-ops.md index 
fc112bc6824e0..57e986bd076c2 100644 --- a/docs/docs-beta/docs/guides/testing/unit-tests-for-assets-and-ops.md +++ b/docs/docs-beta/docs/guides/testing/unit-tests-for-assets-and-ops.md @@ -1,6 +1,4 @@ --- title: "Unit tests for assets and ops" -sidebar_position: 2 ---- - -# Unit tests for assets and ops +sidebar_position: 30 +--- \ No newline at end of file diff --git a/docs/docs-beta/docs/guides/transformation/pushing-operations-to-data-warehouses.md b/docs/docs-beta/docs/guides/transformation/pushing-operations-to-data-warehouses.md index 343d4f97fe9c6..8f5d2a0dbcc76 100644 --- a/docs/docs-beta/docs/guides/transformation/pushing-operations-to-data-warehouses.md +++ b/docs/docs-beta/docs/guides/transformation/pushing-operations-to-data-warehouses.md @@ -1,4 +1,4 @@ --- -title: "Pushing operations to data warehouses" -sidebar_position: 2 +title: "Push operations to data warehouses" +sidebar_position: 20 --- diff --git a/docs/docs-beta/docs/guides/transformation/working-with-different-data-formats.md b/docs/docs-beta/docs/guides/transformation/working-with-different-data-formats.md index 8d27bb745ebe7..324d275cb7a05 100644 --- a/docs/docs-beta/docs/guides/transformation/working-with-different-data-formats.md +++ b/docs/docs-beta/docs/guides/transformation/working-with-different-data-formats.md @@ -1,4 +1,4 @@ --- -title: "Working with different data file formats" -sidebar_position: 3 +title: "Work with different data file formats" +sidebar_position: 30 --- diff --git a/docs/docs-beta/docs/guides/transformation/working-with-large-datasets.md b/docs/docs-beta/docs/guides/transformation/working-with-large-datasets.md index 2410d094b79aa..f9b9dcd3f8dd1 100644 --- a/docs/docs-beta/docs/guides/transformation/working-with-large-datasets.md +++ b/docs/docs-beta/docs/guides/transformation/working-with-large-datasets.md @@ -1,4 +1,4 @@ --- -title: "Working with large datasets" -sidebar_position: 1 +title: "Work with large datasets" +sidebar_position: 10 --- diff --git 
a/docs/docs-beta/docs/partials/_InspirationList.md b/docs/docs-beta/docs/partials/_InspirationList.md index 0a1ae1cdd5707..a7e64434dad2b 100644 --- a/docs/docs-beta/docs/partials/_InspirationList.md +++ b/docs/docs-beta/docs/partials/_InspirationList.md @@ -2,6 +2,6 @@ If you're looking for additional inspiration, we recommend: -- [**Dagster Open Platform**](https://github.com/dagster-io/dagster-open-platform), which is Dagster Lab's open-source data platform. This full-sized project contains real assets and other Dagster features used by the Dagster Labs team. +- [**Dagster Open Platform**](https://github.com/dagster-io/dagster-open-platform), which is Dagster Labs' open source data platform. This full-sized project contains real assets and other Dagster features used by the Dagster Labs team. - [**GitHub Discussions**](https://github.com/dagster-io/dagster/discussions), where you can ask questions and get inspired by the Dagster community - [**The Awesome Dagster repository**](https://github.com/dagster-io/awesome-dagster), which is a collection of all awesome things related to Dagster, including other users' projects, talks, articles, and more diff --git a/docs/docs-beta/docs/tutorial/installation.md b/docs/docs-beta/docs/tutorial/installation.md index 23913486accf0..89a146e0f7a80 100644 --- a/docs/docs-beta/docs/tutorial/installation.md +++ b/docs/docs-beta/docs/tutorial/installation.md @@ -1,9 +1,9 @@ --- -title: "Installing Dagster" +title: "Dagster installation" description: "Learn how to install Dagster" --- -# Installing Dagster Guide +# Dagster installation This guide will walk you through the steps to install Dagster, a data orchestrator for machine learning, analytics, and ETL. Follow the instructions below to get started with Dagster on your local machine. 
@@ -13,11 +13,11 @@ This guide will walk you through the steps to install Dagster, a data orchestrat Before you begin, ensure you have the following prerequisites installed on your system: - Python 3.7 or higher, Python 3.11 is recommended -- pip (Python package installer) +- pip, a Python package installer -## Setup a Virtual Environment +## Set up a virtual environment After installing Python, it's a good idea to setup a virtual environment to isolate your Dagster project from the rest of your system. @@ -29,7 +29,7 @@ python -m venv .venv source .venv/bin/activate ``` -`pyenv` and `pyenv-virtualenv` are more powerful tools that can help you manage multiple versions of Python on a single machine. You can learn more about them [here](https://github.com/pyenv/pyenv). +`pyenv` and `pyenv-virtualenv` are more powerful tools that can help you manage multiple versions of Python on a single machine. You can learn more about them in the [pyenv GitHub repository](https://github.com/pyenv/pyenv). ## Install Dagster @@ -41,7 +41,7 @@ pip install dagster dagster-webserver This command will install the core Dagster library and the webserver, which is used to serve the Dagster UI. -## Verify Installation +## Verify installation To verify that Dagster is installed correctly, you can run the following command: @@ -72,9 +72,9 @@ Congratulations! You have successfully installed Dagster ## Troubleshooting -If you encounter any issues during the installation process, please refer to the [Dagster GitHub repository](https://github.com/dagster-io/dagster) for troubleshooting or reach out to the Dagster community for further assistance. +If you encounter any issues during the installation process, refer to the [Dagster GitHub repository](https://github.com/dagster-io/dagster) for troubleshooting or reach out to the Dagster community for further assistance. 
-## Next Steps +## Next steps - [Quickstart Tutorial](/tutorial/quick-start) - [ETL Tutorial](/tutorial/tutorial-etl) diff --git a/docs/docs-beta/docs/tutorial/quick-start.md b/docs/docs-beta/docs/tutorial/quick-start.md index 01fc3f32927c4..89c26284dd3cb 100644 --- a/docs/docs-beta/docs/tutorial/quick-start.md +++ b/docs/docs-beta/docs/tutorial/quick-start.md @@ -6,14 +6,14 @@ last_update: author: Pedram Navid --- -# Dagster Tutorial: Building Your First Dagster Project +# Build your first Dagster project Welcome to this hands-on tutorial where you'll learn how to build a basic Extract, Transform, Load (ETL) pipeline using Dagster. By the end of this tutorial, you'll have created a functional pipeline that extracts data from a CSV file and transforms it. -## What You'll Learn +## What you'll learn - How to set up a basic Dagster project -- How to create Software-Defined Assets (SDAs) for each step of the ETL process +- How to create a Dagster asset for each step of the ETL process - How to use Dagster's built-in features to monitor and execute your pipeline ## Prerequisites @@ -21,7 +21,7 @@ Welcome to this hands-on tutorial where you'll learn how to build a basic Extrac - Basic Python knowledge - Python 3.7+ installed on your system, see [installation guide](tutorial/installation.md) for more details -## Step 1: Set Up Your Dagster Environment +## Step 1: Set up your Dagster environment First, set up a new Dagster project. @@ -46,7 +46,7 @@ First, set up a new Dagster project. pip install dagster dagster-webserver pandas ``` -## Step 2: Create Your Dagster Project Structure +## Step 2: Create your Dagster project structure Set up a basic project structure: @@ -86,7 +86,7 @@ how to build more complex pipelines with best practices. 4,Diana,31,Los Angeles ``` -## Step 3: Define Your Assets +## Step 3: Define your assets Now, create the assets for the ETL pipeline. 
Open `quickstart/assets.py` and add the following code: @@ -119,7 +119,7 @@ The `Definitions` object serves as the central configuration point for a Dagster object is defined and the asset is passed to it. This tells Dagster about the assets that make up the ETL pipeline and allows Dagster to manage their execution and dependencies. -## Step 4: Run Your Pipeline +## Step 4: Run your pipeline :::warning @@ -142,9 +142,9 @@ There should be screenshots here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 5. In the popup that appears, click View to view a run as it executes. 6. Watch as Dagster executes your pipeline. Try different views by selecting the different view buttons in the top-left. - You can click on each asset to see its logs and metadata. + Click each asset to see its logs and metadata. -## Step 5: Verify Your Results +## Step 5: Verify your results To verify that your pipeline worked correctly: @@ -156,7 +156,7 @@ To verify that your pipeline worked correctly: You should see your transformed data, including the new `age_group` column. -## What You've Learned +## What you've learned Congratulations! You've just built and run your first pipeline with Dagster. You've learned how to: @@ -164,7 +164,7 @@ Congratulations! You've just built and run your first pipeline with Dagster. 
You - Define Software-Defined Assets for each step of your pipeline - Use Dagster's UI to run and monitor your pipeline -## Next Steps +## Next steps - Continue with the [ETL Pipeline Tutorial](/tutorial/tutorial-etl) to learn how to build a more complex ETL pipeline - Learn how to [Think in Assets](/concepts/assets/thinking-in-assets) diff --git a/docs/docs-beta/docs/tutorial/tutorial-etl.md b/docs/docs-beta/docs/tutorial/tutorial-etl.md index d5c6ff74e23fa..c33536b24b070 100644 --- a/docs/docs-beta/docs/tutorial/tutorial-etl.md +++ b/docs/docs-beta/docs/tutorial/tutorial-etl.md @@ -1,12 +1,12 @@ --- -title: Building an ETL Pipeline +title: Build an ETL pipeline description: Learn how to build an ETL pipeline with Dagster last_update: date: 2024-08-10 author: Pedram Navid --- -# Dagster tutorial: Building your first ETL pipeline +# Build your first ETL pipeline Welcome to this hands-on tutorial where you'll learn how to build an ETL pipeline with Dagster while exploring key parts of Dagster. If you haven't already, complete the [Quick Start](/tutorial/quick-start) tutorial to get familiar with Dagster. diff --git a/docs/vale/styles/Dagster/acronyms.yml b/docs/vale/styles/Dagster/acronyms.yml index a87db8a31f888..9e7cd8375055e 100644 --- a/docs/vale/styles/Dagster/acronyms.yml +++ b/docs/vale/styles/Dagster/acronyms.yml @@ -54,6 +54,7 @@ exceptions: - SQL - SSH - SSL + - SSO - SVG - TBD - TCP diff --git a/docs/vale/styles/Dagster/colons.yml b/docs/vale/styles/Dagster/colons.yml deleted file mode 100644 index b7ff06e47f577..0000000000000 --- a/docs/vale/styles/Dagster/colons.yml +++ /dev/null @@ -1,10 +0,0 @@ -## This rule finds capitalized words immediately following a colon (:) - -extends: existence -message: "'%s' should be in lowercase." 
-link: 'https://developers.google.com/style/colons' -nonword: true -level: warning -scope: sentence -tokens: - - ':\s[A-Z]' \ No newline at end of file diff --git a/docs/vale/styles/Dagster/headings-casing.yml b/docs/vale/styles/Dagster/headings-casing.yml index f87146b9a7d67..412717f8112df 100644 --- a/docs/vale/styles/Dagster/headings-casing.yml +++ b/docs/vale/styles/Dagster/headings-casing.yml @@ -6,3 +6,43 @@ message: "'%s' should be in sentence case" level: error scope: heading match: $sentence +exceptions: + - Airbyte + - Airflow + - "Amazon (ECS|Redshift)" + - Azure Active Directory + - BigQuery + - Branch Deployments + - Change Tracking + - CI + - CLI + - Databricks + - Datadog + - dbt + - DuckDB + - ETL + - GitHub + - GitLab + - "Google (BigQuery|Cloud Platform|Workspace)" + - Hybrid + - Insights + - IP + - Microsoft Teams + - MLflow + - MongoDB + - MySQL + - OneLogin + - Okta + - OpenAI + - PagerDuty + - Pandera + - PingOne + - Postgres + - Role-based Access Control + - S3 + - SCIM + - Slack + - Snowflake + - SSO + - Twilio + - UI \ No newline at end of file diff --git a/docs/vale/styles/Dagster/parentheses.yml b/docs/vale/styles/Dagster/parentheses.yml deleted file mode 100644 index 3b8711d0c88f9..0000000000000 --- a/docs/vale/styles/Dagster/parentheses.yml +++ /dev/null @@ -1,7 +0,0 @@ -extends: existence -message: "Use parentheses judiciously." -link: 'https://developers.google.com/style/parentheses' -nonword: true -level: suggestion -tokens: - - '\(.+\)' diff --git a/docs/vale/styles/Dagster/readability.yml b/docs/vale/styles/Dagster/readability.yml deleted file mode 100644 index 556f0c2d60021..0000000000000 --- a/docs/vale/styles/Dagster/readability.yml +++ /dev/null @@ -1,8 +0,0 @@ -extends: metric -message: "Try to keep the Automated Readability Index (%s) below 8." 
-link: https://en.wikipedia.org/wiki/Automated_readability_index - -formula: | - (4.71 * (characters / words)) + (0.5 * (words / sentences)) - 21.43 - -condition: "> 8" \ No newline at end of file diff --git a/docs/vale/styles/Terms/engineering.yml b/docs/vale/styles/Terms/engineering.yml index b478bc812ae14..a94475159b4e2 100644 --- a/docs/vale/styles/Terms/engineering.yml +++ b/docs/vale/styles/Terms/engineering.yml @@ -20,6 +20,5 @@ swap: "[Nn]ode[.]?[Jj][Ss]": "Node.js" python: "Python" pythonic: "Pythonic" - react: "React" sql: "SQL" "[Tt]ypescript": "TypeScript" \ No newline at end of file diff --git a/docs/vale/styles/config/ignore/ignore.txt b/docs/vale/styles/config/ignore/ignore.txt index 5af9597d30f02..665d634279fa8 100644 --- a/docs/vale/styles/config/ignore/ignore.txt +++ b/docs/vale/styles/config/ignore/ignore.txt @@ -1,7 +1,10 @@ apis config +cron +pyenv pythonic scim +SDA[s] subprocess todo ui diff --git a/docs/vale/styles/config/vocabularies/Dagster/accept.txt b/docs/vale/styles/config/vocabularies/Dagster/accept.txt index e41298562317f..da73bea609b81 100644 --- a/docs/vale/styles/config/vocabularies/Dagster/accept.txt +++ b/docs/vale/styles/config/vocabularies/Dagster/accept.txt @@ -1,6 +1,8 @@ [Cc]onfig -Dagster +\bDagster\b DataFrame +Declarative Automation +dagster-.* gRPC [Mm]aterializations [Mm]emoization @@ -14,6 +16,9 @@ Databricks Datadog dbt DuckDB +GitHub +GitLab +Microsoft Teams MLflow MongoDB MySQL @@ -25,4 +30,5 @@ Pandera PingOne Postgres S3 +Snowflake Twilio \ No newline at end of file