From 640a976fbe68457f49dfb3bdf0f98932b708e70b Mon Sep 17 00:00:00 2001 From: Erin Cochran Date: Fri, 7 Jun 2024 15:05:23 -0400 Subject: [PATCH] [docs] - Make new Schedules page public (#22331) ## Summary & Motivation This PR: - Makes the revamped Schedules content public and deprecates (removes) the old page (`/concepts/partitions-schedules-sensors/schedules`) - Updates links in the docs. **Note**: Only links in docs content are updated in this PR - links in the UI and elsewhere in the repo will be in a subsequent PR. - Adds redirects ## How I Tested These Changes local, testing redirects --- docs/content/_apidocs.mdx | 3 +- docs/content/_navigation.json | 36 +- docs/content/concepts.mdx | 2 +- .../concepts/assets/asset-auto-execution.mdx | 2 +- .../checking-for-data-freshness.mdx | 8 +- .../define-execute-asset-checks.mdx | 4 +- docs/content/concepts/assets/asset-jobs.mdx | 2 +- docs/content/concepts/automation.mdx | 4 +- .../customizing-executing-timezones.mdx | 4 +- .../automation/schedules/examples.mdx | 2 +- .../schedules/partitioned-schedules.mdx | 4 +- .../concepts/automation/schedules/testing.mdx | 2 +- .../automation/schedules/troubleshooting.mdx | 2 +- .../configuration/config-schema-legacy.mdx | 2 +- .../concepts/configuration/config-schema.mdx | 2 +- .../ops-jobs-graphs/job-execution.mdx | 2 +- .../content/concepts/ops-jobs-graphs/jobs.mdx | 2 +- .../concepts/ops-jobs-graphs/op-jobs.mdx | 2 +- .../partitioning-ops.mdx | 2 +- .../partitions.mdx | 2 +- .../schedules.mdx | 551 ------------------ .../repositories-workspaces/repositories.mdx | 2 +- docs/content/concepts/resources.mdx | 8 +- docs/content/concepts/webserver/ui.mdx | 2 +- .../managing-user-roles-permissions.mdx | 3 +- docs/content/deployment/dagster-daemon.mdx | 4 +- .../guides/kubernetes/deploying-with-helm.mdx | 2 +- docs/content/deployment/guides/service.mdx | 2 +- docs/content/getting-started.mdx | 4 +- ...enriching-with-software-defined-assets.mdx | 10 +- .../guides/dagster/example_project.mdx | 2 +- docs/content/guides/dagster/managing-ml.mdx | 2 +- docs/content/integrations/airbyte-cloud.mdx | 6 +- docs/content/integrations/airbyte.mdx | 6 +- docs/content/integrations/airflow.mdx | 2 +- docs/content/integrations/databricks.mdx | 2 +- docs/content/integrations/dbt/reference.mdx | 2 +- docs/content/integrations/fivetran.mdx | 4 +- docs/next/util/redirectUrls.json | 8 +- .../api/apidocs/schedules-sensors.rst | 2 +- 40 files changed, 97 insertions(+), 616 deletions(-) delete mode 100644 docs/content/concepts/partitions-schedules-sensors/schedules.mdx diff --git a/docs/content/_apidocs.mdx b/docs/content/_apidocs.mdx index a5778e5cb04bc..cd0075f767360 100644 --- a/docs/content/_apidocs.mdx +++ b/docs/content/_apidocs.mdx @@ -56,8 +56,7 @@ APIs from the core `dagster` package, divided roughly by topic: Schedules & Sensors - APIs to define{" "} - schedules{" "} + APIs to define schedules{" "} and sensors{" "} that initiate job execution, as well as some built-in helpers for common cases. 
diff --git a/docs/content/_navigation.json b/docs/content/_navigation.json index 5f3f1028bc131..c98bd462b4487 100644 --- a/docs/content/_navigation.json +++ b/docs/content/_navigation.json @@ -129,7 +129,41 @@ }, { "title": "Schedules", - "path": "/concepts/partitions-schedules-sensors/schedules" + "path": "/concepts/automation/schedules", + "children": [ + { + "title": "Overview", + "path": "/concepts/automation/schedules" + }, + { + "title": "Automating assets", + "path": "/concepts/automation/schedules/automating-assets-schedules-jobs" + }, + { + "title": "Automating ops", + "path": "/concepts/automation/schedules/automating-ops-schedules-jobs" + }, + { + "title": "Examples", + "path": "/concepts/automation/schedules/examples" + }, + { + "title": "Partitioned schedules", + "path": "/concepts/automation/schedules/partitioned-schedules" + }, + { + "title": "Customizing timezones", + "path": "/concepts/automation/schedules/customizing-executing-timezones" + }, + { + "title": "Testing", + "path": "/concepts/automation/schedules/testing" + }, + { + "title": "Troubleshooting", + "path": "/concepts/automation/schedules/troubleshooting" + } + ] }, { "title": "Sensors", diff --git a/docs/content/concepts.mdx b/docs/content/concepts.mdx index 8514cecd1c01a..9c794607a70ff 100644 --- a/docs/content/concepts.mdx +++ b/docs/content/concepts.mdx @@ -60,7 +60,7 @@ Dagster offers several ways to run data pipelines without manual intervention, i > to create a sensor to automatically run the check. Based on the check's parameters and last run time, the sensor will run the check when enough time has elapsed that the asset might fail the check: @@ -396,7 +396,7 @@ To alert on overdue assets, create an alert policy with the following settings: > and then trigger the jobs via [sensors](/concepts/partitions-schedules-sensors/sensors) or [schedules](/concepts/partitions-schedules-sensors/schedules). By default, checks are included with the assets they check. You can also define jobs that include only checks, or only assets. +To define jobs that execute sets of assets and checks, you can use and then trigger the jobs via [sensors](/concepts/partitions-schedules-sensors/sensors) or [schedules](/concepts/automation/schedules). By default, checks are included with the assets they check. You can also define jobs that include only checks, or only assets. ```python file=/concepts/assets/asset_checks/jobs.py from dagster import ( @@ -434,7 +434,7 @@ In Dagster+, you can set up alerts to notify you when assets checks fail. To ale > - Schedules + Schedules Starts a job at a specified time @@ -245,7 +245,7 @@ The following cheatsheet contains high-level details about each of the automatio [op-jobs]: /concepts/ops-jobs-graphs/op-jobs -[schedules]: /concepts/partitions-schedules-sensors/schedules +[schedules]: /concepts/automation/schedules [sensors]: /concepts/partitions-schedules-sensors/sensors diff --git a/docs/content/concepts/automation/schedules/customizing-executing-timezones.mdx b/docs/content/concepts/automation/schedules/customizing-executing-timezones.mdx index e37d7c3c8fa5e..6686cdd68aae7 100644 --- a/docs/content/concepts/automation/schedules/customizing-executing-timezones.mdx +++ b/docs/content/concepts/automation/schedules/customizing-executing-timezones.mdx @@ -5,7 +5,7 @@ description: "By default, schedules without a set timezone execute in UTC. 
Learn # Customizing a schedule's executing timezone -[Schedules](/concepts/partitions-schedules-sensors/schedules) that don't have a set timezone will, by default, execute in [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). By the end of this guide, you'll know how to: +[Schedules](/concepts/automation/schedules) that don't have a set timezone will, by default, execute in [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). By the end of this guide, you'll know how to: - Set custom timezones on schedule definitions - Set custom timezones on partitioned jobs @@ -17,7 +17,7 @@ description: "By default, schedules without a set timezone execute in UTC. Learn To follow this guide, you need to be familiar with: -- [Schedules](/concepts/partitions-schedules-sensors/schedules) +- [Schedules](/concepts/automation/schedules) - Jobs, either [asset](/concepts/assets/asset-jobs) or [op-based](/concepts/ops-jobs-graphs/op-jobs) - [Partitions](/concepts/partitions-schedules-sensors/partitions) (Optional) diff --git a/docs/content/concepts/automation/schedules/examples.mdx b/docs/content/concepts/automation/schedules/examples.mdx index 8e18e679a64da..1f211778e1aea 100644 --- a/docs/content/concepts/automation/schedules/examples.mdx +++ b/docs/content/concepts/automation/schedules/examples.mdx @@ -5,7 +5,7 @@ description: "Examples focused on Dagster schedules." # Schedule examples -This reference contains a variety of examples using Dagster [schedules](/concepts/partitions-schedules-sensors/schedules). Each example contains: +This reference contains a variety of examples using Dagster [schedules](/concepts/automation/schedules). Each example contains: - A summary - Additional notes diff --git a/docs/content/concepts/automation/schedules/partitioned-schedules.mdx b/docs/content/concepts/automation/schedules/partitioned-schedules.mdx index 6ed402c47c94f..b853bbff97976 100644 --- a/docs/content/concepts/automation/schedules/partitioned-schedules.mdx +++ b/docs/content/concepts/automation/schedules/partitioned-schedules.mdx @@ -18,7 +18,7 @@ In this guide, we'll walk you through how to construct schedules from partitione To follow this guide, you need to be familiar with: -- [Schedules](/concepts/partitions-schedules-sensors/schedules) +- [Schedules](/concepts/automation/schedules) - [Partitions](/concepts/partitions-schedules-sensors/partitions) - [Asset definitions](/concepts/assets/software-defined-assets) - [Asset jobs](/concepts/assets/asset-jobs) and [op jobs](/concepts/ops-jobs-graphs/op-jobs) @@ -243,7 +243,7 @@ def antarctica_schedule(): objects. Each run request launches a run. - - An optional , which specifies a message which describes why no runs were requested - -Dagster includes a scheduler, which runs as part of the dagster-daemon process. Once you have defined a schedule, see the [dagster-daemon](/deployment/dagster-daemon) page for instructions on how to run the daemon in order to execute your schedules. - ---- - -## Relevant APIs - -| Name | Description | -| --------------------------------------------------------- | --------------------------------------------------------------------------------------------------- | -| | Decorator that defines a schedule that executes according to a given cron schedule. | -| | Class for schedules. | -| | A function that constructs a schedule whose interval matches the partitioning of a partitioned job. 
| -| | The context passed to the schedule definition execution function | -| | A function that constructs a `ScheduleEvaluationContext`, typically used for testing. | - ---- - -## Defining schedules - -You define a schedule by constructing a . In this section: - -- [Basic schedules](#basic-schedules) -- [Schedules that provide custom run config and tags](#schedules-that-provide-custom-run-config-and-tags) -- [Schedules from partitioned assets and jobs](#schedules-from-partitioned-assets-and-jobs) -- [Customizing execution times](#customizing-execution-times), including [timezones](#customizing-the-executing-timezone) and accounting for [Daylight Savings Time](#execution-time-and-daylight-savings-time) - -### Basic schedules - -Here's a simple schedule that runs a job every day, at midnight: - -```python file=concepts/partitions_schedules_sensors/schedules/schedules.py startafter=start_basic_schedule endbefore=end_basic_schedule -@job -def my_job(): ... - - -basic_schedule = ScheduleDefinition(job=my_job, cron_schedule="0 0 * * *") -``` - -The `cron_schedule` argument accepts standard [cron expressions](https://en.wikipedia.org/wiki/Cron). It also accepts `"@hourly"`, `"@daily"`, `"@weekly"`, and `"@monthly"` if your `croniter` dependency's version is >= 1.0.12. - -To run schedules for assets, you can [build a job that materializes assets](/concepts/assets/software-defined-assets#building-jobs-that-materialize-assets) and construct a : - -```python file=concepts/partitions_schedules_sensors/schedules/schedules.py startafter=start_basic_asset_schedule endbefore=end_basic_asset_schedule -from dagster import AssetSelection, ScheduleDefinition, define_asset_job - -asset_job = define_asset_job("asset_job", AssetSelection.groups("some_asset_group")) - -basic_schedule = ScheduleDefinition(job=asset_job, cron_schedule="0 0 * * *") -``` - -### Schedules that provide custom run config and tags - -If you want to vary the behavior of your job based on the time it's scheduled to run, you can use the decorator, which decorates a function that returns run config based on a provided : - -```python file=concepts/partitions_schedules_sensors/schedules/schedules.py startafter=start_run_config_schedule endbefore=end_run_config_schedule -@op(config_schema={"scheduled_date": str}) -def configurable_op(context: OpExecutionContext): - context.log.info(context.op_config["scheduled_date"]) - - -@job -def configurable_job(): - configurable_op() - - -@schedule(job=configurable_job, cron_schedule="0 0 * * *") -def configurable_job_schedule(context: ScheduleEvaluationContext): - scheduled_date = context.scheduled_execution_time.strftime("%Y-%m-%d") - return RunRequest( - run_key=None, - run_config={ - "ops": {"configurable_op": {"config": {"scheduled_date": scheduled_date}}} - }, - tags={"date": scheduled_date}, - ) -``` - -If you don't need access to the context parameter, you can omit it from the decorated function. - -### Schedules from partitioned assets and jobs - -- [Time partitioned jobs and assets](#time-partitioned-jobs-and-assets) -- [Static partitioned jobs](#static-partitioned-jobs) - -#### Time partitioned jobs and assets - -When you have a [partitioned job](/concepts/partitions-schedules-sensors/partitions) that's partitioned by time, you can use the function to construct a schedule for it whose interval matches the spacing of partitions in your job. For example, if you have a daily partitioned job that fills in a date partition of a table each time it runs, you likely want to run that job every day. 
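The next example assumes a `partitioned_config` that isn't shown in this excerpt. As a minimal sketch, one way to define it is with the `@daily_partitioned_config` decorator; the `process_data_for_date` op name and the start date here are illustrative placeholders, not part of the original example:

```python
from datetime import datetime

from dagster import daily_partitioned_config


@daily_partitioned_config(start_date=datetime(2024, 1, 1))
def partitioned_config(start: datetime, _end: datetime):
    # Hand the partition's time window start to the job's op as run config.
    # `process_data_for_date` is a hypothetical op name for illustration.
    return {
        "ops": {
            "process_data_for_date": {"config": {"date": start.strftime("%Y-%m-%d")}}
        }
    }
```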
-
-Having defined a date-partitioned job, you can construct a schedule for it using `build_schedule_from_partitioned_job`. For example:
-
-```python file=/concepts/partitions_schedules_sensors/schedule_from_partitions.py startafter=start_marker endbefore=end_marker
-from dagster import build_schedule_from_partitioned_job, job
-
-
-@job(config=partitioned_config)
-def partitioned_op_job(): ...
-
-
-partitioned_op_schedule = build_schedule_from_partitioned_job(
-    partitioned_op_job,
-)
-```
-
-The [Partitioning assets](/concepts/partitions-schedules-sensors/partitioning-assets) documentation includes an [example of a date-partitioned asset](/concepts/partitions-schedules-sensors/partitioning-assets#defining-partitioned-assets). You can define a schedule similarly using `build_schedule_from_partitioned_job`:
-
-```python file=/concepts/partitions_schedules_sensors/schedule_from_partitions.py startafter=start_partitioned_asset_schedule endbefore=end_partitioned_asset_schedule
-from dagster import (
-    asset,
-    build_schedule_from_partitioned_job,
-    define_asset_job,
-    DailyPartitionsDefinition,
-)
-
-daily_partition = DailyPartitionsDefinition(start_date="2024-05-20")
-
-
-@asset(partitions_def=daily_partition)
-def daily_asset(): ...
-
-
-partitioned_asset_job = define_asset_job("partitioned_job", selection=[daily_asset])
-
-
-asset_partitioned_schedule = build_schedule_from_partitioned_job(
-    partitioned_asset_job,
-)
-```
-
-Each schedule tick of a partitioned job fills in the latest partition in the partition set that exists as of the tick time. Note that this implies that when the schedule submits a run on a particular day, it will typically be for the partition whose key corresponds to the previous day. For example, the schedule will fill in the `2020-04-01` partition on `2020-04-02`. That's because each partition corresponds to a time window. The key of the partition is the start of the time window, but the partition isn't included in the list until its time window has completed. Waiting until the time window has finished before kicking off a run means the run can process data from within that entire time window.
-
-However, you can use the `end_offset` parameter to change which partition is the most recent partition that is filled in at each schedule tick. Setting `end_offset` to `1` will extend the partitions forward so that the schedule tick that runs on day `N` will fill in day `N`'s partition instead of day `N-1`, and setting `end_offset` to a negative number will cause the schedule to fill in earlier days' partitions. In general, setting `end_offset` to `X` will cause the schedule tick that runs on day `N` to fill in the partition for day `N - 1 + X`. The same holds true for hourly, weekly, and monthly partitioned jobs, for their respective partition sizes.
-
-You can use the `minute_of_hour`, `hour_of_day`, `day_of_week`, and `day_of_month` parameters of `build_schedule_from_partitioned_job` to control the timing of the schedule. For example, if you have a job that's partitioned by date, and you set `minute_of_hour` to `30` and `hour_of_day` to `1`, the schedule would submit the run for partition `2020-04-01` at 1:30 AM on `2020-04-02`.
-
-#### Static partitioned jobs
-
-You can also create a schedule for a static partitioned job. The Partitioned Jobs concepts page also includes an [example of how to define a static partitioned job](/concepts/partitions-schedules-sensors/partitioning-ops#defining-jobs-with-static-partitions).
To define a schedule for a static partitioned job, we will construct a schedule from scratch rather than using a helper function like `build_schedule_from_partitioned_job`. This allows more flexibility in determining which partitions the schedule should run.
-
-For example, suppose we have the continents static partitioned job from the Partitioned Jobs concept page:
-
-```python file=/concepts/partitions_schedules_sensors/static_partitioned_job.py
-from dagster import Config, OpExecutionContext, job, op, static_partitioned_config
-
-CONTINENTS = [
-    "Africa",
-    "Antarctica",
-    "Asia",
-    "Europe",
-    "North America",
-    "Oceania",
-    "South America",
-]
-
-
-@static_partitioned_config(partition_keys=CONTINENTS)
-def continent_config(partition_key: str):
-    return {"ops": {"continent_op": {"config": {"continent_name": partition_key}}}}
-
-
-class ContinentOpConfig(Config):
-    continent_name: str
-
-
-@op
-def continent_op(context: OpExecutionContext, config: ContinentOpConfig):
-    context.log.info(config.continent_name)
-
-
-@job(config=continent_config)
-def continent_job():
-    continent_op()
-```
-
-We can write a schedule that will run every partition of this job:
-
-```python file=/concepts/partitions_schedules_sensors/schedule_from_partitions.py startafter=start_static_partition endbefore=end_static_partition
-from dagster import schedule, RunRequest
-
-
-@schedule(cron_schedule="0 0 * * *", job=continent_job)
-def continent_schedule():
-    for c in CONTINENTS:
-        yield RunRequest(run_key=c, partition_key=c)
-```
-
-Or a schedule that will run a subselection of the partitions:
-
-```python file=/concepts/partitions_schedules_sensors/schedule_from_partitions.py startafter=start_single_partition endbefore=end_single_partition
-@schedule(cron_schedule="0 0 * * *", job=continent_job)
-def antarctica_schedule():
-    return RunRequest(partition_key="Antarctica")
-```
-
-### Customizing execution times
-
-- [Customizing the executing timezone](#customizing-the-executing-timezone)
-- [Execution time and Daylight Savings Time](#execution-time-and-daylight-savings-time)
-
-#### Customizing the executing timezone
-
-You can customize the timezone in which your schedule executes by setting the `execution_timezone` parameter on your schedule to any [`tz` timezone](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). Schedules with no timezone set run in UTC.
-
-For example, the following schedule executes daily at 9AM in US/Pacific time:
-
-```python file=concepts/partitions_schedules_sensors/schedules/schedules.py startafter=start_timezone endbefore=end_timezone
-my_timezone_schedule = ScheduleDefinition(
-    job=my_job, cron_schedule="0 9 * * *", execution_timezone="America/Los_Angeles"
-)
-```
-
-The `@schedule` decorator accepts the same argument. Schedules from partitioned jobs execute in the timezone defined on the partitioned config.
-
-#### Execution time and Daylight Savings Time
-
-Because of Daylight Savings Time transitions, it's possible to specify an execution time that does not exist for every scheduled interval. For example, say you have a daily schedule with an execution time of 2:30 AM in the US/Eastern timezone. On 2019/03/10, the time jumps from 2:00 AM to 3:00 AM when Daylight Savings Time begins. Therefore, the time of 2:30 AM doesn't exist for that day.
-
-If you specify such an execution time, Dagster runs your schedule at the next time that exists. In the previous example, the schedule would run at 3:00 AM.
-
-It's also possible to specify an execution time that exists twice on one day every year.
For example, on 2019/11/03 in US/Eastern time, the hour from 1:00 AM to 2:00 AM repeats, so a daily schedule running at 1:30 AM has two possible times in which it could execute. In this case, Dagster will execute your schedule at the latter of the two possible times. - -Hourly schedules will be unaffected by daylight savings time transitions - the schedule will continue to run exactly once every hour, even as the timezone changes. In the example above where the hour from 1:00 AM to 2:00 AM repeats, an hourly schedule running at 30 minutes past the hour would run at 12:30 AM, both instances of 1:30 AM, and then proceed normally from 2:30 AM on. - -### Using resources in schedules - -Dagster's [resources](/concepts/resources) system can be used with schedules to make it easier to interact with external systems or to make components of a schedule easier to plug in for testing purposes. - -To specify resource dependencies, annotate the resource as a parameter to the schedule's function. Resources are provided by attaching them to your call. - -Here, a resource is provided that helps a schedule generate a date string: - -```python file=/concepts/resources/pythonic_resources.py startafter=start_new_resource_on_schedule endbefore=end_new_resource_on_schedule dedent=4 -from dagster import ( - schedule, - ScheduleEvaluationContext, - ConfigurableResource, - job, - RunRequest, - RunConfig, - Definitions, -) -from datetime import datetime -from typing import List - -class DateFormatter(ConfigurableResource): - format: str - - def strftime(self, dt: datetime) -> str: - return dt.strftime(self.format) - -@job -def process_data(): ... - -@schedule(job=process_data, cron_schedule="* * * * *") -def process_data_schedule( - context: ScheduleEvaluationContext, - date_formatter: DateFormatter, -): - formatted_date = date_formatter.strftime(context.scheduled_execution_time) - - return RunRequest( - run_key=None, - tags={"date": formatted_date}, - ) - -defs = Definitions( - jobs=[process_data], - schedules=[process_data_schedule], - resources={"date_formatter": DateFormatter(format="%Y-%m-%d")}, -) -``` - -For more information on resources, refer to the [Resources documentation](/concepts/resources). To see how to test schedules with resources, refer to the section on [Testing schedules with resources](#testing-schedules-with-resources). - ---- - -## Running the scheduler - -Schedules must be started for them to run. Schedules can be started and stopped: - -- In the Dagster UI using the **Schedules** tab: - - - - Schedules tab in the Dagster UI - -- Using the CLI: - - ```shell - dagster schedule start - dagster schedule stop - ``` - -- In code by setting the schedule's default status to `DefaultScheduleStatus.RUNNING`: - - ```python file=concepts/partitions_schedules_sensors/schedules/schedules.py startafter=start_running_in_code endbefore=end_running_in_code - my_running_schedule = ScheduleDefinition( - job=my_job, cron_schedule="0 9 * * *", default_status=DefaultScheduleStatus.RUNNING - ) - ``` - -If you manually start or stop a schedule in the UI, that overrides any default status set in code. - -Once the schedule is started, the schedule will begin executing immediately if you're running the [dagster-daemon](/deployment/dagster-daemon) process as part of your deployment. Refer to the [Troubleshooting](/concepts/partitions-schedules-sensors/schedules#troubleshooting) section if your schedule has been started but isn't submitting runs. 
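For the daemon and the UI to see a schedule at all, both the schedule and the job it targets must be included in your code location. Here's a minimal sketch, reusing the `my_job` and `my_running_schedule` objects from the examples above:

```python
from dagster import Definitions

# A schedule is only visible to the daemon and the UI if it's included
# in the code location's Definitions, along with the job it targets.
defs = Definitions(
    jobs=[my_job],
    schedules=[my_running_schedule],
)
```

The [Troubleshooting](#troubleshooting) section below covers this check in more detail.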
- ---- - -## Logging in schedules - - - Schedule logs are stored in your{" "} - - Dagster instance's compute log storage - - . You should ensure that your compute log storage is configured to view your schedule - logs. - - -Any schedule can emit log messages during its evaluation function: - -```python file=concepts/partitions_schedules_sensors/schedules/schedules.py startafter=start_schedule_logging endbefore=end_schedule_logging -@schedule(job=my_job, cron_schedule="* * * * *") -def logs_then_skips(context): - context.log.info("Logging from a schedule!") - return SkipReason("Nothing to do") -``` - -These logs can be viewed when inspecting a tick in the tick history view on the corresponding schedule page. - ---- - -## Testing schedules - - - - -### Via the Dagster UI - -In the UI, you can manually trigger a test evaluation of a schedule using a mock evaluation time and view the results. - -On the overview page for a particular schedule, there is a `Test schedule` button. Clicking this button will perform a test evaluation of your schedule for a provided mock evaluation time, and show you the results of that evaluation. - -1. Click **Overview > Schedules**. - -2. Click the schedule you want to test. - -3. Click the **Test Schedule** button, located near the top right corner of the page: - - - -4. You'll be prompted to select a mock schedule evaluation time. As schedules are defined on a cadence, the evaluation times provided in the dropdown are past and future times along that cadence. - - For example, let's say you're testing a schedule with a cadence of `"Every day at X time"`. In the dropdown, you'd see five evaluation times in the future and five evaluation times in the past along that cadence. - - - -5. After selecting an evaluation time, click the **Evaluate** button. A window containing the result of the evaluation will display: - - - - If the evaluation was successful and a run request was produced, you can open the launchpad pre-scaffolded with the config corresponding to that run request. - - - - -### Via Python - -To test a function decorated by the decorator, you can invoke the schedule definition like it's a regular Python function. The invocation will return run config, which can then be validated using the function. Below is a test for the `configurable_job_schedule` that we defined in [an earlier section](#schedules-that-provide-custom-run-config-and-tags). - -It uses to construct a to provide for the `context` parameter. - -```python file=concepts/partitions_schedules_sensors/schedules/schedule_examples.py startafter=start_test_cron_schedule_context endbefore=end_test_cron_schedule_context -from dagster import build_schedule_context, validate_run_config - - -def test_configurable_job_schedule(): - context = build_schedule_context( - scheduled_execution_time=datetime.datetime(2020, 1, 1) - ) - run_request = configurable_job_schedule(context) - assert validate_run_config(configurable_job, run_request.run_config) -``` - -If your `@schedule`-decorated function doesn't have a context parameter, you don't need to provide one when invoking it. - -#### Testing schedules with resources - -For schedules that utilize [resources](/concepts/resources), you can provide the necessary resources when invoking the schedule function. - -Below is a test for the `process_data_schedule` that we defined in the [Using resources in schedules](#using-resources-in-schedules) section, which uses the `date_formatter` resource. 
-
-```python file=/concepts/resources/pythonic_resources.py startafter=start_test_resource_on_schedule endbefore=end_test_resource_on_schedule dedent=4
-from dagster import build_schedule_context
-
-def test_process_data_schedule():
-    context = build_schedule_context(
-        scheduled_execution_time=datetime.datetime(2020, 1, 1)
-    )
-    run_request = process_data_schedule(
-        context, date_formatter=DateFormatter(format="%Y-%m-%d")
-    )
-    # The schedule sets the formatted date as a run tag, so assert on the tags
-    assert run_request.tags["date"] == "2020-01-01"
-```
-
----
-
-## Troubleshooting
-
-Try these steps if you're trying to run a schedule and are running into problems:
-
-- [Verify the schedule is included as a definition](#verify-the-schedule-is-included-as-a-definition)
-- [Verify the schedule has been started](#verify-that-the-schedule-has-been-started)
-- [Verify schedule interval configuration](#verify-schedule-interval-configuration)
-- [Verify dagster-daemon setup](#verify-dagster-daemon-setup)
-
-### Verify the schedule is included as a definition
-
-First, verify that the schedule has been included in a `Definitions` object. This ensures that the schedule is detectable and loadable by Dagster tools like the UI and CLI:
-
-```python
-defs = Definitions(
-    assets=[asset_1, asset_2],
-    jobs=[job_1],
-    schedules=[all_assets_job_schedule],
-)
-```
-
-Refer to the [Code locations documentation](/concepts/code-locations) for more info.
-
-### Verify that the schedule has been started
-
-Next, using the UI, verify the schedule has been started:
-
-1. Open the left sidenav and locate the job attached to the schedule. Schedules that have been started will have a **green clock icon** next to them:
-
-   Started schedule icon next to schedule in left sidenav in the Dagster UI
-
-   If the schedule appears in the list but doesn't have the green clock icon, click the schedule. On the page that displays, use the **toggle at the top of the page** to mark it as running:
-
-   Start/stop toggle for schedules in Code Locations tab in the Dagster UI
-
-2. Next, verify that the UI has loaded the latest version of your schedule code:
-
-   1. Click **Deployment** in the top navigation.
-
-   2. In the **Code locations** tab, click **Reload** (local webserver) or **Redeploy** (Dagster+).
-
-   If the webserver is unable to load the code location - for example, due to a syntax error - an error message with additional info will display in the left UI sidenav.
-
-   If the code location is loaded successfully but the schedule doesn't appear in the list of schedules, verify that the schedule is included in a `Definitions` object.
-
-### Verify schedule interval configuration
-
-Clicking the schedule in the left sidenav in the UI opens the **Schedule details** page for the schedule.
-
-If the schedule is running, a **Next tick** will display near the top of the page. This indicates when the schedule is next expected to run:
-
-Verify that the time is what you expect, including the timezone.
-
-### Verify dagster-daemon setup
-
-This section is applicable to Open Source (OSS) deployments.
-
-If the schedule interval is correctly configured but runs aren't being created, it's possible that the dagster-daemon process isn't working correctly. If you haven't set up a Dagster daemon yet, refer to the [Deployment guides](/deployment) for more info.
-
-1. First, check that the daemon is running:
-
-   1. In the UI, click **Deployment** in the top navigation.
-   2. Click the **Daemons** tab.
-   3. Locate the **Scheduler** row.
-
-   The daemon process periodically sends out a heartbeat from the scheduler. If the scheduler daemon has a status of **Not running**, this indicates that there's an issue with your daemon deployment. If the daemon ran into an error that resulted in an exception, this error will often display in this tab.
-
-   If there isn't a clear error on this page or if the daemon should be sending heartbeats but isn't, move on to step two.
-
-2. Next, check the logs from the daemon process. The steps to do this will depend on your deployment - for example, if you're using Kubernetes, you'll need to get the logs from the pod that's running the daemon. You should be able to search those logs for the name of your schedule (or `SchedulerDaemon` to see all logs associated with the scheduler) to gain an understanding of what's going wrong.
-
-   If the daemon output contains an error indicating the schedule couldn't be found, verify that the daemon is using the same `workspace.yaml` file as the webserver. The daemon does not need to restart in order to pick up changes to the `workspace.yaml` file. Refer to the [Workspace files documentation](/concepts/code-locations/workspace-files) for more info.
-
-   If the logs don't indicate the cause of the issue, move on to step three.
-
-3. Lastly, double-check your schedule code:
-
-   1. In the UI, open the schedule's **Schedule details** page by clicking the schedule in the left sidenav.
-   2. On this page, locate the latest tick for the schedule.
-
-      If there was an error trying to submit runs for the schedule, a red **Failure** badge should display in the **Status** column. Click the badge to display an error and stack trace describing the execution failure.
-
-**Still stuck?** If these steps didn't resolve the issue, reach out in Slack or file an issue on GitHub.
-
----
-
-## See it in action
-
-For more examples of schedules, check out the following in our [Hacker News example](https://github.com/dagster-io/dagster/tree/master/examples/project_fully_featured):
-
-- [Defining a partitioned asset job and a schedule based on it](https://github.com/dagster-io/dagster/blob/master/examples/project_fully_featured/project_fully_featured/jobs.py)
diff --git a/docs/content/concepts/repositories-workspaces/repositories.mdx b/docs/content/concepts/repositories-workspaces/repositories.mdx
index a3425bd074227..809340e3c70da 100644
--- a/docs/content/concepts/repositories-workspaces/repositories.mdx
+++ b/docs/content/concepts/repositories-workspaces/repositories.mdx
@@ -17,7 +17,7 @@ A repository is a collection of asset definitions, jobs, schedules, and sensors
 
 A convenient way to organize your job and other definitions, each repository:
 
-- Includes various definitions: [Asset definitions](/concepts/assets/software-defined-assets), [Jobs](/concepts/ops-jobs-graphs/jobs), [Schedules](/concepts/partitions-schedules-sensors/schedules), and [Sensors](/concepts/partitions-schedules-sensors/sensors).
+- Includes various definitions: [Asset definitions](/concepts/assets/software-defined-assets), [Jobs](/concepts/ops-jobs-graphs/jobs), [Schedules](/concepts/automation/schedules), and [Sensors](/concepts/partitions-schedules-sensors/sensors).
 - Is loaded in a different process than Dagster system processes like the webserver. Any communication between the Dagster system and repository code occurs over an RPC mechanism, ensuring that problems in repository code can't affect Dagster or other repositories.
- Can be loaded in its own Python environment, so you can manage your dependencies (or even your own Python versions) separately. diff --git a/docs/content/concepts/resources.mdx b/docs/content/concepts/resources.mdx index c0f3e14cf37b2..05f45c351d722 100644 --- a/docs/content/concepts/resources.mdx +++ b/docs/content/concepts/resources.mdx @@ -23,7 +23,7 @@ In data engineering, resources are the external services, tools, and storage you - The Snowflake/Databricks/BigQuery account the data is ingested into - The BI tool the dashboard was made in -Using Dagster resources, you can standardize connections and integrations to these tools across Dagster definitions like [asset definitions](/concepts/assets/software-defined-assets), [schedules](/concepts/partitions-schedules-sensors/schedules), [sensors](/concepts/partitions-schedules-sensors/sensors), [ops](/concepts/ops-jobs-graphs/ops), and [jobs](/concepts/ops-jobs-graphs/jobs). +Using Dagster resources, you can standardize connections and integrations to these tools across Dagster definitions like [asset definitions](/concepts/assets/software-defined-assets), [schedules](/concepts/automation/schedules), [sensors](/concepts/partitions-schedules-sensors/sensors), [ops](/concepts/ops-jobs-graphs/ops), and [jobs](/concepts/ops-jobs-graphs/jobs). So, why use resources? @@ -102,9 +102,9 @@ To specify resource dependencies on a sensor, annotate the resource type as a pa ### With schedules -[Schedules](/concepts/partitions-schedules-sensors/schedules) can use resources in case your schedule logic needs to interface with an external tool or to make your schedule logic more testable. +[Schedules](/concepts/automation/schedules) can use resources in case your schedule logic needs to interface with an external tool or to make your schedule logic more testable. -To specify resource dependencies on a schedule, annotate the resource type as a parameter to the schedule's function. For more information and examples, refer to the [Schedules documentation](/concepts/partitions-schedules-sensors/schedules#using-resources-in-schedules). +To specify resource dependencies on a schedule, annotate the resource type as a parameter to the schedule's function. Refer to the [Schedule examples reference](/concepts/automation/schedules/examples#using-resources-in-schedules) for more info. ### With ops and jobs @@ -186,7 +186,7 @@ For more information on using environment variables with Dagster, refer to the [ ### Configuring resources at launch time -In some cases, you may want to specify configuration for a resource at launch time, in the Launchpad or in a for a [schedule](/concepts/partitions-schedules-sensors/schedules) or [sensor](/concepts/partitions-schedules-sensors/sensors). For example, you may want a sensor-triggered run to specify a different target table in a database resource for each run. +In some cases, you may want to specify configuration for a resource at launch time, in the Launchpad or in a for a [schedule](/concepts/automation/schedules) or [sensor](/concepts/partitions-schedules-sensors/sensors). For example, you may want a sensor-triggered run to specify a different target table in a database resource for each run. 
You can use the `configure_at_launch()` method to defer the construction of a configurable resource until launch time: diff --git a/docs/content/concepts/webserver/ui.mdx b/docs/content/concepts/webserver/ui.mdx index a783555491337..1cbc921f396e1 100644 --- a/docs/content/concepts/webserver/ui.mdx +++ b/docs/content/concepts/webserver/ui.mdx @@ -259,7 +259,7 @@ height={826} ### All schedules -- **Description**: The **Schedules** page lists all [schedules](/concepts/partitions-schedules-sensors/schedules) defined in your Dagster deployment, as well as information about upcoming ticks for anticipated scheduled runs. Click a schedule to open the [**Schedule details**](#schedule-details) page. +- **Description**: The **Schedules** page lists all [schedules](/concepts/automation/schedules) defined in your Dagster deployment, as well as information about upcoming ticks for anticipated scheduled runs. Click a schedule to open the [**Schedule details**](#schedule-details) page. - **Accessed by**: Clicking **Overview (top nav) > Schedules tab** diff --git a/docs/content/dagster-plus/account/managing-users/managing-user-roles-permissions.mdx b/docs/content/dagster-plus/account/managing-users/managing-user-roles-permissions.mdx index 9deaf57947488..f09573cff9e94 100644 --- a/docs/content/dagster-plus/account/managing-users/managing-user-roles-permissions.mdx +++ b/docs/content/dagster-plus/account/managing-users/managing-user-roles-permissions.mdx @@ -205,8 +205,7 @@ height={554} - Start and stop{" "} - schedules + Start and stop schedules ❌ ❌ diff --git a/docs/content/deployment/dagster-daemon.mdx b/docs/content/deployment/dagster-daemon.mdx index ceaf8374d170f..c62f97b56e63e 100644 --- a/docs/content/deployment/dagster-daemon.mdx +++ b/docs/content/deployment/dagster-daemon.mdx @@ -5,7 +5,7 @@ description: Several Dagster features require a long-running daemon process with # Dagster daemon -Several Dagster features, like [schedules](/concepts/partitions-schedules-sensors/schedules), [sensors](/concepts/partitions-schedules-sensors/sensors), and [run queueing](/guides/customizing-run-queue-priority), require a long-running `dagster-daemon` process to be included with your deployment. +Several Dagster features, like [schedules](/concepts/automation/schedules), [sensors](/concepts/partitions-schedules-sensors/sensors), and [run queueing](/guides/customizing-run-queue-priority), require a long-running `dagster-daemon` process to be included with your deployment. --- @@ -77,7 +77,7 @@ The following daemons are currently available: Scheduler daemon Creates runs from active{" "} - schedules + schedules Enabled / runs as long as the default{" "} diff --git a/docs/content/deployment/guides/kubernetes/deploying-with-helm.mdx b/docs/content/deployment/guides/kubernetes/deploying-with-helm.mdx index badd9403e0acd..db9b5e8263532 100644 --- a/docs/content/deployment/guides/kubernetes/deploying-with-helm.mdx +++ b/docs/content/deployment/guides/kubernetes/deploying-with-helm.mdx @@ -158,7 +158,7 @@ By default, the webserver launches runs via the -The [daemon](/deployment/dagster-daemon) periodically checks the runs table in PostgreSQL for runs that are ready to be launched. The daemon also submits runs from [schedules](/concepts/partitions-schedules-sensors/schedules) and [sensors](/concepts/partitions-schedules-sensors/sensors). +The [daemon](/deployment/dagster-daemon) periodically checks the runs table in PostgreSQL for runs that are ready to be launched. 
The daemon also submits runs from [schedules](/concepts/automation/schedules) and [sensors](/concepts/partitions-schedules-sensors/sensors). The daemon launches runs via the , creating a run worker [job](https://kubernetes.io/docs/concepts/workloads/controllers/job/) with the image specified in the user code deployment. diff --git a/docs/content/deployment/guides/service.mdx b/docs/content/deployment/guides/service.mdx index b7c441fd8718f..dab906ece55da 100644 --- a/docs/content/deployment/guides/service.mdx +++ b/docs/content/deployment/guides/service.mdx @@ -25,7 +25,7 @@ In this configuration, the webserver will write execution logs to `$DAGSTER_HOME ## Running the Dagster daemon -If you're using [schedules](/concepts/partitions-schedules-sensors/schedules), [sensors](/concepts/partitions-schedules-sensors/sensors), or [backfills](/concepts/partitions-schedules-sensors/backfills), or want to set limits on the number of runs that can be executed at once, you'll want to also run a [dagster-daemon service](/deployment/dagster-daemon) as part of your deployment. To run this service locally, run the following command: +If you're using [schedules](/concepts/automation/schedules), [sensors](/concepts/partitions-schedules-sensors/sensors), or [backfills](/concepts/partitions-schedules-sensors/backfills), or want to set limits on the number of runs that can be executed at once, you'll want to also run a [dagster-daemon service](/deployment/dagster-daemon) as part of your deployment. To run this service locally, run the following command: ```shell pip install dagster diff --git a/docs/content/getting-started.mdx b/docs/content/getting-started.mdx index 49376d2f85e56..231bb752c046a 100644 --- a/docs/content/getting-started.mdx +++ b/docs/content/getting-started.mdx @@ -93,8 +93,8 @@ Use one of our examples to explore Dagster concepts, integrations, and realistic href="/deployment/guides/kubernetes/deploying-with-helm" > diff --git a/docs/content/guides/dagster/example_project.mdx b/docs/content/guides/dagster/example_project.mdx index def2ff1edd6d7..6931f1b380526 100644 --- a/docs/content/guides/dagster/example_project.mdx +++ b/docs/content/guides/dagster/example_project.mdx @@ -81,7 +81,7 @@ The way we model resources helps separate the business logic in code from enviro ### Scheduling and triggering jobs -[Schedules](/concepts/partitions-schedules-sensors/schedules) - A schedule allows you to execute a [job](/concepts/ops-jobs-graphs/jobs) at a fixed interval. This example includes an [hourly schedule](https://github.com/dagster-io/dagster/blob/master/examples/project_fully_featured/project_fully_featured/jobs.py) that materializes the `core` asset group every hour. +[Schedules](/concepts/automation/schedules) - A schedule allows you to execute a [job](/concepts/ops-jobs-graphs/jobs) at a fixed interval. This example includes an [hourly schedule](https://github.com/dagster-io/dagster/blob/master/examples/project_fully_featured/project_fully_featured/jobs.py) that materializes the `core` asset group every hour. [Sensors](/concepts/partitions-schedules-sensors/sensors) - A sensor allows you to instigate runs based on some external state change. 
In this example, we have sensors to react to different state changes: diff --git a/docs/content/guides/dagster/managing-ml.mdx b/docs/content/guides/dagster/managing-ml.mdx index f966c5063edd1..13736fe71cc1e 100644 --- a/docs/content/guides/dagster/managing-ml.mdx +++ b/docs/content/guides/dagster/managing-ml.mdx @@ -82,7 +82,7 @@ def some_ml_model(some_data): ... def predictions(some_ml_model): ... ``` -A more traditional schedule can also be used to update machine learning assets, causing them to be re-materialized or retrained on the latest data. For example, setting up a [cron schedule on a daily basis](/concepts/partitions-schedules-sensors/schedules). +A more traditional schedule can also be used to update machine learning assets, causing them to be re-materialized or retrained on the latest data. For example, setting up a [cron schedule on a daily basis](/concepts/automation/schedules). This can be useful if you have data that is also being scheduled on a cron schedule and want to add your machine model jobs to run on a schedule as well. diff --git a/docs/content/integrations/airbyte-cloud.mdx b/docs/content/integrations/airbyte-cloud.mdx index c05bd3e23cd88..46b31893cd3a5 100644 --- a/docs/content/integrations/airbyte-cloud.mdx +++ b/docs/content/integrations/airbyte-cloud.mdx @@ -283,7 +283,7 @@ defs = Definitions( ) ``` -Refer to the [Schedule documentation](/concepts/partitions-schedules-sensors/schedules#running-the-scheduler) for more info on running jobs on a schedule. +Refer to the [Schedule documentation](/concepts/automation/schedules) for more info on running jobs on a schedule. --- @@ -326,7 +326,7 @@ If you have questions on using Airbyte with Dagster, we'd love to hear from you: title="Resources" > diff --git a/docs/content/integrations/airbyte.mdx b/docs/content/integrations/airbyte.mdx index 5a860d8eab269..5f9f0feae9532 100644 --- a/docs/content/integrations/airbyte.mdx +++ b/docs/content/integrations/airbyte.mdx @@ -347,7 +347,7 @@ defs = Definitions( ) ``` -Refer to the [Schedule documentation](/concepts/partitions-schedules-sensors/schedules#running-the-scheduler) for more info on running jobs on a schedule. +Refer to the [Schedule documentation](/concepts/automation/schedules) for more info on running jobs on a schedule. --- @@ -386,7 +386,7 @@ If you have questions on using Airbyte with Dagster, we'd love to hear from you: title="Resources" > diff --git a/docs/content/integrations/airflow.mdx b/docs/content/integrations/airflow.mdx index 97448fe7afbf4..141841265eb63 100644 --- a/docs/content/integrations/airflow.mdx +++ b/docs/content/integrations/airflow.mdx @@ -210,7 +210,7 @@ While Airflow and Dagster have some significant differences, there are many conc Schedulers - Schedules + Schedules diff --git a/docs/content/integrations/databricks.mdx b/docs/content/integrations/databricks.mdx index c16a7d19116ba..bea80caeff022 100644 --- a/docs/content/integrations/databricks.mdx +++ b/docs/content/integrations/databricks.mdx @@ -126,7 +126,7 @@ materialize_databricks_table = define_asset_job( ## Step 3: Schedule your Databricks computation -Now that your Databricks API calls are modeled within Dagster, you can [schedule](/concepts/partitions-schedules-sensors/schedules) them to run regularly. +Now that your Databricks API calls are modeled within Dagster, you can [schedule](/concepts/automation/schedules) them to run regularly. 
In the example below, we schedule the `materialize_databricks_table` and `my_databricks_job` jobs to run daily:
diff --git a/docs/content/integrations/dbt/reference.mdx b/docs/content/integrations/dbt/reference.mdx
index 832102596464a..ee43145baed2b 100644
--- a/docs/content/integrations/dbt/reference.mdx
+++ b/docs/content/integrations/dbt/reference.mdx
@@ -286,7 +286,7 @@ daily_dbt_assets_and_downstream_schedule = ScheduleDefinition(
 )
 ```
 
-Refer to the [Schedule documentation](/concepts/partitions-schedules-sensors/schedules#running-the-scheduler) for more info on running jobs on a schedule.
+Refer to the [Schedule documentation](/concepts/automation/schedules) for more info on running jobs on a schedule.
 
 ---
 
diff --git a/docs/content/integrations/fivetran.mdx b/docs/content/integrations/fivetran.mdx
index 7f2198f1cb27a..a9c9b942bb177 100644
--- a/docs/content/integrations/fivetran.mdx
+++ b/docs/content/integrations/fivetran.mdx
@@ -261,7 +261,7 @@ defs = Definitions(
 )
 ```
 
-Refer to the [Schedule documentation](/concepts/partitions-schedules-sensors/schedules#running-the-scheduler) for more info on running jobs on a schedule.
+Refer to the [Schedule documentation](/concepts/automation/schedules) for more info on running jobs on a schedule.
 
 ---
 
@@ -277,4 +277,4 @@ If you find a bug or want to add a feature to the `dagster-fivetran` library, we
 - [Asset definitions](/concepts/assets/software-defined-assets)
 - [Resources](/concepts/resources)
 - [Using environment variables and secrets](/guides/dagster/using-environment-variables-and-secrets)
-- [Scheduling Dagster jobs](/concepts/partitions-schedules-sensors/schedules#running-the-scheduler)
+- [Schedules](/concepts/automation/schedules)
diff --git a/docs/next/util/redirectUrls.json b/docs/next/util/redirectUrls.json
index 080363bdba34c..7dfdeac331589 100644
--- a/docs/next/util/redirectUrls.json
+++ b/docs/next/util/redirectUrls.json
@@ -199,8 +199,8 @@
     "statusCode": 302
   },
   {
-    "source": "/overview/schedules-sensors/schedules",
-    "destination": "/concepts/partitions-schedules-sensors/schedules",
+    "source": "/concepts/partitions-schedules-sensors/schedules",
+    "destination": "/concepts/automation/schedules",
     "statusCode": 302
   },
@@ -351,7 +351,7 @@
   },
   {
     "source": "/troubleshooting/schedules",
-    "destination": "/concepts/partitions-schedules-sensors/schedules",
+    "destination": "/concepts/automation/schedules/troubleshooting",
     "statusCode": 302
   },
   {
@@ -541,7 +541,7 @@
   },
   {
     "source": "/tutorial/advanced-tutorial/scheduling",
-    "destination": "/concepts/partitions-schedules-sensors/schedules",
+    "destination": "/concepts/automation/schedules",
     "statusCode": 302
   },
   {
diff --git a/docs/sphinx/sections/api/apidocs/schedules-sensors.rst b/docs/sphinx/sections/api/apidocs/schedules-sensors.rst
index 0a1361ed39630..a261d1066efd5 100644
--- a/docs/sphinx/sections/api/apidocs/schedules-sensors.rst
+++ b/docs/sphinx/sections/api/apidocs/schedules-sensors.rst
@@ -17,7 +17,7 @@ Run requests
 Schedules
 ---------
 
-`Schedules `__ are Dagster's way to support traditional ways of automation, such as specifying a job should run at Mondays at 9:00AM. Jobs triggered by schedules can contain a subset of `assets `__ or `ops `__.
+`Schedules `__ are Dagster's way of supporting traditional methods of automation, such as specifying that a job should run on Mondays at 9:00 AM. Jobs triggered by schedules can contain a subset of `assets `__ or `ops `__.
 
 .. autodecorator:: schedule