From 869a56d0977712da238ac240e014f883a678f7b4 Mon Sep 17 00:00:00 2001
From: Ben Cassell <98852248+benc-db@users.noreply.github.com>
Date: Thu, 30 Nov 2023 12:58:50 -0800
Subject: [PATCH] Update databricks-configs.md to cover compute per model

---
 .../resource-configs/databricks-configs.md    | 144 ++++++++++++++++++
 1 file changed, 144 insertions(+)

diff --git a/website/docs/reference/resource-configs/databricks-configs.md b/website/docs/reference/resource-configs/databricks-configs.md
index a3b00177967..8b09eb5326c 100644
--- a/website/docs/reference/resource-configs/databricks-configs.md
+++ b/website/docs/reference/resource-configs/databricks-configs.md
@@ -361,6 +361,150 @@ insert into analytics.replace_where_incremental
+
+
+## Selecting compute per model
+
+Beginning in version 1.7.2, you can assign which compute to use on a per-model basis.
+To take advantage of this capability, you will need to add compute blocks to your profile:
+
+
+
+```yaml
+
+<profile-name>:
+  target: <target-name> # this is the default target
+  outputs:
+    <target-name>:
+      type: databricks
+      catalog: [optional catalog name if you are using Unity Catalog]
+      schema: [schema name] # Required
+      host: [yourorg.databrickshost.com] # Required
+
+      ### This path is used as the default compute
+      http_path: [/sql/your/http/path] # Required
+
+      ### New compute section
+      compute:
+
+        ### Name that you will use to refer to an alternate compute
+        AltCompute:
+          http_path: ['/sql/your/http/path'] # Required of each alternate compute
+
+        ### Another named compute, use whatever name you like
+        Compute2:
+          http_path: ['/some/other/path'] # Required of each alternate compute
+      ...
+
+    <target-name>: # additional targets
+      ...
+      ### For each target, you need to define the same compute,
+      ### but you can specify different paths
+      compute:
+
+        ### Name that you will use to refer to an alternate compute
+        Compute1:
+          http_path: ['/sql/your/http/path'] # Required of each alternate compute
+
+        ### Another named compute, use whatever name you like
+        Compute2:
+          http_path: ['/some/other/path'] # Required of each alternate compute
+      ...
+
+```
+
+
+
+The new compute section is a map of user-chosen names to objects with an `http_path` property.
+Each compute is keyed by a name which is used in the model definition/configuration to indicate which compute you wish to use for that model/selection of models.
+
+:::note
+
+You need to use the same set of names for compute across your outputs, though you may supply different `http_path` values, allowing you to use different computes in different deployment scenarios.
+
+:::
+
+### Specifying the compute for models
+
+As with many other configuration options, you can specify the compute for a model in multiple ways, using `databricks_compute`.
+In your `dbt_project.yml`, the selected compute can be specified for all the models in a given directory:
+
+
+
+```yaml
+
+...
+
+models:
+  +databricks_compute: "Compute1" # use the `Compute1` warehouse/cluster for all models in the project...
+  my_project:
+    clickstream:
+      +databricks_compute: "Compute2" # ...except for the models in the `clickstream` folder, which will use `Compute2`.
+
+snapshots:
+  +databricks_compute: "Compute1" # all Snapshot models are configured to use `Compute1`.
+
+```
+
+
+
+For an individual model the compute can be specified in the model config in your schema file.
+
+
+
+```yaml
+
+models:
+  - name: table_model
+    config:
+      databricks_compute: Compute1
+    columns:
+      - name: id
+        data_type: int
+
+```
+
+
+
+
+Alternatively the compute can be specified in the config block of a model's SQL file.
+
+
+
+```sql
+
+{{
+  config(
+    materialized='table',
+    databricks_compute='Compute1'
+  )
+}}
+select * from {{ ref('seed') }}
+
+```
+
+
+
+:::note
+
+In the absence of a specified compute, we will default to the compute specified by `http_path` in the top level of the output section in your profile.
+This is also the compute that will be used for tasks not associated with a particular model, such as gathering metadata for all tables in a schema.
+
+:::
+
+To validate that the specified compute is being used, look for lines in your dbt.log like:
+
+```
+Databricks adapter ... using default compute resource.
+```
+
+or
+
+```
+Databricks adapter ... using compute resource <name>.
+```
+
 
 ## Persisting model descriptions