Skip to content

Commit

Permalink
Refactor python config handling (#830)
Browse files Browse the repository at this point in the history
  • Loading branch information
benc-db authored Oct 22, 2024
1 parent f546cbe commit 84fc024
Show file tree
Hide file tree
Showing 11 changed files with 1,258 additions and 568 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

### Under the Hood

- Significant refactoring and increased testing of python_submissions ([830](https://github.com/databricks/dbt-databricks/pull/830))
- Fix places where we were not properly closing cursors, and other test warnings ([713](https://github.com/databricks/dbt-databricks/pull/713))
- Drop support for Python 3.8 ([713](https://github.com/databricks/dbt-databricks/pull/713))
- Upgrade databricks-sql-connector dependency to 3.5.0 ([833](https://github.com/databricks/dbt-databricks/pull/833))
Expand Down
57 changes: 57 additions & 0 deletions dbt/adapters/databricks/python_models/python_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from typing import Any, Dict, List, Optional
import uuid
from pydantic import BaseModel, Field


# Default for PythonModelConfig.timeout: 24 * 60 * 60 = 86400.
# NOTE(review): presumably expressed in seconds (i.e. 24 hours) - confirm against
# the code that consumes `timeout`.
DEFAULT_TIMEOUT = 24 * 60 * 60


class PythonJobConfig(BaseModel):
    """Settings read from the `python_job_config` entry of a Python model.

    Every field except `name` is declared with `exclude=True`, so pydantic
    omits it when the model is exported. Keys that are not declared here are
    still accepted and kept on the instance (`extra = "allow"`).
    """

    name: Optional[str] = None

    # Fields below are excluded from the exported representation.
    grants: Dict[str, List[Dict[str, str]]] = Field(default_factory=dict, exclude=True)
    existing_job_id: str = Field(default="", exclude=True)
    post_hook_tasks: List[Dict[str, Any]] = Field(default_factory=list, exclude=True)
    additional_task_settings: Dict[str, Any] = Field(default_factory=dict, exclude=True)

    class Config:
        # Keep undeclared keys instead of ignoring or rejecting them.
        extra = "allow"


class PythonModelConfig(BaseModel):
    """
    Configuration of a single Python model.

    Besides model-level options this also carries a few job-related settings
    that have not (yet) been moved into PythonJobConfig.
    """

    user_folder_for_python: bool = False

    # Must be strictly positive (gt=0); falls back to DEFAULT_TIMEOUT when omitted.
    timeout: int = Field(default=DEFAULT_TIMEOUT, gt=0)

    job_cluster_config: Dict[str, Any] = Field(default_factory=dict)
    access_control_list: List[Dict[str, str]] = Field(default_factory=list)
    packages: List[str] = Field(default_factory=list)
    index_url: Optional[str] = None
    additional_libs: List[Dict[str, Any]] = Field(default_factory=list)

    # A PythonJobConfig built with no arguments is used when none is supplied.
    python_job_config: PythonJobConfig = Field(default_factory=PythonJobConfig)

    cluster_id: Optional[str] = None
    http_path: Optional[str] = None
    create_notebook: bool = False


class ParsedPythonModel(BaseModel):
    """Python model as parsed from a dbt manifest.

    Input keys `database`, `schema` and `alias` are mapped onto the fields
    `catalog`, `schema_` and `identifier` through pydantic aliases; the field
    names themselves are also accepted (see `Config`).
    """

    catalog: str = Field(default="hive_metastore", alias="database")

    # `schema` is a reserved name in Pydantic, so the field carries a trailing
    # underscore and is populated through its alias.
    schema_: str = Field(default="default", alias="schema")

    # No default is given: the `alias` key (or `identifier`) is required.
    identifier: str = Field(alias="alias")
    config: PythonModelConfig

    @property
    def run_name(self) -> str:
        """Return `<catalog>-<schema>-<identifier>-<uuid4>`.

        A new UUID is generated on every access, so two reads of this
        property yield different names.
        """
        unique_suffix = str(uuid.uuid4())
        name_parts = (self.catalog, self.schema_, self.identifier, unique_suffix)
        return "-".join(name_parts)

    class Config:
        # Allow construction by field name in addition to the declared aliases.
        allow_population_by_field_name = True
Loading

0 comments on commit 84fc024

Please sign in to comment.