From 95a864976e7ec40fbcd16464fc4dbbb5463b6199 Mon Sep 17 00:00:00 2001 From: Michael Connell Date: Mon, 12 Aug 2024 17:15:46 -0700 Subject: [PATCH] add initial support for semantic_manifest files --- README.md | 13 ++ dbt_artifacts_parser/parser.py | 17 +++ .../parsers/semantic_manifest/__init__.py | 16 +++ .../semantic_manifest/semantic_manifest_v1.py | 127 ++++++++++++++++++ dbt_artifacts_parser/parsers/version_map.py | 5 + tests/parsers/test_utils.py | 3 + .../v1/jaffle_shop/semantic_manifest.json | 81 +++++++++++ tests/test_parser.py | 21 +++ 8 files changed, 283 insertions(+) create mode 100644 dbt_artifacts_parser/parsers/semantic_manifest/__init__.py create mode 100644 dbt_artifacts_parser/parsers/semantic_manifest/semantic_manifest_v1.py create mode 100644 tests/resources/v1/jaffle_shop/semantic_manifest.json diff --git a/README.md b/README.md index f09b3b3..5700e26 100644 --- a/README.md +++ b/README.md @@ -237,6 +237,19 @@ with open("path/to/run-results.json", "r") as fp: run_results_obj = parse_run_results_v6(run_results=run_results_dict) ``` +### Parse semantic_manifest.json + +```python +import json + +# parse any version of semantic_manifest.json +from dbt_artifacts_parser.parser import parse_semantic_manifest + +with open("path/to/semantic_manifest.json", "r") as fp: + semantic_manifest_dict = json.load(fp) + semantic_manifest_obj = parse_semantic_manifest(semantic_manifest_dict) +``` + ### Parse sources.json ```python diff --git a/dbt_artifacts_parser/parser.py b/dbt_artifacts_parser/parser.py index 9bad25f..0ca05a3 100644 --- a/dbt_artifacts_parser/parser.py +++ b/dbt_artifacts_parser/parser.py @@ -35,6 +35,7 @@ from dbt_artifacts_parser.parsers.run_results.run_results_v4 import RunResultsV4 from dbt_artifacts_parser.parsers.run_results.run_results_v5 import RunResultsV5 from dbt_artifacts_parser.parsers.run_results.run_results_v6 import RunResultsV6 +from dbt_artifacts_parser.parsers.semantic_manifest.semantic_manifest_v1 import SemanticManifestV1 
from dbt_artifacts_parser.parsers.sources.sources_v1 import SourcesV1 from dbt_artifacts_parser.parsers.sources.sources_v2 import SourcesV2 from dbt_artifacts_parser.parsers.sources.sources_v3 import SourcesV3 @@ -299,6 +300,22 @@ def parse_run_results_v6(run_results: dict) -> RunResultsV6: return RunResultsV6(**run_results) raise ValueError("Not a run-results.json v6") +# +# semantic-manifest + +def parse_semantic_manifest(semantic_manifest: dict) -> SemanticManifestV1: + """ + # todo once dbt adds a schema version to their semantic manifest file + dbt_schema_version = get_dbt_schema_version(artifact_json=semantic_manifest) + if dbt_schema_version == ArtifactTypes.SEMANTIC_MANIFEST_V1.value.dbt_schema_version: + return SemanticManifestV1(**semantic_manifest) + elif dbt_schema_version == ArtifactTypes.SEMANTIC_MANIFEST_V2.value.dbt_schema_version: + return SemanticManifestV2(**semantic_manifest) + elif ... + raise ValueError("Not a semantic_manifest.json") + """ + return SemanticManifestV1(**semantic_manifest) + # # sources # diff --git a/dbt_artifacts_parser/parsers/semantic_manifest/__init__.py b/dbt_artifacts_parser/parsers/semantic_manifest/__init__.py new file mode 100644 index 0000000..3cfa191 --- /dev/null +++ b/dbt_artifacts_parser/parsers/semantic_manifest/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/dbt_artifacts_parser/parsers/semantic_manifest/semantic_manifest_v1.py b/dbt_artifacts_parser/parsers/semantic_manifest/semantic_manifest_v1.py new file mode 100644 index 0000000..d179290 --- /dev/null +++ b/dbt_artifacts_parser/parsers/semantic_manifest/semantic_manifest_v1.py @@ -0,0 +1,127 @@ +from typing import Dict, List, Optional + +from pydantic import ConfigDict +from dbt_artifacts_parser.parsers.base import BaseParserModel + +class NodeRelation(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + alias: str + schema_name: str + database: str + relation_name: str + + +class Measure(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + name: str + filter: Optional[str] + alias: Optional[str] + + +class TypeParams(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + measure: Measure + numerator: Optional[str] = "" + denominator: Optional[str] = "" + expr: Optional[str] = "" + window: Optional[str] = "" + grain_to_date: Optional[str] = "" + metrics: List[str] + input_measures: List[str] + + +class Metric(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + name: str + description: str + type: str + type_params: TypeParams + filter: Optional[str] = "" + metadata: Optional[Dict[str, str]] = {} + + +class TimeSpineTableConfiguration(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + location: str + column_name: str + grain: str + + +class ProjectConfiguration(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + time_spine_table_configurations: List[TimeSpineTableConfiguration] + metadata: Optional[Dict[str, str]] = {} + dsi_package_version: Dict[str, str] + + +class SavedQueryExportConfig(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + export_as: str + schema_name: Optional[str] = "" + alias: Optional[str] = "" + + 
+class SavedQueryExport(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + name: str + config: SavedQueryExportConfig + + +class SavedQueryParams(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + metrics: List[str] + group_by: List[str] + where: Optional[List[str]] = [] + + +class SavedQuery(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + name: str + query_params: SavedQueryParams + description: str + metadata: Optional[Dict[str, str]] = {} + label: Optional[str] = "" + exports: List[SavedQueryExport] + + +class SemanticModel(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + name: str + defaults: Optional[Dict[str, str]] = {} + description: str + node_relation: NodeRelation + entities: List[str] + measures: List[str] + dimensions: List[str] + metrics: List[Metric] + project_configuration: ProjectConfiguration + saved_queries: List[SavedQuery] + + +class SemanticManifestV1(BaseParserModel): + model_config = ConfigDict( + extra='forbid', + ) + semantic_models: List[SemanticModel] diff --git a/dbt_artifacts_parser/parsers/version_map.py b/dbt_artifacts_parser/parsers/version_map.py index 959bb20..28a4fca 100644 --- a/dbt_artifacts_parser/parsers/version_map.py +++ b/dbt_artifacts_parser/parsers/version_map.py @@ -38,6 +38,7 @@ from dbt_artifacts_parser.parsers.run_results.run_results_v4 import RunResultsV4 from dbt_artifacts_parser.parsers.run_results.run_results_v5 import RunResultsV5 from dbt_artifacts_parser.parsers.run_results.run_results_v6 import RunResultsV6 +from dbt_artifacts_parser.parsers.semantic_manifest.semantic_manifest_v1 import SemanticManifestV1 from dbt_artifacts_parser.parsers.sources.sources_v1 import SourcesV1 from dbt_artifacts_parser.parsers.sources.sources_v2 import SourcesV2 from dbt_artifacts_parser.parsers.sources.sources_v3 import SourcesV3 @@ -93,6 +94,10 @@ class ArtifactTypes(Enum): "https://schemas.getdbt.com/dbt/run-results/v5.json", RunResultsV5) 
RUN_RESULTS_V6 = ArtifactType( "https://schemas.getdbt.com/dbt/run-results/v6.json", RunResultsV6) + # Semantic Manifests + SEMANTIC_MANIFEST_V1 = ArtifactType( + "https://schemas.getdbt.com/dbt/semantic-manifest/v1.json", SemanticManifestV1 + ) # Sources SOURCES_V1 = ArtifactType("https://schemas.getdbt.com/dbt/sources/v1.json", SourcesV1) diff --git a/tests/parsers/test_utils.py b/tests/parsers/test_utils.py index 0684fa4..1f08975 100644 --- a/tests/parsers/test_utils.py +++ b/tests/parsers/test_utils.py @@ -35,6 +35,7 @@ from dbt_artifacts_parser.parsers.run_results.run_results_v2 import RunResultsV2 from dbt_artifacts_parser.parsers.run_results.run_results_v3 import RunResultsV3 from dbt_artifacts_parser.parsers.run_results.run_results_v4 import RunResultsV4 +from dbt_artifacts_parser.parsers.semantic_manifest.semantic_manifest_v1 import SemanticManifestV1 from dbt_artifacts_parser.parsers.sources.sources_v1 import SourcesV1 from dbt_artifacts_parser.parsers.sources.sources_v2 import SourcesV2 from dbt_artifacts_parser.parsers.sources.sources_v3 import SourcesV3 @@ -120,6 +121,8 @@ def test_get_dbt_schema_version(self, version, artifacts): (ArtifactTypes.MANIFEST_V1, ManifestV1), (ArtifactTypes.RUN_RESULTS_V1, RunResultsV1), (ArtifactTypes.SOURCES_V1, SourcesV1), + # todo: once dbt adds the metadata and schema to the semantic_manifest file + # (ArtifactTypes.SEMANTIC_MANIFEST_V1, SemanticManifestV1), # v2 (ArtifactTypes.MANIFEST_V2, ManifestV2), (ArtifactTypes.RUN_RESULTS_V2, RunResultsV2), diff --git a/tests/resources/v1/jaffle_shop/semantic_manifest.json b/tests/resources/v1/jaffle_shop/semantic_manifest.json new file mode 100644 index 0000000..1f2473e --- /dev/null +++ b/tests/resources/v1/jaffle_shop/semantic_manifest.json @@ -0,0 +1,81 @@ +{ + "semantic_models": [ + { + "name": "semantic model name", + "defaults": null, + "description": "semantic model description", + "node_relation": { + "alias": "model alias", + "schema_name": "model schema", + 
"database": "model db", + "relation_name": "Fully qualified relation name" + }, + "entities": ["entities in the semantic model"], + "measures": ["measures in the semantic model"], + "dimensions": ["dimensions in the semantic model" ], + "metrics": [ + { + "name": "name of the metric", + "description": "metric description", + "type": "metric type", + "type_params": { + "measure": { + "name": "name for measure", + "filter": "filter for measure", + "alias": "alias for measure" + }, + "numerator": null, + "denominator": null, + "expr": null, + "window": null, + "grain_to_date": null, + "metrics": ["metrics used in defining the metric. this is used in derived metrics"], + "input_measures": [] + }, + "filter": null, + "metadata": null + } + ], + "project_configuration": { + "time_spine_table_configurations": [ + { + "location": "fully qualified table name for timespine", + "column_name": "date column", + "grain": "day" + } + ], + "metadata": null, + "dsi_package_version": {} + }, + "saved_queries": [ + { + "name": "name of the saved query", + "query_params": { + "metrics": [ + "metrics used in the saved query" + ], + "group_by": [ + "TimeDimension('model_primary_key__date_column', 'day')", + "Dimension('model_primary_key__metric_one')", + "Dimension('model__dimension')" + ], + "where": null + }, + "description": "Description of the saved query", + "metadata": null, + "label": null, + "exports": [ + { + "name": "saved_query_name", + "config": { + "export_as": "view", + "schema_name": null, + "alias": null + } + } + ] + } + ] +} + ] +} diff --git a/tests/test_parser.py b/tests/test_parser.py index 1f83c09..ac8c67f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -133,6 +133,27 @@ def test_parse_run_results_specific(self, version): == f"https://schemas.getdbt.com/dbt/run-results/{version}.json" ) +# TODO test semantic manifest version parsing +# @pytest.mark.parametrize("version", ["v1"]) +# def test_parse_semantic_manifest(self, version): +# path = 
os.path.join( +# get_project_root(), +# "tests", +# "resources", +# version, +# "jaffle_shop", +# "semantic_manifest.json", +# ) +# with open(path, "r", encoding="utf-8") as fp: +# semantic_manifest_dict = yaml.safe_load(fp) +# semantic_manifest_obj = getattr(parser, f"parse_semantic_manifest_{version}")( +# semantic_manifest_dict +# ) +# assert ( +# semantic_manifest_obj.metadata.dbt_schema_version +# == f"https://schemas.getdbt.com/dbt/semantic-manifest/{version}.json" +# ) + # TODO add fixtures of sources.json # @pytest.mark.parametrize("version", ["v1", "v2", "v3"])